;;; Utilities for working with missing data coded as NIL.
;;; The code relies on the vectorized Lisp-Stat primitives IF-ELSE, ISEQ,
;;; SELECT, WHICH, MAP-ELEMENTS, SAMPLE, MEAN, STANDARD-DEVIATION, F-CDF,
;;; T-CDF and ^.

(defun numbers-only (data)
"Args: (DATA)
DATA is a simple list. Returns a list where non-numbers are replaced
by NIL."
  (if-else (mapcar #'numberp data) data nil))

(defun code-missing (code data)
"Args: (CODE DATA)
DATA is a simple list with CODE representing missing data value(s).
Returns a list where the missing data is coded as NIL. CODE may be a
list of codes; an empty list (including NIL) leaves DATA unchanged."
  (if (listp code)
      ;; Apply every code in turn to the progressively recoded list.
      (let ((result data))
        (dolist (c code result)
          (setf result (code-missing c result))))
      (if-else (mapcar #'(lambda (x) (equalp code x)) data) nil data)))

(defun make-ind (data)
"Args: (DATA)
DATA is a list of variables (of same length) where missing
observations are coded as NIL. Returns variables where non-numbers
(in particular NILs) are replaced by 1 and numbers are replaced by 0,
i.e. an indicator list of missing values for each variable."
  (mapcar #'(lambda (x) (if-else (mapcar #'numberp x) 0 1))
          data))

(defun observedp (&rest args)
"Args: (&rest VARIABLES)
VARIABLES are lists with NIL for missing data. Returns a predicate
list of completely observed cases: NIL if there is at least one
missing observation in the case, non-NIL otherwise. The non-NIL
entries are the (zero-based) index numbers of the cases."
  (let ((result (iseq (length (car args)))))
    ;; Knock out the index of every case that is NIL in any variable.
    (dolist (var args result)
      (setf result (if-else var result nil)))))

(defun which-nil (x)
"Args: (X)
X is a variable with missing data coded as NIL. Returns the indices
of the missing observations."
  (let ((indices (iseq (length x))))
    (remove nil (if-else x nil indices))))

(defun which-one (x)
"Args: (X)
X is a 0-1 variable. Returns the indices of the 1's (more precisely,
of every element that is not equal to 0)."
  (let ((indices (iseq (length x))))
    (remove nil (if-else (= x 0) nil indices))))

(defun select-observed (data &optional index)
"Args: (DATA &optional INDEX)
DATA is a list of variables (of same length) where missing
observations are coded as NIL. Returns variables containing only
fully observed cases, where fully means observed on the INDEXed
variables. If INDEX is not supplied, all variables are used to
eliminate deficient cases."
  (let* ((critical (if index (select data index) data))
         (ind (remove nil (apply #'observedp critical))))
    (mapcar #'(lambda (var) (select var ind)) data)))

(defun select-stratum (data strata code)
"Args: (DATA STRATA CODE)
DATA is a list of variables. STRATA is a variable indicating the
stratum of each observation for some population aspect. CODE is the
specific stratum of interest (compared with EQUALP). Returns the
variables restricted to the observations in this stratum."
  (let ((ind (which (map-elements #'equalp strata code))))
    (mapcar #'(lambda (var) (select var ind)) data)))

(defun two-sample-t (X1 X2)
"Args: (X1 X2)
X1 and X2 are lists of numbers. Computes the two-sample t-statistic
for the difference of means (mean of X1 minus mean of X2) and returns
the value of the t distribution function at that statistic, i.e. the
lower-tail probability; one minus this value is the one-sided p-value
for the alternative that the mean of X1 is larger.
Either the pooled or the un-pooled sample variance is used, depending
on an F-test for equality of variances (larger variance over smaller
variance, tail probability compared with .05). The pooled statistic
uses N1+N2-2 degrees of freedom; the un-pooled statistic uses the
conservative MIN(N1-1, N2-1). Both samples need at least two values
and a non-zero variance."
  (let* ((mean1 (mean x1))
         (mean2 (mean x2))
         (n1 (length x1))
         (n2 (length x2))
         ;; Sample variances: STANDARD-DEVIATION must be squared.
         (s1sq (^ (standard-deviation x1) 2))
         (s2sq (^ (standard-deviation x2) 2))
         ;; Pooled variance estimate.
         (sp (/ (+ (* (- n1 1) s1sq) (* (- n2 1) s2sq))
                (+ n1 n2 -2)))
         ;; 1.0 forces floating point division whatever the integer
         ;; division rules of the running Lisp are.
         (t-p (/ (- mean1 mean2)
                 (^ (* sp (+ (/ 1.0 n1) (/ 1.0 n2))) .5)))
         (t-up (/ (- mean1 mean2)
                  (^ (+ (/ s1sq n1) (/ s2sq n2)) .5)))
         ;; The F ratio puts the larger variance in the numerator, so
         ;; the degrees of freedom have to follow the same ordering.
         (first-larger (>= s1sq s2sq))
         (f-ratio (if first-larger (/ s1sq s2sq) (/ s2sq s1sq)))
         (ndf (if first-larger (- n1 1) (- n2 1)))
         (ddf (if first-larger (- n2 1) (- n1 1)))
         (f-cum (f-cdf f-ratio ndf ddf))
         ;; Tail probability: the smaller of the two tails.
         (f-p (if (> f-cum .5) (- 1 f-cum) f-cum)))
    (if (< f-p .05)
        ;; Variances differ: un-pooled statistic, conservative df.
        (t-cdf t-up (min (- n1 1) (- n2 1)))
        ;; Variances compatible: pooled statistic.
        (t-cdf t-p (+ n1 n2 -2)))))

(defun var-present (data)
"Args: (DATA)
DATA is a list of variables (of same length) where missing
observations are coded as NIL. Returns a list of lists, where each
list consists of the positions of the non-missing values in the
corresponding variable."
  (mapcar #'(lambda (var) (which var)) data))

(defun var-missing (data)
"Args: (DATA)
DATA is a list of variables (of same length) where missing
observations are coded as NIL. Returns a list of lists, where each
list consists of the positions of the missing values in the
corresponding variable."
  (mapcar #'(lambda (var) (which (mapcar #'not var))) data))

(defun select-by-miss (var1 var2 &key (code nil) (stat #'list))
"Args: (VAR1 VAR2 &key CODE STAT)
VAR1 and VAR2 are two variables, both simple lists. VAR1 is complete
while VAR2 has missing values, coded as CODE or NIL (default).
Forms two lists: the values of VAR1 where VAR2 is observed and the
values of VAR1 where VAR2 is missing, and returns the result of
calling STAT on them in that order (observed first, missing second).
STAT may compare some aspect of the two lists, e.g. the difference of
means; by default the two lists are simply returned in a list."
  (let* ((var2 (if code (code-missing code var2) var2))
         (var2-obs (which var2))
         (var2-miss (which (mapcar #'not var2)))
         (var1-by-var2-obs (select var1 var2-obs))
         (var1-by-var2-miss (select var1 var2-miss)))
    (funcall stat var1-by-var2-obs var1-by-var2-miss)))

(defun select-by-perm (var1 var2 &optional (num 1) &key code (stat #'list))
"Args: (VAR1 VAR2 &optional NUM &key CODE STAT)
VAR1 and VAR2 are two variables, both simple lists. VAR1 is complete
while VAR2 has missing values, coded as CODE or NIL (default).
Randomly permutes VAR1 NUM times (default 1). Each permutation is
split into two sub-lists whose sizes equal the sizes of the missing
and the non-missing subsets of VAR2, and STAT is called on them in
that order (missing-sized first, observed-sized second). STAT may
compare some aspect of the two sub-lists, e.g. the difference of
means; by default the two sub-lists are simply returned in a list.
Returns the list of the NUM results."
  (let* ((permlist nil)
         (var2 (code-missing code var2))
         (var2-miss (which (mapcar #'not var2)))
         (len1 (length var1))
         (len2 (length var2-miss))
         (ind1 (iseq len2))
         ;; ISEQ counts downwards when its first argument exceeds the
         ;; second, so an all-missing VAR2 needs an explicit empty index.
         (ind2 (if (< len2 len1) (iseq len2 (- len1 1)) nil)))
    (dotimes (i num permlist)
      ;; Sampling all of VAR1 without replacement is a permutation.
      (let* ((var1-perm (sample var1 len1))
             (var1-perm-miss (select var1-perm ind1))
             (var1-perm-obs (select var1-perm ind2)))
        (setf permlist
              (cons (funcall stat var1-perm-miss var1-perm-obs)
                    permlist))))))

(defun select-by-perm-test (var1 var2 &optional (num 1) &key code (stat #'list))
"Args: (VAR1 VAR2 &optional NUM &key CODE STAT)
VAR1 and VAR2 are two variables, both simple lists. VAR1 is complete
while VAR2 has missing values, coded as CODE or NIL (default).
STAT is a function of two lists returning a number, e.g. a difference
of means. It is first evaluated on the values of VAR1 where VAR2 is
observed and where VAR2 is missing (see SELECT-BY-MISS). VAR1 is then
randomly permuted NUM times; each permutation is split into an
observed-sized and a missing-sized sub-list and STAT is evaluated on
them in the same (observed, missing) order.
Returns a p-value: the smaller of P and 1-P, where P is the fraction
of permutations whose statistic does not exceed the actual one.
NUM must be positive and STAT must return a number."
  (let* ((permlist 0)
         (stat-value (select-by-miss var1 var2 :code code :stat stat))
         (var2 (code-missing code var2))
         (var2-miss (which (mapcar #'not var2)))
         (len1 (length var1))
         (len2 (length var2-miss))
         (ind1 (iseq len2))
         ;; ISEQ counts downwards when its first argument exceeds the
         ;; second, so an all-missing VAR2 needs an explicit empty index.
         (ind2 (if (< len2 len1) (iseq len2 (- len1 1)) nil)))
    (dotimes (i num
                (let ((fraction (/ permlist num)))
                  (if (< fraction .5) fraction (- 1 fraction))))
      (let* ((var1-perm (sample var1 len1))
             (var1-perm-miss (select var1-perm ind1))
             (var1-perm-obs (select var1-perm ind2)))
        ;; Same argument order as SELECT-BY-MISS (observed, missing), so
        ;; that asymmetric statistics are compared like with like.
        (setf permlist
              (+ (if (< stat-value
                        (funcall stat var1-perm-obs var1-perm-miss))
                     0
                     1)
                 permlist))))))

(defun collect-p (num &optional (num-it 10)
                      &key (var1 sex) (var2 race) (code 8) (stat 'diff-mean))
"Args: (NUM &optional NUM-IT &key VAR1 VAR2 CODE STAT)
NUM is the number of p-values to be computed, i.e. how many times the
permutation test is repeated. NUM-IT (default 10) is the number of
randomly permuted sub-populations used in each test.
VAR1, VAR2, CODE and STAT are passed to SELECT-BY-PERM-TEST. Their
defaults are the global variables SEX and RACE, the missing code 8
and the function DIFF-MEAN, none of which is defined in this file;
they must exist when the defaults are used.
Returns a list of NUM p-values."
  (let ((p-values ()))
    (dotimes (i num p-values)
      (let ((p-value (select-by-perm-test var1 var2 num-it
                                          :code code :stat stat)))
        (setf p-values (cons p-value p-values))))))

(defun impute-rand (data &optional replace)
"Args: (DATA &optional REPLACE)
DATA is a list with NILs for missing observations. Returns a copy of
DATA in which the missing values are replaced by a random sample from
the observed values (with replacement if REPLACE is non-NIL).
The copy is returned unchanged when nothing is missing, when there
are no observed values, or when sampling without replacement is
requested but fewer values are observed than are missing."
  (let* ((ind (which (mapcar #'not data)))
         (l (length ind))
         (obs (remove nil data))
         (copy (copy-list data)))
    (unless (or (null ind)
                (null obs)
                (and (not replace) (< (length obs) l)))
      (setf (select copy ind) (sample obs l replace)))
    copy))

(defun impute-strata (data strata &optional replace)
"Args: (DATA STRATA &optional REPLACE)
Carries out random imputation per stratum. STRATA and DATA must be
lists of equal length; DATA has NILs for missing observations.
Within every stratum that contains a missing value the missing values
are imputed from the observed values of that stratum with IMPUTE-RAND.
Values that could not be imputed within their stratum are finally
imputed from the whole data set. REPLACE is passed to IMPUTE-RAND in
both steps. Returns the imputed copy of DATA."
  (let* ((mis-ind (which (mapcar #'not data)))
         ;; One index list per distinct stratum that has a missing value.
         ;; EQUALP is used for de-duplication because it is also the test
         ;; used to match observations to strata.
         (strata-inds
          (mapcar #'(lambda (x) (which (map-elements #'equalp x strata)))
                  (remove-duplicates (select strata mis-ind)
                                     :test #'equalp)))
         (copy (copy-list data)))
    (dolist (ind strata-inds)
      (setf (select copy ind)
            (impute-rand (select copy ind) replace)))
    ;; Fallback across strata for whatever is still missing.
    (impute-rand copy replace)))