;;; NOTE(review): the lambda bound to the variable PROP below has two extra
;;; body forms, NUM and NUM-WHITES, after the division, so calling the closure
;;; would return the value of NUM-WHITES rather than the proportion.  It looks
;;; like leftover scratch work; the DEFUN of PROP further down is the usable
;;; definition.  Kept unchanged -- confirm before removing.
(def prop (lambda (x) (/ (sum x) (length x)) num num-whites))

(defun corr (x y)
"Args: (X Y)
X and Y are lists of equal length, and the function returns the simple
(Pearson) correlation between them."
  (let* ((dx (- x (mean x)))            ; deviations from the mean, elementwise
         (dy (- y (mean y)))
         (ssx (sum (^ dx 2)))           ; sums of squared deviations
         (ssy (sum (^ dy 2)))
         (bott (^ (* ssy ssx) .5))      ; sqrt(SSX * SSY)
         (top (sum (* dx dy))))         ; sum of cross products
    (/ top bott)))

(defun prop (x)
"Args: (X)
X is a 0-1 indicator list. Output is the proportion of 1's."
  (/ (sum x) (length x)))

(defun look-at-students (schl-ind)
"Args: (SCHL-IND)
SCHL-IND is the row index of the selected school in SCHL-DATA, e.g., the
377th school. Returns the data matrix (all columns of STUD-DATA) for the
students whose school id, held in column 0 of STUD-DATA, equals the id in
column 0 of that school's row."
  ;; The number of schools is taken from SCHL-DATA itself (via SELECT-COL)
  ;; rather than a hard-coded row count, so any size of school matrix works.
  (let* ((schl-id (select (select-col schl-data 0) schl-ind))
         (studs-schl-id (select-col stud-data 0))
         (studs-ind (which (= schl-id studs-schl-id))))
    (select stud-data studs-ind (iseq (array-dimension stud-data 1)))))

(defun select-col (matrix col)
"Args: (MATRIX COL)
MATRIX is a two-dimensional array. Returns column COL of MATRIX as a list."
  (let* ((m (array-dimension matrix 0)))
    (combine (select matrix (iseq m) col))))

(defun samp-stud-var (schl-ind ind)
"Args: (SCHL-IND IND)
SCHL-IND is a list of the indices of the schools selected. IND is the
column of the desired student variable. The local variable SAMPLED-SCHOOLS
is a list of matrices of students, one for each sampled school. Output is a
single flat list containing this variable for all students in the sampled
schools; its length corresponds to the total number of students sampled."
  (let* ((sampled-schools (mapcar #'look-at-students schl-ind))
         (result (list)))
    (dolist (i sampled-schools result)
      ;; COMBINE flattens the accumulated per-school columns into one list.
      (setf result (combine (append result (list (select-col i ind))))))))

(defun samp-stud-var-2 (schl-ind ind)
"Args: (SCHL-IND IND)
Same as SAMP-STUD-VAR except COMBINE is removed, allowing one to examine
the data school by school. SCHL-IND is a list of the indices of the schools
selected. IND is the column of the desired student variable. Output is a
LIST OF LISTS, one per sampled school, so the length of the result
corresponds to the number of schools sampled."
  (let* ((sampled-schools (mapcar #'look-at-students schl-ind))
         (result (list)))
    (dolist (i sampled-schools result)
      (setf result (append result (list (select-col i ind)))))))

(defun repeated-sample (num-samples samp-size stud-var &key (stat #'list))
"Args: (NUM-SAMPLES SAMP-SIZE STUD-VAR &key (STAT #'LIST))
NUM-SAMPLES is a number specifying how many random samples of schools are
to be taken from the data. SAMP-SIZE is a number specifying how many
schools each sample should contain. STUD-VAR is the column of the student
variable of interest. The optional function STAT is applied to the values
of STUD-VAR for each selected sample, e.g., the total number of white
students in each random sample of schools. As a default, the list is
simply returned."
  (let* ((result nil))
    (dotimes (i num-samples result)
      (let* ((row-ind (sample (iseq (array-dimension schl-data 0)) samp-size))
             (stud-cols (samp-stud-var row-ind stud-var)))
        ;; Results are consed on, so they come back in reverse draw order.
        (setf result (cons (funcall stat stud-cols) result))))))

(defun repeated-sample-2 (num-samples samp-size stud-var &key (stat #'list))
"Args: (NUM-SAMPLES SAMP-SIZE STUD-VAR &key (STAT #'LIST))
Same as REPEATED-SAMPLE except that the student values are kept as a list
of lists, one for each school, instead of one long list. STAT is applied to
each school's list separately, e.g., to calculate the percentage of whites
in each school of each sample."
  (let* ((result nil))
    (dotimes (i num-samples result)
      (let* ((row-ind (sample (iseq (array-dimension schl-data 0)) samp-size))
             (stud-cols (samp-stud-var-2 row-ind stud-var)))
        (setf result
              (cons (mapcar #'(lambda (x) (funcall stat x)) stud-cols)
                    result))))))

;;;;
;;;;
;;;;  Now, here are some functions for looking at school level variables
;;;;
;;;;

(defun samp-schl (num)
"Args: (NUM)
NUM is the number of schools one wishes to sample. The output is the data
matrix (all columns of SCHL-DATA) for the randomly sampled schools."
  (let* ((row-ind (sample (iseq (array-dimension schl-data 0)) num)))
    (select schl-data row-ind (iseq (array-dimension schl-data 1)))))

(defun samp-schl-2 (row-ind)
"Args: (ROW-IND)
ROW-IND is a list of indices for the desired schools. The output is the
data matrix (all columns of SCHL-DATA) for those schools."
  (select schl-data row-ind (iseq (array-dimension schl-data 1))))

(defun samp-schl-var (var num)
"Args: (VAR NUM)
VAR is the column of the school variable of interest. NUM is the number of
schools to sample at random. Output is a list of the value of each sampled
school for the variable of interest, e.g., public school or not."
  (let* ((sampled-schools (samp-schl num)))
    (select-col sampled-schools var)))

(defun samp-schl-var-2 (row-ind ind)
"Args: (ROW-IND IND)
Same idea as SAMP-SCHL-VAR, except the selected schools are specified by
the user. ROW-IND is a list of indices for the desired schools. IND
specifies the column of the school variable of interest. Output is a list
of the value of each school for the variable of interest, e.g., public
school or not."
  (let* ((sampled-schools (samp-schl-2 row-ind)))
    (select-col sampled-schools ind)))

(defun rep-samp-schl-var (schools ind)
"Args: (SCHOOLS IND)
SCHOOLS is a list of lists of equal length, where each list contains the
indices for a group of schools. IND is the column of the school variable of
interest. Output is a list of lists, where each list contains the values of
the desired variable for one group of schools."
  (let* ((sampled-schools (mapcar #'samp-schl-2 schools)))
    ;; IND is repeated so that SELECT-COL receives it alongside each matrix.
    (map-elements #'select-col
                  sampled-schools
                  (repeat ind (length sampled-schools)))))

(defun schl-rep-sample (num-samples samp-size schl-var &key (stat #'list))
"Args: (NUM-SAMPLES SAMP-SIZE SCHL-VAR &key (STAT #'LIST))
NUM-SAMPLES is a number specifying how many random samples are to be taken
from the data. SAMP-SIZE is a number specifying how many schools each
sample should contain. SCHL-VAR is the column of the school variable of
interest. The optional function STAT is applied to the values of SCHL-VAR
in each sample. For example, for 20 samples of size 20, we can determine
the percentage of public schools for each sample."
  (let* ((result nil))
    (dotimes (i num-samples result)
      ;; Draw the school indices once and look the variable up for exactly
      ;; those rows, mirroring REPEATED-SAMPLE.
      (let* ((row-ind (sample (iseq (array-dimension schl-data 0)) samp-size))
             (schl (samp-schl-var-2 row-ind schl-var)))
        (setf result (cons (funcall stat schl) result))))))