# Lab 6 - Solutions
#
# Uses the lead concentrations in the soil data set to illustrate the
# central limit theorem by resampling. The resampling steps are random, so
# the histograms differ from run to run (call set.seed() first if a
# reproducible run is needed).

# a. Access the data ----
a <- read.table(
  "http://www.stat.ucla.edu/~nchristo/statistics10/soil.txt",
  header = TRUE
)

# b. Mean and sd of lead ----
mlead <- mean(a$lead)
sdlead <- sd(a$lead)

# c. Central limit theorem ----
# T    = X1 + X2 + ... + X200
# Xbar = (X1 + X2 + ... + X200) / 200
# T    ~ N[200 * mlead, sdlead * sqrt(200)]
# Xbar ~ N[mlead, sdlead / sqrt(200)]
# (the second entry in each bracket is a standard deviation)

# d. Use R to verify ----
# Sample 100000 values from lead with replacement.
q <- sample(a$lead, 100000, replace = TRUE)

# Collapse the 100000 values into a 500-by-200 matrix
# (matrix() fills column by column).
qq <- matrix(q, ncol = 200, nrow = 500)
# NOTE(review): each column holds 500 draws, so every column mean below is
# based on n = 500, while parts (c) and (f) use n = 200 -- confirm which
# sample size is intended.

# e. Compute the sample mean for each column ----
means <- colMeans(qq)

# Construct a histogram using the 200 sample means.
hist(means)

# f. Compare with the central limit theorem ----
# Compute mlead +- 2.5 * sdlead / sqrt(200) to verify that the histogram
# approximately agrees with the central limit theorem.
clt_range <- mlead + c(-1, 1) * 2.5 * sdlead / sqrt(200)
print(clt_range)

# g. Repeat with a small sample size ----
# With n = 5 the histogram of the sample means is expected to still be
# skewed to the right, because n is small.

# Sample 2000 values from lead with replacement.
q <- sample(a$lead, 2000, replace = TRUE)

# Collapse the 2000 values into a 5-by-400 matrix (5 draws per column).
qq <- matrix(q, ncol = 400, nrow = 5)

# Compute the sample mean for each column.
means <- colMeans(qq)

# Construct a histogram using the 400 sample means.
hist(means)