#Lab 6 - Solutions:
# a. Read the soil data set from the remote text file; the first row of the
#    file supplies the column names.
a <- read.table(
  file = "http://www.stat.ucla.edu/~nchristo/statistics10/soil.txt",
  header = TRUE
)
# b. Summary statistics of the lead column: mean and standard deviation.
mlead <- mean(x = a$lead)
sdlead <- sd(x = a$lead)
# c. Central limit theorem for a sample of size n = 200:
#    T    = X1 + X2 + ... + X200
#    Xbar = T / 200
#    T    ~ approx. N(mean = 200 * mlead, sd = sdlead * sqrt(200))
#    Xbar ~ approx. N(mean = mlead,       sd = sdlead / sqrt(200))
# d. Use R to verify:
# Sample 100000 values from lead with replacement:
q <- sample(a$lead, 100000, replace = TRUE)
# Arrange the 100000 values into a 200-by-500 matrix. matrix() fills column
# by column, so each column holds one sample of size n = 200 (500 samples).
# The sample size must be the number of ROWS for colMeans() below to yield
# means of n = 200 observations, matching the sqrt(200) used in parts c and f.
qq <- matrix(q, nrow = 200, ncol = 500)
# e. Compute the sample mean of each column (500 means, each from n = 200):
means <- colMeans(qq)
# Construct a histogram of the 500 sample means:
hist(means)
# f.
# Compute mlead +- 2.5 * sdlead / sqrt(200); almost all of the sample means
# should fall inside this interval, in agreement with the central limit
# theorem.
# g. Repeat with samples of size n = 5: the histogram is still skewed to the
#    right because n = 5 is small.
# Draw 2000 values from lead with replacement:
q <- sample(x = a$lead, size = 2000, replace = TRUE)
# Arrange the 2000 values into a 5-by-400 matrix, so that each column is one
# sample of size 5:
qq <- matrix(data = q, nrow = 5, ncol = 400)
# Sample mean of every column (400 means):
means <- colMeans(qq)
# Histogram of the 400 sample means:
hist(colMeans(qq))