#Lab 6 - Solutions:

#a.  Access the data:
#Read the soil data from the course web site; the first line of the file
#holds the column names.
a <- read.table(
  file = "http://www.stat.ucla.edu/~nchristo/statistics10/soil.txt",
  header = TRUE
)

#b.  Mean and sd of lead:
#Both summaries are taken over the full lead column of the data frame.
mlead <- mean(a[["lead"]])
sdlead <- sd(a[["lead"]])

#c.  Central limit theorem:
# T=X1+X2+…+X200
# Xbar = (X1+X2+…+X200)/200 = T/200

# By the central limit theorem, approximately (notation: N[mean, sd]):
# T ~ N[200*mlead, sdlead*sqrt(200)]

# Xbar ~ N[mlead, sdlead/sqrt(200)]

#d.  Use R to verify:
#Sample 100000 values from lead with replacement:
q <- sample(a$lead, 100000, replace = TRUE)

#Collapse the 100000 values into a 200-by-500 matrix. matrix() fills column
#by column, so each column is one sample of size n = 200, the n used in the
#central limit theorem statement of part c.
#Note: a 500-by-200 layout would average 500 values per column, giving means
#with sd sdlead/sqrt(500) instead of the stated sdlead/sqrt(200).
qq <- matrix(q, nrow = 200, ncol = 500)

#e.  Compute the sample mean for each column (one mean per sample of size 200):
means <- colMeans(qq)

#Construct a histogram using the 500 sample means:
hist(means)

#f.
#Compute mlead +- 2.5 * sdlead/sqrt(200). If the histogram approximately agrees
#with the central limit theorem, nearly all of the sample means should fall
#inside this interval.

#g.  Repeat with samples of size n = 5. The histogram of the sample means is
#still skewed to the right because n = 5 is small.

#Draw 2000 values from lead with replacement:
q <- sample(x = a$lead, size = 2000, replace = TRUE)

#Arrange the 2000 values as a 5-by-400 matrix; each column is one sample of
#size 5 (the column count, 2000 / 5 = 400, is implied by nrow):
qq <- matrix(q, nrow = 5)

#Compute the sample mean of each column:
means <- colMeans(qq)

#Construct a histogram using the 400 sample means:
hist(colMeans(qq))