#INTRODUCTION:
#This part can be skipped (or covered very quickly); it only reminds students how the sample function works.
#==================================================================================
#PART A:
#Question 1 and 2:
#Generate 100 samples each of size 100 and compute the sample mean and sample sd:

#Population: the integers 2, 3, ..., 12. sample() draws them with equal
#probability, so the true mean is mu=7.
x <- seq(2, 12, 1)

#Sample 10000 values from x with replacement:
q <- sample(x, 10000, replace=TRUE)

#Collapse the 10000 values into a 100-by-100 matrix
#(each column is one sample of size 100):
qq <- matrix(q, ncol=100, nrow=100)

#Compute the sample mean and sample standard deviation for each column:
m <- colMeans(qq)

s <- apply(qq, 2, sd)


#Question 3:
#Construct the confidence intervals:
#Margin of error of each 95% t-interval; 99 degrees of freedom because
#every sample has size 100. It differs between samples because s does.
moe <- qt(.975, 99)*s/sqrt(100)

ci_left <- m - moe  #Lower bound.
ci_right <- m + moe  #Upper bound.


#Question 4:
#We expect about 5 intervals to miss the true mean (95% confidence level).
#Count how many confidence intervals missed the true mean, mu=7:

100 - sum(ci_left < 7 & 7 < ci_right)

#Flag the intervals that cover the true mean, mu=7 (TRUE = covers, FALSE = misses):
yes_no <- ci_left < 7 & 7 < ci_right

#Question 5:
#Compute the length of each interval (twice the margin of error).
2*moe

#The lengths differ because the sample standard deviation is used to compute
#the margin of error, qt(0.975,99)*s/sqrt(100), and s changes from sample to sample.
#In handout #32, page 4, sigma=2.42 was assumed known, so every interval there
#has the same length, 2*1.96*2.42/sqrt(50).


#Question 6:
#Put everything in one data frame, one row per sample.
#Note: q is reused here (it no longer holds the raw sampled values), and
#cbind() stores yes_no as 1 (covers) / 0 (misses).
q <- as.data.frame(cbind(m, s, ci_left, ci_right, yes_no))

#Identify the intervals that missed the true mean, mu=7
#(no holds the row numbers of those intervals):
no <- which(q$yes_no==0)

#==================================================================================
#==================================================================================
#==================================================================================
#The following is just to see which confidence intervals missed the true mean, mu=7:
#Make a plot and draw a vertical line at 7:
#x1 and y1 are only used to open an empty plotting region: type="n" draws
#no points, so only their ranges matter.
x1 <- c(1, 13, 1)

y1 <- c(1,200, 1)

#NOTE(review): the horizontal axis shows the sample means and interval
#endpoints, not interval lengths, so the xlab text looks misleading - confirm
#with the handout before renaming it.
plot(x1, y1, type="n", xlim=c(1,13), yaxt="n", ylab="Confidence intervals", xlab="Length of confidence interval")

abline(v=7)

#Vertical position of each of the 100 intervals: 1, 3, 5, ..., 199.
num <- seq(1,200,2)

#One dot per sample mean:
points(m, num)

#Draw the confidence intervals (one horizontal segment from the lower to
#the upper bound, at the same height as the dot):
segments(ci_left, num, ci_right, num)

#Note:  This plot may not be easy for many students to understand, but give
#them the code and ask them to color the dot for the intervals that missed the
#true mean.  To answer this, inspect the data frame in question 6 to see which
#confidence intervals missed the true mean.  Then in order to color the dot you
#can do this:

#Use num[no] for the heights so the green dots land exactly on the dots drawn
#above (interval i sits at height num[i] = 2*i - 1; 2*no would be one unit too high).
points(m[no], num[no], col="green", pch=19)

#Feel free to add any other commands if you like to make this plot easier!

#====================================================================
#PART B:
#Question 1 and 2:
#Generate 100 samples each of size 100 and compute the sample mean and sample sd:

#Population: the lead column of a.
#NOTE(review): a is not created in this script; it is assumed to be a data
#frame loaded beforehand - confirm.
x <- a$lead

#Sample 10000 values from lead with replacement:
q <- sample(x, 10000, replace=TRUE)

#Collapse the 10000 values into a 100-by-100 matrix
#(each column is one sample of size 100):
qq <- matrix(q, ncol=100, nrow=100)

#Compute the sample mean and sample standard deviation for each column:
m <- colMeans(qq)

s <- apply(qq, 2, sd)


#Question 3:
#Construct the confidence intervals:
#Margin of error of each 95% t-interval; 99 degrees of freedom because
#every sample has size 100. It differs between samples because s does.
moe <- qt(.975, 99)*s/sqrt(100)

ci_left <- m - moe  #Lower bound.
ci_right <- m + moe  #Upper bound.


#Question 4:
#We expect about 5 intervals to miss the true mean (95% confidence level).
#Count how many confidence intervals missed the true mean, mu=153:
#NOTE(review): 153 is presumably mean(a$lead) rounded - confirm against the data.

100 - sum(ci_left < 153 & 153 < ci_right)

#Flag the intervals that cover the true mean, mu=153 (TRUE = covers, FALSE = misses):
yes_no <- ci_left < 153 & 153 < ci_right

#Question 5:
#Compute the length of each interval (twice the margin of error).
2*moe

#The lengths differ because the sample standard deviation is used to compute
#the margin of error, qt(0.975,99)*s/sqrt(100), and s changes from sample to sample.
#In handout #32, page 4, sigma=2.42 was assumed known, so every interval there
#has the same length, 2*1.96*2.42/sqrt(50).


#Question 6:
#Put everything in one data frame, one row per sample.
#Note: q is reused here (it no longer holds the raw sampled values), and
#cbind() stores yes_no as 1 (covers) / 0 (misses).
q <- as.data.frame(cbind(m, s, ci_left, ci_right, yes_no))

#Identify the intervals that missed the true mean, mu=153
#(no holds the row numbers of those intervals):
no <- which(q$yes_no==0)
#==================================================================================
#==================================================================================
#==================================================================================
#The following is just to see which confidence intervals missed the true mean, mu=153:
#Make a plot and draw a vertical line at 153:
#x1 and y1 are only used to open an empty plotting region: type="n" draws
#no points, so only their ranges matter.
x1 <- c(120, 190, 1)

y1 <- c(1,200, 1)

#NOTE(review): the horizontal axis shows the sample means and interval
#endpoints, not interval lengths, so the xlab text looks misleading - confirm
#with the handout before renaming it.
plot(x1, y1, type="n", xlim=c(120,190), yaxt="n", ylab="Confidence intervals", xlab="Length of confidence interval")

abline(v=153)

#Vertical position of each of the 100 intervals: 1, 3, 5, ..., 199.
num <- seq(1,200,2)

#One dot per sample mean:
points(m, num)

#Draw the confidence intervals (one horizontal segment from the lower to
#the upper bound, at the same height as the dot):
segments(ci_left, num, ci_right, num)

#To answer this, inspect the data frame in question 6 to see which confidence
#intervals missed the true mean.  Then if you want to color the dots you can do this:

#Use num[no] for the heights so the green dots land exactly on the dots drawn
#above (interval i sits at height num[i] = 2*i - 1; 2*no would be one unit too high).
points(m[no], num[no], col="green", pch=19)

#====================================================================