#Lab 8: Simulation-based approach for the difference between two proportions.
#Read the article "Statistics in the courtroom", by George Cobb and Stephen Gehlbach. You can access it on the course website: http://www.stat.ucla.edu/~nchristo/statistics13/article.pdf .
#The data:
Gilbert on shift Gilbert not on shift Total
#Patient died 40 34 74
#No death 217 1350 1567
#Total 257 1384 1641
#Proportion 0.156 0.025
#Question 1:
#p=74/1641=0.045
#n=257
#Using the binomial formula compute:
#P(X >= 40) = sum(from x=40 to 257) (257 choose x)*p^40*(1-p)^217
#Question 2:
#The two proportions are very different but we want to see if the difference is #statistically significant.
#We want to test:
#H0: p1-p2=0
#Ha: p1-p2>0
#Under the H0 hypothesis the two proportions are equal and therefore we treat the data as #one sample with 74 shifts in which at least one death occurred and 1567 shifts without #deaths.
#We generate the data here:
x3 <- c(rep("yes", 74), rep("no", 1567))
#Initialize the two sample proportions for the loop that will follow:
p1hat <- rep(0,1000)
p2hat <- rep(0,1000)
#We will shuffle the data in x3 and randomly select each time n1=257 and n2=1384 to #compute the simulated p1_hat. Similarly we compute the simulated p2_hat:
#Shuffle the x3 vector:
x33 <- sample(x3)
n1<- 257
n2 <- 1384
n <- n1+n2
#Here is the for loop:
for(i in 1:1000){
choose_n1 <- sample(1:n, n1)
y1 <- x33[choose_n1]
y2 <- x33[-choose_n1]
p1hat[i] <- sum(y1=="yes")/n1
p2hat[i] <- sum(y2=="yes")/n2
}
#Construct a histogram using the 1000 simulated p1_hat-p2_hat differences:
pdiff_hat <- p1hat - p2hat
hist(pdiff_hat, xlim=c(-.15, .15))
#Find p1_hat-p2_hat from the original data:
pdiff_hat_orig <- 0.156-0.025
#Place it on the histogram:
segments(pdiff_hat_orig ,0,pdiff_hat_orig ,300, col="green")
#Compute p-value:
sum(pdiff_hat > pdiff_hat_orig)/1000
#Conclusion: Highly significant.