#Lab 8:  Simulation-based approach for the difference between two proportions.

#Read the article "Statistics in the courtroom", by George Cobb and Stephen Gehlbach.  You can access it on the course website:  http://www.stat.ucla.edu/~nchristo/statistics13/article.pdf .

#The data:
		Gilbert on shift	Gilbert not on shift	Total
#Patient died		 40			  34		  74
#No death		217			1350		1567
#Total			257			1384		1641
#Proportion		0.156			0.025#Question 1:
#p=74/1641=0.045
#n=257
#Using the binomial formula compute:
#P(X >= 40) = sum(from x=40 to 257) (257 choose x)*p^40*(1-p)^217

#Question 2:#The two proportions are very different but we want to see if the difference is #statistically significant.
#We want to test:

#H0:  p1-p2=0
#Ha:  p1-p2>0

#Under the H0 hypothesis the two proportions are equal and therefore we treat the data as #one sample with 74 shifts in which at least one death occurred and 1567 shifts without #deaths.

#We generate the data here:
x3 <- c(rep("yes", 74), rep("no", 1567))

#Initialize the two sample proportions for the loop that will follow:
p1hat <- rep(0,1000)
p2hat <- rep(0,1000)

#We will shuffle the data in x3 and randomly select each time n1=257 and n2=1384 to #compute the simulated p1_hat.  Similarly we compute the simulated p2_hat:
 
#Shuffle the x3 vector:
x33 <- sample(x3)

n1<- 257
n2 <- 1384 
n <- n1+n2


#Here is the for loop:
for(i in 1:1000){
        choose_n1 <- sample(1:n, n1)
	y1 <- x33[choose_n1]
        y2 <- x33[-choose_n1]
    p1hat[i] <- sum(y1=="yes")/n1
    p2hat[i] <- sum(y2=="yes")/n2
}


#Construct a histogram using the 1000 simulated p1_hat-p2_hat differences:

pdiff_hat <- p1hat - p2hat

hist(pdiff_hat, xlim=c(-.15, .15))

#Find p1_hat-p2_hat from the original data:
pdiff_hat_orig <- 0.156-0.025

#Place it on the histogram:
segments(pdiff_hat_orig ,0,pdiff_hat_orig ,300, col="green")

#Compute p-value:
sum(pdiff_hat > pdiff_hat_orig)/1000

#Conclusion:  Highly significant.