# Randomization (permutation) test for a difference in two proportions.
#
# We test H0: p1 - p2 = 0 vs. Ha: p1 - p2 != 0, where p1 and p2 are the
# proportions of boys in two groups of sizes n1 = 3602 and n2 = 565.
# Under H0 the group labels are exchangeable, so we pool all births,
# repeatedly split them at random into groups of sizes n1 and n2, and
# record the simulated difference p1hat - p2hat each time.

# Number of simulated random splits.
n_sims <- 1000

# Initialize vectors p1hat and p2hat (preallocated, one slot per split).
p1hat <- numeric(n_sims)
p2hat <- numeric(n_sims)

# A total of 2230 boys and 1937 girls born.
nboys <- 2230
ngirls <- 1937

# Create a vector of 2230 boys and a vector of 1937 girls.
x1 <- rep("boy", nboys)
x2 <- rep("girl", ngirls)

# Under H0 we treat the data as one sample.
x3 <- c(x1, x2)

# Shuffle the pooled data. The random index draw inside the loop already
# randomizes each split, so this initial shuffle is not strictly required.
x33 <- sample(x3)

# Group sizes used for every random split.
n1 <- 3602
n2 <- 565
n <- n1 + n2

# The two group sizes must account for every pooled observation.
stopifnot(n == length(x33))

# Split the pooled data into two parts n_sims times and record the
# proportion of boys in each part.
for (i in seq_len(n_sims)) {
  choose_n1 <- sample(seq_len(n), n1)
  y1 <- x33[choose_n1]
  y2 <- x33[-choose_n1]
  p1hat[i] <- sum(y1 == "boy") / n1
  p2hat[i] <- sum(y2 == "boy") / n2
}

# Construct a histogram of the simulated differences p1hat - p2hat.
pdiff_hat <- p1hat - p2hat
hist(
  pdiff_hat,
  xlim = c(-0.1, 0.1),
  main = paste("Histogram of", n_sims, "simulated p1hat-p2hat values"),
  xlab = "pdiff_hat = p1hat-p2hat"
)

# Find p1_hat - p2_hat from the original data
# (1975 boys out of 3602 in group 1, 255 boys out of 565 in group 2).
pdiff_hat_orig <- 1975 / 3602 - 255 / 565

# Place the observed difference on the histogram.
points(pdiff_hat_orig, 0, pch = 19, col = "green")

# Compute the two-sided p-value: the proportion of simulated differences at
# least as extreme, in absolute value, as the observed one. Unlike doubling a
# single tail, this is valid whatever the sign of the observed difference and
# can never exceed 1.
p_value <- mean(abs(pdiff_hat) >= abs(pdiff_hat_orig))
print(p_value)