#Permutation test for the slope in simple regression. #Example 1: bb <- read.table("http://www.stat.ucla.edu/~nchristo/statistics13/q13.txt", header=TRUE) bb <- read.table("q13.txt", header=TRUE) #if your file is located on your computer. #Initialize the vector b and r: b <- rep(0,1000) r <- rep(0,1000) #Another way to initialize the vectors b and r: b <- as.numeric(1000) r <- as.numeric(1000) #A for loop that will run 1000 regressions: x is fixed, the y values are permuted. for(i in 1:1000){ y <- sample(bb$y) qqq <- lm(y ~ bb$x) b[i] <- qqq$coef[2] r[i] <- cor(y, bb$x) } #Contruct a histogram of using the 1000 values of b: hist(b) #Compute beta1_hat from the actual data (original data): q1 <- lm(bb$y ~ bb$x) beta_1 <- q1$coef[2] #Place the actual beta_1 on the histrogram to see how plausible its value is under H0: segments(beta_1,0,beta_1,200, col="green") #Count how many of the 1000 simulated beta values are larger than the actual beta_1: sum(b > beta_1) ========================================================================================== ========================================================================================== #Example 2: #Read the data: a <- read.table("http://www.stat.ucla.edu/~nchristo/statistics13/body_fat.txt", header=TRUE) a <- read.table("body_fat.txt", header=TRUE) #if your file is located on your computer. #Create a new data frame with y (percent of body fat) and x10 (thigh circumference): qq <- a[, c(2,10)] #Initialize the vector b and r: b <- rep(0,1000) r <- rep(0,1000) #Another way to initialize the vectors b and r: b <- as.numeric(1000) r <- as.numeric(1000) #A for loop that will run 1000 regressions: x is fixed, the y values are permuted. for(i in 1:1000){ y <- sample(qq$y) qqq <- lm(y ~ qq$x) b[i] <- qqq$coef[2] r[i] <- cor(y, qq$x) } #Contruct a histogram of using the 1000 values of b: hist(b) #Compute beta_hat from the actual data (original data): q1 <- lm(qq$y ~ qq$x10) beta_1 <- q1$coef[2] #Place the actual beta_1 on the histrogram to see how plausible it value is under H0: segments(beta_1,0,beta_1,200, col="green") #Count how many of the 1000 simulated beta values are larger than the actual beta_1: sum(b > beta_1) ========================================================================================== ========================================================================================== #Another way is to sample from the y values with replacement. Again the x values are fixed. b <- rep(0,1000) for(i in 1:1000){ y <- sample(qq$y, nrow(qq), replace=TRUE) qqq <- lm(y ~ qq$x) b[i] <- qqq$coef[2] r[i] <- cor(y, qq$x) } ========================================================================================== ========================================================================================== #Example 3: bb <- read.table("http://www.stat.ucla.edu/~nchristo/statistics13/q15.txt", header=TRUE) bb <- read.table("q15.txt", header=TRUE) #if your file is located on your computer. #Initialize the vector b and r: b <- rep(0,1000) r <- rep(0,1000) #Another way to initialize the vectors b and r: b <- as.numeric(1000) r <- as.numeric(1000) #A for loop that will run 1000 regressions: x is fixed, the y values are permuted. for(i in 1:1000){ y <- sample(bb$y) qqq <- lm(y ~ bb$x) b[i] <- qqq$coef[2] r[i] <- cor(y, bb$x) } #Contruct a histogram of using the 1000 values of b: hist(b) #Compute beta_hat from the actual data (original data): q1 <- lm(bb$y ~ bb$x) beta_1 <- q1$coef[2] #Place the actual beta_1 on the histrogram to see how plausible it value is under H0: segments(beta_1,0,beta_1,200, col="green") #Count how many of the 1000 simulated beta values are larger than the actual beta_1: sum(b > beta_1) ========================================================================================== ========================================================================================== #Example 4: #Testing beta1=20. a <- read.table("http://www.stat.ucla.edu/~nchristo/statistics_c173_c273/soil_complete.txt", header=TRUE) #Use lead as the response. #Use cadmium as the predictor. #Initialize the vector bc: bc <- rep(0,1000) #Another way to initialize the vectors b and r: bc <- as.numeric(1000) yc <- a$lead - 20*a$cadmium #A for loop that will run 1000 regressions: for(i in 1:1000){ y <- sample(yc) qqq <- lm(y ~ a$cadmium) bc[i] <- qqq$coef[2] } #Contruct a histogram of using the 1000 values of b: hist(bc) #Compute beta1_hat from the actual data (original data): q1 <- lm(yc ~ a$cadmium) beta_1 <- q1$coef[2] #Place the actual beta_1 on the histrogram to see how plausible it value is under H0: segments(beta_1,0,beta_1,200, col="green") #Count how many of the 1000 simulated beta values are larger than the actual beta_1: sum(bc > beta_1) ========================================================================================== ==========================================================================================