#Permutation test for the slope in simple regression.

#Example 1:
bb <- read.table("http://www.stat.ucla.edu/~nchristo/statistics13/q13.txt", header=TRUE)
bb <- read.table("q13.txt", header=TRUE)  #if your file is located on your computer.

#Initialize the vector b and r:
b <- rep(0,1000)
r <- rep(0,1000)

#Another way to initialize the vectors b and r:
b <- as.numeric(1000)
r <- as.numeric(1000)

#A for loop that will run 1000 regressions:  x is fixed, the y values are permuted.
for(i in 1:1000){
	y <- sample(bb$y)
	qqq <- lm(y ~ bb$x)
	b[i] <- qqq$coef[2]
	r[i] <- cor(y, bb$x)
}


#Contruct a histogram of using the 1000 values of b:
hist(b)
#Compute beta1_hat from the actual data (original data):
q1 <- lm(bb$y ~ bb$x)
beta_1 <- q1$coef[2]

#Place the actual beta_1 on the histrogram to see how plausible its value is under H0:
segments(beta_1,0,beta_1,200, col="green")

#Count how many of the 1000 simulated beta values are larger than the actual beta_1:
sum(b > beta_1)


==========================================================================================
==========================================================================================
#Example 2:
#Read the data:
a <- read.table("http://www.stat.ucla.edu/~nchristo/statistics13/body_fat.txt", header=TRUE)
a <- read.table("body_fat.txt", header=TRUE)  #if your file is located on your computer.

#Create a new data frame with y (percent of body fat) and x10 (thigh circumference):
qq <- a[, c(2,10)]

#Initialize the vector b and r:
b <- rep(0,1000)
r <- rep(0,1000)

#Another way to initialize the vectors b and r:
b <- as.numeric(1000)
r <- as.numeric(1000)

#A for loop that will run 1000 regressions:  x is fixed, the y values are permuted.
for(i in 1:1000){
	y <- sample(qq$y)
	qqq <- lm(y ~ qq$x)
	b[i] <- qqq$coef[2]
	r[i] <- cor(y, qq$x)
}

#Contruct a histogram of using the 1000 values of b:
hist(b)
#Compute beta_hat from the actual data (original data):
q1 <- lm(qq$y ~ qq$x10)
beta_1 <- q1$coef[2]

#Place the actual beta_1 on the histrogram to see how plausible it value is under H0:
segments(beta_1,0,beta_1,200, col="green")

#Count how many of the 1000 simulated beta values are larger than the actual beta_1:
sum(b > beta_1)


==========================================================================================
==========================================================================================
#Another way is to sample from the y values with replacement.  Again the x values are fixed.
b <- rep(0,1000)

for(i in 1:1000){
	y <- sample(qq$y, nrow(qq), replace=TRUE)
	qqq <- lm(y ~ qq$x)
	b[i] <- qqq$coef[2]
	r[i] <- cor(y, qq$x)
}

==========================================================================================
==========================================================================================
#Example 3:
bb <- read.table("http://www.stat.ucla.edu/~nchristo/statistics13/q15.txt", header=TRUE)

bb <- read.table("q15.txt", header=TRUE)  #if your file is located on your computer.

#Initialize the vector b and r:
b <- rep(0,1000)
r <- rep(0,1000)

#Another way to initialize the vectors b and r:
b <- as.numeric(1000)
r <- as.numeric(1000)

#A for loop that will run 1000 regressions:  x is fixed, the y values are permuted.
for(i in 1:1000){
	y <- sample(bb$y)
	qqq <- lm(y ~ bb$x)
	b[i] <- qqq$coef[2]
	r[i] <- cor(y, bb$x)
}


#Contruct a histogram of using the 1000 values of b:
hist(b)
#Compute beta_hat from the actual data (original data):
q1 <- lm(bb$y ~ bb$x)
beta_1 <- q1$coef[2]

#Place the actual beta_1 on the histrogram to see how plausible it value is under H0:
segments(beta_1,0,beta_1,200, col="green")

#Count how many of the 1000 simulated beta values are larger than the actual beta_1:
sum(b > beta_1)

==========================================================================================
==========================================================================================
#Example 4:
#Testing beta1=20.
a <- read.table("http://www.stat.ucla.edu/~nchristo/statistics_c173_c273/soil_complete.txt", header=TRUE)
#Use lead as the response.
#Use cadmium as the predictor.

#Initialize the vector bc:
bc <- rep(0,1000)

#Another way to initialize the vectors b and r:
bc <- as.numeric(1000)

yc <- a$lead - 20*a$cadmium

#A for loop that will run 1000 regressions:  
for(i in 1:1000){
	y <- sample(yc)
	qqq <- lm(y ~ a$cadmium)
	bc[i] <- qqq$coef[2]
}


#Contruct a histogram of using the 1000 values of b:
hist(bc)
#Compute beta1_hat from the actual data (original data):
q1 <- lm(yc ~ a$cadmium)
beta_1 <- q1$coef[2]

#Place the actual beta_1 on the histrogram to see how plausible it value is under H0:
segments(beta_1,0,beta_1,200, col="green")

#Count how many of the 1000 simulated beta values are larger than the actual beta_1:
sum(bc > beta_1)


==========================================================================================
==========================================================================================