# Simple linear regression with diagnostics on two data sets.
#
# For each data set the script: reads the data, fits a simple linear
# regression, prints its summary, draws a scatterplot and residual plots,
# prints leverage values and standardized residuals, then refits the model
# after removing two influential cases.
#
# NOTE: the model objects `q` and `q1` are reused in section B, so the
# section A fits are overwritten once section B runs.

# A. Wheat and rain data ----

# Read data:
a <- read.table(
  "http://www.stat.ucla.edu/~nchristo/statistics13/rain_wheat.txt",
  header = TRUE
)

# Run the regression of wheat on rain:
q <- lm(wheat ~ rain, data = a)

# Summary of the regression:
summary(q)

# Scatterplot:
plot(a$rain, a$wheat)

# Check the assumptions (residuals against fitted values and predictor):
plot(fitted(q), resid(q))
plot(a$rain, resid(q))

# Obtain leverage values:
hatvalues(q)

# Obtain standardized residuals:
rstandard(q)

# Remove the two influential points (cases 25 and 26):
a1 <- a[-c(25, 26), ]

# Run the regression again without cases 25 and 26:
q1 <- lm(wheat ~ rain, data = a1)

# Summary of the new regression:
summary(q1)

# New scatterplot:
plot(a1$rain, a1$wheat)

# ======================================================================
# ======================================================================

# B. District data ----

# Access data:
b <- read.table(
  "http://www.stat.ucla.edu/~nchristo/statistics13/district.txt",
  header = TRUE
)

# Run the regression of change on white:
q <- lm(change ~ white, data = b)

# Summary of the regression:
summary(q)

# Scatterplot:
plot(b$white, b$change)

# Check the assumptions (residuals against fitted values and predictor):
plot(fitted(q), resid(q))
plot(b$white, resid(q))

# Obtain leverage values:
hatvalues(q)

# Obtain standardized residuals:
rstandard(q)

# Remove the two influential points (cases 14 and 40):
b1 <- b[-c(14, 40), ]

# Run the regression again without cases 14 and 40:
q1 <- lm(change ~ white, data = b1)

# Summary of the new regression:
summary(q1)

# New scatterplot:
plot(b1$white, b1$change)