# Example: simple linear regression, first by hand and then with lm().
y <- c(41, 43, 50, 62, 66, 69, 72, 74, 78, 80)
x <- c(13, 17, 17, 22, 19, 25, 22, 25, 24, 27)

# Calculate:
#   sum of x
#   sum of y
#   sum of x*y
#   sum of x^2
#   sum of y^2
sum(x)
sum(y)
sum(x * y)
sum(x^2)
sum(y^2)

# What is the difference between sum(x^2) and sum(x)^2?
# (sum(x^2) squares each value and then adds them up; sum(x)^2 adds the
# values first and then squares the total.)

# Use R as a calculator to compute beta0_hat and beta1_hat (see formulas in
# the handout). The sample size is taken from the data instead of being
# hard-coded as 10, so the formulas stay correct if x and y are edited.
n <- length(x)

# b is the slope estimate (beta1_hat); m is the intercept estimate (beta0_hat).
b <- (sum(x * y) - (1 / n) * sum(x) * sum(y)) /
  (sum(x^2) - (1 / n) * sum(x)^2)
m <- mean(y) - b * mean(x)

# Scatterplot:
plot(x, y)

# Run the regression of y on x:
q <- lm(y ~ x)
# Type q to see the estimates.
# Or type summary(q). Show the estimates in the summary.

# Add the fitted line on the scatterplot:
abline(q)

# Add the ybar line (horizontal) and the xbar line (vertical) on the
# scatterplot. Because m = ybar - b * xbar, the fitted line passes through
# the point where these two lines cross.
abline(h = mean(y))
abline(v = mean(x))

# Comment on the variability around the fitted line and around the ybar line.
# Ask students to give an estimate of R^2 by looking at the plot.

# New task: access a larger data set from a website:
a <- read.table(
  "http://www.stat.ucla.edu/~nchristo/maadmeg/soildmeg.txt",
  header = TRUE
)
# a is called a data frame: a table of rows and columns.

# See the first 6 rows of the data:
head(a)

# See the names of the variables:
names(a)

# Access a particular column by name:
a$long
a$lead
# Or using column positions:
a[, 1]
a[, 5]

# Access a particular row:
# Say, the first row:
a[1, ]

# Plot lead against zinc and comment:
plot(a$zinc, a$lead)

# Plot log(lead) against log(zinc):
# Rename the variables (this overwrites x and y from the first example):
y <- log(a$lead)
x <- log(a$zinc)
plot(x, y)

# Regress y on x (this overwrites the q from the first example):
q <- lm(y ~ x)

# Get summary:
summary(q)
# Show beta0_hat, beta1_hat, and R^2 in the summary output.

# Add the fitted line:
abline(q)

# =================================
# Some other basic plots:

# Histogram:
hist(a$lead)

# Boxplot:
boxplot(a$lead)

# Summary statistics:
summary(a$lead)