# Breast cancer mortality data ----
#
# Fits a regression through the origin of y on x, inspects the residuals for
# non-constant variance, and then repeats the analysis on the square-root
# transformed variables.
#
# Objects created: `a` (the data frame), `q` (fit on the raw variables) and
# `q1` (fit on the square-root transformed variables).

# Read the data (requires network access):
a <- read.table(
  "http://www.stat.ucla.edu/~nchristo/statistics13/cancer.txt",
  header = TRUE
)

# See the names of the variables.
# print() is explicit so the output also appears when the file is source()d.
print(names(a))

# The model formulas below look the columns up by exact name, so fail early
# with a clear message if the file does not provide them.
stopifnot(all(c("x", "y") %in% names(a)))

# Plot y on x: we see non-constant variance.
plot(a$x, a$y)

# Run the regression of y on x (without the intercept).
# Passing `data = a` instead of writing a$y ~ a$x keeps the model usable with
# predict(newdata = ...) and gives a readable call in the summary.
q <- lm(y ~ x + 0, data = a)

# See summary of the regression:
print(summary(q))

# Non-constant variance can be detected with the following two plots.
# residuals() / fitted() are used rather than q$res / q$fitted, which only
# work through partial matching of the component names.

# Residuals on fitted values:
plot(fitted(q), residuals(q), xlab = "Fitted values", ylab = "Residuals")

# Residuals on x:
plot(a$x, residuals(q), xlab = "x", ylab = "Residuals")

# One suggestion is to transform the variables (take square roots).
# Run the regression on the transformed variables:
q1 <- lm(sqrt(y) ~ sqrt(x) + 0, data = a)

# See summary of the new regression:
print(summary(q1))

# Make some plots.

# First, scatterplot of the transformed variables:
plot(sqrt(a$x), sqrt(a$y))

# Then, residuals on fitted values of the regression on the transformed
# variables:
plot(fitted(q1), residuals(q1), xlab = "Fitted values", ylab = "Residuals")

# And residuals on sqrt(x):
plot(sqrt(a$x), residuals(q1), xlab = "sqrt(x)", ylab = "Residuals")

# These plots using the transformed variables showed that the variance is
# definitely more constant than before.