Mon, Apr 26, 2010.

1. LAD, Huber, LTS regression and B-spline examples in R.
2. Generalized Additive Models.

1. R examples.

## a) LAD regression ##
library(MASS)
x2 = sort(c(runif(40),3))
y = 3*x2+rnorm(41)/3
y[41] = 3
plot(x2,y,main="LAD regression and Huber's method")
abline(lm(y~x2),lty=2)
## z4 = rlm(y ~ x2,scale.est="MAD")
library(quantreg)
z4 = rq(y ~ x2)
abline(z4)

## b) Huber's method
z5 = rlm(y ~ x2,scale.est="Huber")
abline(z5,col=4)
legend(2,1,c("Least squares","LAD","Huber"),
  lty=c(2,1,1),col=c(1,1,4))
z4$coef
z5$coef

## c) LTS regression
z6 = ltsreg(y ~ x2)
abline(z6,col=5)
x2[41] = 8
y[c(1,11,21,31)] = rep(1.4,4)
plot(x2,y,main="LTS regression")
abline(lm(y~x2),lty=2)
z6 = ltsreg(y ~ x2,.92)
abline(z6,col=4)
legend(4,1,c("Least Squares","LTS"),lty=c(2,1),col=c(1,4))
abline(rlm(y ~ x2,scale.est="Huber"),col=3)
abline(rlm(y ~ x2,scale.est="MAD"),col=5)
abline(rq(y ~ x2))

## d) B-splines
library(splines)
x2[41] = 2
plot(x2,y,main="B-splines")
knots = c(0,0,0,seq(0,2,length=20),2,2,2)
x3 = splineDesign(knots,x2)
z7 = lm(y ~ x3)
lines(x2,z7$fit,col=4)
knot2 = c(0,0,0,seq(0,2,length=7),2,2,2)
x4 = splineDesign(knot2,x2)
z8 = lm(y ~ x4)
lines(x2,z8$fit,col=5)
legend(1.5,1,c("20 knots","7 knots"),lty=c(1,1),col=c(4,5))

2. Generalized Additive Models

Fit a model Y = f1(x1) + f2(x2) + ... + fp(xp) + eps, where f1, ..., fp are
continuous functions and the eps_i are iid with mean 0. Estimate f1, ..., fp
nonparametrically. gam() does this with splines, but you could do it with
kernels, as in the example below.

x1 = runif(100)*10
x2 = runif(100)*20
x3 = runif(100)*30
y = x1^3 - 10*x1^2 + 5*x1 - .1*x2^3 + 2*x2^2 + 4*x2 + 12 + 7*x3 + 14*rnorm(100)
plot(x1,y)
c1 = ksmooth(x1,y, bandwidth = bw.nrd(x1), x.points=x1)
lines(c1)
fit1 = rep(0,100)
fit1[order(x1)] = c1$y   ## ksmooth returns its output at the sorted x.points
y1 = y - fit1            ## these are the residuals from this first fit
## Note that x1 is not ordered, so if you do lines(x1,y1) it won't look good,
## because lines() joins the first point to the second, the second to the third, etc.
## But you can do points(x1,fit1,pch=2) to check that things are going ok.

plot(x2,y1,ylab="residuals after first kernel smoothing")
c2 = ksmooth(x2,y1, bandwidth = bw.nrd(x2), x.points=x2)
lines(c2)
fit2 = fit1
fit2[order(x2)] = c2$y   ## to check, try points(x2,fit2,pch=2)
y2 = y1 - fit2           ## residuals after 2nd fit

plot(x3, y2, xlab="x3", ylab="residuals after 2 kernel smoothings")
c3 = lsfit(x3, y2)
abline(c3)
c3$coef   ## compare with 7, the true coefficient of x3
plot(x3, c3$resid, ylab="residuals after 2 kernels and a line")
par(mfrow=c(1,2))
hist(y2, nclass=20, xlab="residuals after 2 kernels", main="",
  xlim=range(c(y2,c3$resid)))
hist(c3$resid, nclass=20, xlab="residuals after 2 kernels and a line", main="",
  xlim=range(c(y2,c3$resid)))
par(mfrow=c(1,1))

plot(x3, y2, xlab="x3", ylab="residuals after 2 kernel smoothings")
c4 = ksmooth(x3, y2, bandwidth = bw.nrd(x3), x.points=x3)
lines(c4)
fit3 = fit1
fit3[order(x3)] = c4$y
y3 = y2 - fit3
plot(x3, y3, ylab="residuals after 3 kernel smoothings")
par(mfrow=c(1,2))
x5 = hist(c3$resid, nclass=20, xlab="residuals after 2 kernels and a line", main="",
  xlim=range(c(y3,c3$resid)))
hist(y3, breaks=20, xlab="residuals after 3 kernels", main="",
  xlim=range(c(y3,c3$resid)))
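The fits above do one pass of kernel smoothing by hand; gam() automates this kind of additive fit with splines. As a sketch, the same model could be fit with gam() from the mgcv package (the package choice is an assumption; the notes do not say which gam implementation is meant, and the gam package's gam() works similarly):

```r
## Sketch: the additive model above fit with mgcv's gam() instead of by-hand
## kernel smoothing. Smooth terms s(x1), s(x2) for the nonlinear pieces,
## and x3 entered linearly since its true effect is the line 7*x3.
library(mgcv)
set.seed(1)
x1 = runif(100)*10
x2 = runif(100)*20
x3 = runif(100)*30
y = x1^3 - 10*x1^2 + 5*x1 - .1*x2^3 + 2*x2^2 + 4*x2 + 12 + 7*x3 + 14*rnorm(100)
fit = gam(y ~ s(x1) + s(x2) + x3)
summary(fit)           ## the x3 coefficient should be close to 7
par(mfrow=c(1,2))
plot(fit)              ## the estimated smooths f1-hat and f2-hat
```

Compare the fitted x3 coefficient with the 7 obtained from lsfit() above, and the plotted smooths with the kernel curves from ksmooth().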