####### # Access school data to be read by program on page 137-Hoff’s book ######### # Use the dget command the author has in this web site. # http://www.stat.washington.edu/hoff/Book/Data/data/chapter8.r # This reads an R object of class matrix with two variables # Y.school.mathscore<-dget("http://www.stat.washington.edu/~hoff/Book/Data/data/Y.school.mathscore") ######## Rename the data set Y = Y.school.mathscore ##### # Some descriptive plots of the data to get an idea of variability # First, boxplots (without ranking as in Fig 8.4) # Then histogram of means and scatter plot of sample size and sample mean # (as in Fig 8.5 boxplot(Y[,2]~Y[,1]) # see boxplots of scores in all schools m<-length(unique(Y[,1])) # finds out m (number of schools) ybar=n=NULL for(j in 1:m) { ybar[j]<-mean(Y[Y[,1]==j,2]) # put sample mean (ybar) of each school n[j]<-sum(Y[,1]==j) # put sample size of each school } # see programs for other plots in the author's web site. ##### #### MCMC analysis for school data ###### Program on page 137. ## with some comments # Model in the chapter and the lecture notes: # y_ij ~ N(thetaj, sigma^2) # theta_j ~ N(mu, tau^2) # mu ~ N(mu_0, gamma_0^2) # 1/tau^2 ~ Gamma(eta_0/2, eta_0 tau_0^2/2) # 1/sigma^2 ~Gamma(nu_0/2, nu_0 sigma_0^2/2) # Model with names given in the code # y_ij ~ N(theta[j], sigma2) # theta[j] ~ N(mu, tau2) # mu ~ N(mu0=50, g20=25) # 1/tau^2 ~ Gamma((eta0=1)/2,(eta0=1)(t20=100)/2) # 1/sigma^2 ~Gamma((nu0=1)/2, (nu0=1) (s20=100)/2) # ### weakly informative priors nu0<-1 ; s20<-100 # parameter values in dist of 1/sigma^2 eta0<-1 ; t20<-100 # parameter values in dist of 1/tau^2 mu0<-50 ; g20<-25 # parameter values in dist of mu ### ### starting values m<-length(unique(Y[,1])) # finds out m (number of schools) n<-sv<-ybar<-rep(NA,m) # create vectors for n_j, sv_j, ybar_j for(j in 1:m) { ybar[j]<-mean(Y[Y[,1]==j,2]) # put sample mean (ybar) of each school sv[j]<-var(Y[Y[,1]==j,2]) # put variance (sv) of each school n[j]<-sum(Y[,1]==j) # put sample size of each school } par(mfrow=c(1,2)) hist(ybar,xlab="sample mean", main="Empirical distribution of sample means") plot(n,ybar,xlab="sample size",ylab="sample mean", main="relationship between sample mean and sample size") # After copy pasting the picture, type dev.off() to close the window. #### set theta_j^(0), sigma^2(0),tau^2(0), mu(0), all hyperparameters theta<-ybar # for init values of thetaj in each school, use ybar_j sigma2<-mean(sv) # for init sigma^2 use mean of variances of all schools mu<-mean(theta) # for init for mu use the mean of the ybars tau2<-var(theta) # for init for tau^2 use the variance of the y-bars ### setup MCMC S<-5000 # we will do 5000 iterations THETA<-matrix( nrow=S,ncol=m) # 5000 iterations of theta_j j =1-9 MST<-matrix( nrow=S,ncol=3) # 5000 iterations of mu,tau^2, sigma^2 ### ### MCMC algorithm ### The loops here are based on the conditional distributions ## See class notes or full conditional on pages 134 and 135 of ### Hoff’s book for(s in 1:S) # Repeat S times { # sample new values of the thetas from conditional for theta_j for(j in 1:m) { vtheta<-1/(n[j]/sigma2+1/tau2) # var of condit. dist of theta_j etheta<-vtheta*(ybar[j]*n[j]/sigma2+mu/tau2) #mean of dist theta_j theta[j]<-rnorm(1,etheta,sqrt(vtheta)) # draw random number of theta_j } #sample new value of sigma2 from conditional for sigma^2 nun<-nu0+sum(n) ss<-nu0*s20 for(j in 1:m){ss<-ss+sum((Y[Y[,1]==j,2]-theta[j])^2)} sigma2<-1/rgamma(1,nun/2,ss/2) # draw sigma^2 #sample a new value of mu from conditional for mu vmu<- 1/(m/tau2+1/g20) #variance of mu emu<- vmu*(m*mean(theta)/tau2 + mu0/g20) # mean of mu mu<-rnorm(1,emu,sqrt(vmu)) # draw random number from dist of mu # sample a new value of tau2 etam<-eta0+m ss<- eta0*t20 + sum( (theta-mu)^2 ) tau2<-1/rgamma(1,etam/2,ss/2) #store results THETA[s,]<-theta # each row of THETA matrix contains 100 thetas MST[s,]<-c(mu,sigma2,tau2) # each row of MST matrix contains a mu, a sigma^2 and a tau^2 } ###