library(sm) library(spatstat) library(splancs) library(spatial) library(geoR) ## 1) Enter Data ## http://www.montereybaywhalewatch.com/sightings/smap0303.htm x1 = c(0.1,0.77,.97,.99, .96,.9,.92,.81,.78,.77, .76,.755,.75, .757,.754,.745,.74, .732,.734,.736, .71,.715,.715, .68, .72,.725,.7,.707,.712,.71, .75,.76,.765,.755,.757,.756, .77,.78,.73,.73, .83) y1 = c(.92,.9,.97,.99, .7,.66,.59,.65,.7,.64, .69,.68,.675, .63,.64,.638,.625, .65,.645,.648, .64,.641,.646, .65, .61,.6,.6,.595,.59,.583, .575,.57,.565,.56,.555,.55, .555,.569,.54,.541, .35) par(mfrow=c(1,1)) plot(c(0,1),c(0,1),type="n",xlab="x-coordinate",ylab="y-coordinate", main="whales and dolphins") points(x1,y1,pch=3) n = length(x1) ######### 2) K-function & L-function: ### (Remember to LOAD SPLANCS first!!!) b1 = as.points(x1,y1) n = npts(b1) bdry = matrix(c(0,0,1,0,1,1,0,1,0,0),ncol=2,byrow=T) ## for square boundary plot(c(0,1),c(0,1),type="n",xlab="x",ylab="y") lines(bdry) points(b1,pch="*") par(mfrow=c(2,1)) ## makes a 2x1 grid of plots s = seq(.001,.3,length=50) k4 = khat(b1,bdry,s) plot(s,k4,xlab="distance",ylab="K4(h)",pch="*") lines(s,k4) lines(s,pi*s^2,lty=2) L4 = sqrt(k4/pi)-s plot(c(0,.3),range(L4),type="n",xlab="lag, h",ylab="L4(h) - h") points(s,L4,pch="*") lines(s,L4) lines(s,rep(0,50),lty=2) ### CONFIDENCE BOUNDS FOR K-FUNCTION via simulation k4conf = Kenv.csr(npts(b1), bdry, 1000, s) plot(c(0,max(s)),c(0,max(k4conf$upper,k4)), type="n",xlab="distance",ylab="K4(h)") points(s,k4,pch="*") lines(s,k4) lines(s,k4conf$upper,lty=3) lines(s,k4conf$lower,lty=3) L4upper = sqrt(k4conf$upper/pi) - s L4lower = sqrt(k4conf$lower/pi) - s plot(c(0,max(s)),c(min(L4lower,L4),max(L4upper,L4)), type="n",xlab="distance",ylab="L4(h) - h") points(s,L4,pch="*") lines(s,L4) lines(s,L4upper,lty=2) lines(s,L4lower,lty=2) ### THEORETICAL BOUNDS for L-function ## bounds = 1.96 * sqrt(2*pi*A) * h / E(N), where ## A = area of space, and ## E(N) = expected # of pts in the space (approximated here using ## the observed # of pts L4upper = 1.96 * sqrt(2*pi*1*1) * s / n L4lower = -1.0 * L4upper lines(s,L4upper,lty=3) lines(s,L4lower,lty=3) #### 3) F-function: b2 = as.ppp(b1, W = c(0,1,0,1)) ## the above convert the points into a "ppp" object, ## using as a window [0,1] x [0,1] par(mfrow=c(1,1)) f4 = Fest(b2) plot(f4) ## or to control the plot yourself, try: plot(f4$r[f4$r<.5],f4$theo[f4$r<.5],xlab="h",ylab="F(h)",type="l",lty=2) lines(f4$r[f4$r<.5],f4$rs[f4$r<.5],lty=1) legend(.2,.2,lty=c(1,2),legend=c("data","Poisson")) #### 4) G-function: g4 = Gest(b2) plot(g4) ## or to control the plot yourself, try: plot(g4$r[g4$r<.3],g4$rs[g4$r<.3],xlab="h",ylab="G(h)",type="l",lty=1) lines(g4$r[g4$r<.3],g4$theo[g4$r<.3],lty=2) legend(.2,.2,lty=c(1,2),legend=c("data","Poisson")) #### 5) J-function: j4 = Jest(b2) plot(j4) ## or to control the plot yourself, try: plot(j4$r[j4$r<.3],j4$rs[j4$r<.3],xlab="h",ylab="J(h)",type="l",lty=1) lines(j4$r[j4$r<.3],j4$theo[j4$r<.3],lty=2) legend(.2,.2,lty=c(1,2),legend=c("data","Poisson")) ### 6) Kernel smoothing, using kernel2d bdw = sqrt(bw.nrd0(x1)^2+bw.nrd0(y1)^2) ## default bandwidth z = kernel2d(b1,bdry,bdw) par(mfrow=c(1,2)) image(z,col=gray((64:20)/64),xlab="x",ylab="y") points(b1) x4 = (0:100)/100*(max(z$z)-min(z$z))+min(z$z) plot(c(0,10),c(.8*min(x4),1.2*max(x4)),type="n",axes=F,xlab="",ylab="") image(c(-1:1),x4,matrix(rep(x4,2),ncol=101,byrow=T),add=T,col=gray((64:20)/64)) text(2,min(x4),as.character(signif(min(x4),2)),cex=1) text(2,(max(x4)+min(x4))/2,as.character(signif((max(x4)+min(x4))/2,2)),cex=1) text(2,max(x4),as.character(signif(max(x4),2)),cex=1) mtext(s=3,l=-3,at=1,"Rate (pts per unit area)") ## repeat the above, trying other values of bdw, for more or less smoothing ##### 7) Quadrat counts x = matrix(0,ncol=10,nrow=10) for(i in 1:10){ for(j in 1:10){ for(k in 1:n){ if((x1[k]= (i-1)/10) && (y1[k]= (j-1)/10)) x[i,j] = x[i,j] + 1 } }} sum(x) ## should = n, which here is 43 ##### Plot the quadrat counts par(mfrow=c(1,2)) ## makes a 1x2 grid of plots on the graphic screen plot(c(0,1),c(0,1),type="n",xlab="x-coordinate",ylab="y-coordinate", main="Quadrat counts of \n Whales & Dolphins") image(x=c(0:9)/10+.05,y=c(0:9)/10+.05,z=x,col=grey(c(64:20)/64),add=T) points(x1,y1,pch=".") ######### LEGEND: zmin = min(x) zmax = max(x) zrng = zmax - zmin zmid = zmin + zrng/2 plot(c(0,10),c(zmid-2*zrng/3,zmid+2*zrng/3),type="n",axes=F,xlab="",ylab="") zgrid = seq(zmin,zmax,length=100) ## zgrid = vector of 100 equally-spaced numbers spanning range of the values. image(c(-1:1),zgrid,matrix(rep(zgrid,2),ncol=100,byrow=T),add=T,col=gray((64:20)/64)) text(2.5,zmin,as.character(signif(zmin,2)),cex=1) text(2.5,zmax,as.character(signif(zmax,2)),cex=1) text(2.5,zmid,as.character(signif(zmid,2)),cex=1) text(4.5,zmid,"Values",srt=-90) ## 8) Variogram of quadrat counts, using variog: ## first, make a list of values of the quadrat counts d1 = data.frame(x=rep(c(0:9)/10+.05,times=rep(10,10)),y=rep(c(0:9)/10+.05,10),z=c(t(x))) v1 = variog(coords = cbind(d1$x, d1$y), data = d1$z,max.dist=.4) plot(v1) v4 = variog4(coords = cbind(d1$x, d1$y), data = d1$z) plot(v4) ## 9) Non-parametric and variogram estimates, using correlogram: sill1 = var(d1$z) par(mfrow=c(2,2)) my.ls0 = surf.ls(0,d1) c1 = correlogram(my.ls0,100,plot=F) m1 = max((1:length(c1$x))[c1$x<.5]) ## to only go up to distance 0.5 in what follows a1 = seq(0.01,0.5,length=50) plot(c1$x[1:m1], 2*sill1 - 2*c1$y[1:m1] * sill1, xlab="distance, h", ylab="2gamma(h)",type="l", lty=2,main="exponential, d=5") lines(a1, 2*sill1 - 2*expcov(a1,5)*sill1) plot(c1$x[1:m1], 2*sill1 - 2*c1$y[1:m1] * sill1, xlab="distance, h", ylab="2gamma(h)",type="l", lty=2,main="exponential, d=0.5") lines(a1, 2*sill1 - 2*expcov(a1,0.5)*sill1) plot(c1$x[1:m1], 2*sill1 - 2*c1$y[1:m1] * sill1, xlab="distance, h", ylab="2gamma(h)",type="l", lty=2,main="exponential, d=0.1") lines(a1, 2*sill1 - 2*expcov(a1,0.1)*sill1) plot(c1$x[1:m1], 2*sill1 - 2*c1$y[1:m1] * sill1, xlab="distance, h", ylab="2gamma(h)",type="l", lty=2,main="exponential, d=0.01") lines(a1, 2*sill1 - 2*expcov(a1,0.01)*sill1) par(mfrow=c(2,2)) plot(c1$x[1:m1], 2*sill1 - 2*c1$y[1:m1] * sill1, xlab="distance, h", ylab="2gamma(h)",type="l", lty=2,main="Gaussian, d=5") lines(a1, 2*sill1 - 2*gaucov(a1,5)*sill1) plot(c1$x[1:m1], 2*sill1 - 2*c1$y[1:m1] * sill1, xlab="distance, h", ylab="2gamma(h)",type="l", lty=2,main="Gaussian, d=0.5") lines(a1, 2*sill1 - 2*gaucov(a1,0.5)*sill1) plot(c1$x[1:m1], 2*sill1 - 2*c1$y[1:m1] * sill1, xlab="distance, h", ylab="2gamma(h)",type="l", lty=2,main="Gaussian, d=0.1") lines(a1, 2*sill1 - 2*gaucov(a1,0.1)*sill1) plot(c1$x[1:m1], 2*sill1 - 2*c1$y[1:m1] * sill1, xlab="distance, h", ylab="2gamma(h)",type="l", lty=2,main="Gaussian, d=0.01") lines(a1, 2*sill1 - 2*gaucov(a1,0.01)*sill1) par(mfrow=c(2,2)) plot(c1$x[1:m1], 2*sill1 - 2*c1$y[1:m1] * sill1, xlab="distance, h", ylab="2gamma(h)",type="l", lty=2,main="spherical, d=5") lines(a1, 2*sill1 - 2*sphercov(a1,5)*sill1) plot(c1$x[1:m1], 2*sill1 - 2*c1$y[1:m1] * sill1, xlab="distance, h", ylab="2gamma(h)",type="l", lty=2,main="spherical, d=0.5") lines(a1, 2*sill1 - 2*sphercov(a1,0.5)*sill1) plot(c1$x[1:m1], 2*sill1 - 2*c1$y[1:m1] * sill1, xlab="distance, h", ylab="2gamma(h)",type="l", lty=2,main="spherical, d=0.1") lines(a1, 2*sill1 - 2*sphercov(a1,0.1)*sill1) plot(c1$x[1:m1], 2*sill1 - 2*c1$y[1:m1] * sill1, xlab="distance, h", ylab="2gamma(h)",type="l", lty=2,main="spherical, d=0.01") lines(a1, 2*sill1 - 2*sphercov(a1,0.01)*sill1) ## Compare all 3 with d = 0.1: par(mfrow=c(1,1)) plot(c1$x[1:m1], 2*sill1 - 2*c1$y[1:m1] * sill1, xlab="distance, h", ylab="2gamma(h)",type="l", lty=2,main="variograms, d=0.1") lines(a1, 2*sill1 - 2*expcov(a1,0.1)*sill1,col=3) lines(a1, 2*sill1 - 2*gaucov(a1,0.1)*sill1,col=4) lines(a1, 2*sill1 - 2*sphercov(a1,0.1)*sill1,col=5) legend(0.3,3,c("nonparametric","exponential", "Gaussian", "spherical"), col=c(1,3,4,5),lty=c(2,1,1,1)) ### 10) Fitting a Pseudo-Likelihood model ## I'm using the model lambda_p ( z | z_1, ..., z_k) = ## mu + alpha x + beta y + gamma SUM_{i = 1 to k} exp{-a1 D(z_i,z)} ## where z = (x,y), and where D means distance. ## So, if gamma is positive, then there is clustering; otherwise inhibition d1 = as.matrix(dist(cbind(x1,y1))) ## matrix of distances between pts f = function(p){ ## returns the negative pseudo log-likelihood ## p = (mu,alpha,beta,gamma,a1) if(p[1] < 0) return(99999) if(p[1] + p[2] < 0) return(99999) if(p[1] + p[3] < 0) return(99999) if(p[1] + p[2] + p[3] < 0) return(99999) if(p[4] < 0) return(99999) p0 = exp(- (p[1] + p[2]/2 + p[3]/2)) lam = p[1] + p[2] * x1 + p[3] * y1 for(i in 1:n){ for(j in c(1:n)[-i]){ lam[i] = lam[i] + p[4] * exp(-p[5] * d1[i,j]) } } if (min(lam) < 0) return (99999) lam2 = p[1] + p[2] * simx + p[3] * simy for(i in 1:1000){ for(j in c(1:n)){ lam2[i] = lam2[i] + p[4] * exp(-p[5] * sqrt((simx[i]-x1[j])^2+(simy[i]-y1[j])^2)) } } cat(mean(lam2),mean(lam2)-sum(log(lam)), " ",p,"\n") ## the 1st column should be roughly n when it's done return(mean(lam2)-sum(log(lam))) } simx = runif(1000) simy = runif(1000) pstart = c(1, 1, 1, 1, 1) z1 = optim(pstart,f,control=list(maxit=500)) pend = z1$par ### TO CHECK, COMPARE THE FOLLOWING: f(pstart) ## -120 f(pend) ## -283 ### the latter one should be less. pend ## interpret these parameters!!! ### 11) Plot the Model's Background Rate par(mfrow=c(1,3)) plot(c(0,1),c(0,1),type="n",xlab="x-coordinate",ylab="y-coordinate", main="background rate") x2 = seq(0.05,0.95,length=10) y2 = seq(0.05,0.95,length=10) z2 = matrix(rep(0,(10*10)),ncol=10) z3 = matrix(rep(0,(10*10)),ncol=10) for(i in 1:10){ for(j in 1:10){ z2[i,j] = pend[1] + pend[2]*x2[i] + pend[3]*y2[j] z3[i,j] = pstart[1] + pstart[2]*x2[i] + pstart[3]*y2[j] }} zmin = min(c(z2,z3)) zmax = max(c(z2,z3)) image(x2,y2,z2,col=gray((64:20)/64),zlim=c(zmin,zmax),add=T) points(x1,y1) ######### LEGEND: zrng = zmax - zmin zmid = zmin + zrng/2 plot(c(0,10),c(zmid-2*zrng/3,zmid+2*zrng/3),type="n",axes=F,xlab="",ylab="") zgrid = seq(zmin,zmax,length=100) ## zgrid = vector of 100 equally-spaced numbers spanning range of the values. image(c(-1:1),zgrid,matrix(rep(zgrid,2),ncol=100,byrow=T),add=T,col=gray((64:20)/64)) text(2.5,zmin,as.character(signif(zmin,2)),cex=1) text(2.5,zmax,as.character(signif(zmax,2)),cex=1) text(2.5,zmid,as.character(signif(zmid,2)),cex=1) text(4.5,zmid,"Values",srt=-90) plot(c(0,1),c(0,1),type="n",xlab="x-coordinate",ylab="y-coordinate", main="original guess") image(x2,y2,z3,col=gray((64:20)/64),zlim=c(zmin,zmax),add=T) points(x1,y1) ### 12) PLOT LAMBDA_p par(mfrow=c(1,3)) plot(c(0,1),c(0,1),type="n",xlab="x-coordinate",ylab="y-coordinate", main="lambda_p") x2 = seq(0.05,0.95,length=10) y2 = seq(0.05,0.95,length=10) zz2 = matrix(rep(0,(10*10)),ncol=10) zz3 = matrix(rep(0,(10*10)),ncol=10) for(i in 1:10){ for(j in 1:10){ zz2[i,j] = pend[1] + pend[2] * x2[i] + pend[3] * y2[j] zz3[i,j] = pstart[1] + pstart[2] * x2[i] + pstart[3] * y2[j] for(k in c(1:n)){ zz2[i,j] = zz2[i,j] + pend[4] * exp(-pend[5] * sqrt((x2[i]-x1[k])^2+(y2[j]-y1[k])^2)) zz3[i,j] = zz3[i,j] + pstart[4] * exp(-pstart[5] * sqrt((x2[i]-x1[k])^2+(y2[j]-y1[k])^2)) } } } zmin = min(c(zz2,zz3)) zmax = max(c(zz2,zz3)) image(x2,y2,zz2,col=gray((64:20)/64),zlim=c(zmin,zmax),add=T) points(x1,y1) ######### LEGEND: zrng = zmax - zmin zmid = zmin + zrng/2 plot(c(0,10),c(zmid-2*zrng/3,zmid+2*zrng/3),type="n",axes=F,xlab="",ylab="") zgrid = seq(zmin,zmax,length=100) ## zgrid = vector of 100 equally-spaced numbers spanning range of the values. image(c(-1:1),zgrid,matrix(rep(zgrid,2),ncol=100,byrow=T),add=T,col=gray((64:20)/64)) text(2.5,zmin,as.character(signif(zmin,2)),cex=1) text(2.5,zmax,as.character(signif(zmax,2)),cex=1) text(2.5,zmid,as.character(signif(zmid,2)),cex=1) text(4.5,zmid,"Values",srt=-90) plot(c(0,1),c(0,1),type="n",xlab="x-coordinate",ylab="y-coordinate", main="original guess") image(x2,y2,zz3,col=gray((64:20)/64),zlim=c(zmin,zmax),add=T) points(x1,y1)