library(sm)
library(spatstat)
library(splancs)
library(spatial)
library(geoR)

## 1) Enter Data
## http://www.montereybaywhalewatch.com/sightings/smap0303.htm
## x1, y1 = coordinates of each point (all inside the unit square);
## z1     = the numeric mark recorded for each point.
x1 <- c(0.1, 0.77, .97, .99,
        .96, .9, .92, .81, .78, .77,
        .76, .755, .75,
        .757, .754, .745, .74,
        .732, .734, .736,
        .71, .715, .715,
        .68,
        .72, .725, .7, .707, .712, .71,
        .75, .76, .765, .755, .757, .756,
        .77, .78, .73, .73,
        .83)
y1 <- c(.92, .9, .97, .99,
        .7, .66, .59, .65, .7, .64,
        .69, .68, .675,
        .63, .64, .638, .625,
        .65, .645, .648,
        .64, .641, .646,
        .65,
        .61, .6, .6, .595, .59, .583,
        .575, .57, .565, .56, .555, .55,
        .555, .569, .54, .541,
        .35)
z1 <- c(7, 3, 5, 8, 7,
        2, 7, 5, 4, 5,
        3, 3, 9, 4, 5,
        3, 2, 4, 1, 8,
        3, 7, 6, 8, 4,
        4, 4, 8, 9, 8,
        4, 8, 5, 7, 2,
        4, 3, 5, 8, 9,
        1)
n <- length(x1)
## The three vectors are typed in by hand; fail early if one is short.
stopifnot(length(y1) == n, length(z1) == n)

z2 <- (z1 - min(z1)) / (max(z1) - min(z1))  ## rescaled to [0,1].

par(mfrow = c(1, 1))
plot(c(0, 1), c(0, 1), type = "n", xlab = "x-coordinate", ylab = "y-coordinate",
     main = "whales and dolphins")
points(x1, y1, pch = 1, cex = 1 + 3 * z2)  ## symbol size grows with the mark

### 2) Marked J-function:
## Build the "marked ppp" object through the ppp() constructor, using as a
## window [0,1] x [0,1].  Writing b2$marks / b2$n by hand (as was done before)
## bypasses the constructor, so the object's internal mark bookkeeping is
## never set; ppp() fills in marks and n consistently.
b2 <- ppp(x = x1, y = y1, window = owin(c(0, 1), c(0, 1)), marks = z1)

## The two classes of points compared below: low marks vs. high marks.
low_marks <- marks(b2) < 4.5
high_marks <- marks(b2) > 5.5

par(mfrow = c(1, 1))
jm4 <- Jmulti(b2, low_marks, high_marks)
plot(jm4)
## or to control the plot yourself, try:
j_keep <- jm4$r < .5
plot(jm4$r[j_keep], jm4$rs[j_keep], xlab = "h", ylab = "J(h)", type = "l",
     lty = 1)
lines(jm4$r[j_keep], jm4$theo[j_keep], lty = 2)
legend(.2, .2, lty = c(1, 2), legend = c("data", "Poisson"))
## try playing around with this, for different classes of marks instead
## of < 4.5, and > 5.5

######### 3) Marked K-function & L-function:
km4 <- Kmulti(b2, low_marks, high_marks)
k_keep <- km4$r < .3
plot(km4$r[k_keep], km4$border[k_keep], xlab = "h", ylab = "K(h)", type = "l",
     lty = 1)
lines(km4$r[k_keep], km4$theo[k_keep], lty = 2)
legend(.2, .2, lty = c(1, 2), legend = c("data", "Poisson"))

## Centred L-function: sqrt(K(h)/pi) - h, computed from the border estimate.
Lm4 <- sqrt(km4$border[k_keep] / pi) - km4$r[k_keep]
plot(c(0, .3), range(Lm4), type = "n", xlab = "lag, h", ylab = "L4(h) - h")
points(km4$r[k_keep], Lm4, pch = "*")
lines(km4$r[k_keep], Lm4)
abline(h = 0, lty = 2)

### THEORETICAL BOUNDS for L-function
## bounds = 1.96 * sqrt(2*pi*A) * h / E(N), where
## A = area of space, and
## E(N) = expected # of pts of type j in the space (approximated here using
## the observed # of pts of type j)
s <- km4$r[km4$r < .3]
L4upper <- 1.96 * sqrt(2 * pi * 1 * 1) * s / sum(b2$marks > 5.5)
L4lower <- -1.0 * L4upper
lines(s, L4upper, lty = 3)
lines(s, L4lower, lty = 3)

### 4) Kernel smoothing, using bandwidth h
par(mfrow = c(1, 2))
## h should usually be a fraction (roughly 1/4 or so)
## of the range of your x-coordinates or y-coordinates;
## you can use bw.nrd0(x1) or bw.nrd0(y1) as a guide
h <- .05

## n1 x n1 evaluation grid over the unit square.
n1 <- 20
mygrid1 <- seq(0, 1, length.out = n1)
mygrid2 <- seq(0, 1, length.out = n1)

## a1[i, j] = kernel-weighted average of the marks z1 at grid node (i, j):
## each point is weighted by a normal density (sd = h) of its distance
## to the node.
a1 <- matrix(0, nrow = n1, ncol = n1)
for (i in 1:n1) {
  for (j in 1:n1) {
    kern <- dnorm(sqrt((mygrid1[i] - x1)^2 + (mygrid2[j] - y1)^2), sd = h)
    a1[i, j] <- sum(z1 * kern) / sum(kern)
  }
}
image(mygrid1, mygrid2, a1, xlab = "x", ylab = "y", zlim = range(z1),
      col = grey((64:20) / 64))

## legend
## Set up an empty, axis-free panel whose vertical extent is centred on the
## middle of the mark range; the colour bar itself is drawn next.
x <- z1
zmin <- min(x)
zmax <- max(x)
zrng <- zmax - zmin
zmid <- zmin + zrng / 2
plot(c(0, 10), c(zmid - 2 * zrng / 3, zmid + 2 * zrng / 3), type = "n",
     axes = FALSE, xlab = "", ylab = "")
zgrid <- seq(zmin, zmax, length.out = 100)
## zgrid = vector of 100 equally-spaced numbers spanning range of the values.
## Colour bar for the preceding image, labelled at min / mid / max.
image(c(-1:1), zgrid, matrix(rep(zgrid, 2), ncol = 100, byrow = TRUE),
      add = TRUE, zlim = range(z1), col = grey((64:20) / 64))
text(2.5, zmin, as.character(signif(zmin, 2)), cex = 1)
text(2.5, zmax, as.character(signif(zmax, 2)), cex = 1)
text(2.5, zmid, as.character(signif(zmid, 2)), cex = 1)
text(4.5, zmid, "Values", srt = -90)

##### 5) Quadrat totals
## x[i, j] = sum of the marks z1 over the points that fall in the quadrat
## [(i-1)/10, i/10) x [(j-1)/10, j/10) of the unit square.
##
## The earlier triple loop tested `(x1[k]= (i-1)/10) && (y1[k]= (j-1)/10)`:
## the comparison operators were missing, so the condition *assigned* into
## x1[k] and y1[k] (overwriting the coordinates) instead of testing which
## quadrat the point lies in.  Each point is now binned once with
## findInterval() on the decile breaks; rightmost.closed = TRUE puts a
## coordinate of exactly 1 into the last quadrat rather than dropping it.
quad_breaks <- (0:10) / 10
quad_i <- findInterval(x1, quad_breaks, rightmost.closed = TRUE)
quad_j <- findInterval(y1, quad_breaks, rightmost.closed = TRUE)

x <- matrix(0, ncol = 10, nrow = 10)
for (k in seq_len(n)) {
  ## findInterval() gives 0 / 11 for coordinates outside [0, 1]; skip those.
  in_window <- quad_i[k] >= 1 && quad_i[k] <= 10 &&
    quad_j[k] >= 1 && quad_j[k] <= 10
  if (in_window) {
    x[quad_i[k], quad_j[k]] <- x[quad_i[k], quad_j[k]] + z1[k]
  }
}
## can check that sum(x) should = sum(z1)

##### Plot the quadrat counts
par(mfrow = c(1, 2))  ## makes a 1x2 grid of plots on the graphic screen
plot(c(0, 1), c(0, 1), type = "n", xlab = "x-coordinate", ylab = "y-coordinate",
     main = "Quadrat counts of \n Whales & Dolphins")
## Cell centres are the quadrat midpoints 0.05, 0.15, ..., 0.95.
image(x = c(0:9) / 10 + .05, y = c(0:9) / 10 + .05, z = x,
      col = grey((64:20) / 64), add = TRUE)
points(x1, y1, pch = 1, cex = 1 + 3 * z2)

######### LEGEND:
## Empty, axis-free panel for the colour bar of the quadrat totals.
zmin <- min(x)
zmax <- max(x)
zrng <- zmax - zmin
zmid <- zmin + zrng / 2
plot(c(0, 10), c(zmid - 2 * zrng / 3, zmid + 2 * zrng / 3), type = "n",
     axes = FALSE, xlab = "", ylab = "")
zgrid <- seq(zmin, zmax, length.out = 100)
## zgrid = vector of 100 equally-spaced numbers spanning range of the values.
## Colour bar for the preceding image, labelled at min / max / mid.
image(c(-1:1), zgrid, matrix(rep(zgrid, 2), ncol = 100, byrow = TRUE),
      add = TRUE, col = gray((64:20) / 64))
text(2.5, zmin, as.character(signif(zmin, 2)), cex = 1)
text(2.5, zmax, as.character(signif(zmax, 2)), cex = 1)
text(2.5, zmid, as.character(signif(zmid, 2)), cex = 1)
text(4.5, zmid, "Values", srt = -90)

## 6) Variogram of the list data directly
## (or can substitute quadrat counts)
v1 <- variog(coords = cbind(x1, y1), data = z1, max.dist = .4)
plot(v1)
v4 <- variog4(coords = cbind(x1, y1), data = z1)
plot(v4)

## 7) Non-parametric and variogram estimates of the quadrats, using correlogram:
## d1 holds one row per quadrat: its midpoint (x, y) and its total z.
## x varies slowest and y fastest, which matches c(t(x)) (row-by-row order).
quad_mid <- c(0:9) / 10 + .05
d1 <- data.frame(x = rep(quad_mid, each = 10),
                 y = rep(quad_mid, times = 10),
                 z = c(t(x)))
sill1 <- var(d1$z)
my.ls0 <- surf.ls(0, d1)
c1 <- correlogram(my.ls0, 100, plot = FALSE)
m1 <- max(seq_along(c1$x)[c1$x < .5])  ## to only go up to distance 0.5 in what follows
a1 <- seq(0.01, 0.5, length.out = 50)

## Empirical curve shared by every panel: 2*gamma(h) = 2*sill - 2*rho(h)*sill.
emp_h <- c1$x[1:m1]
emp_2gamma <- 2 * sill1 - 2 * c1$y[1:m1] * sill1

## One 2x2 page per covariance family; each panel overlays the model curve
## for one range parameter d (solid) on the empirical curve (dashed).
cov_models <- list(exponential = expcov, Gaussian = gaucov,
                   spherical = sphercov)
for (model in names(cov_models)) {
  par(mfrow = c(2, 2))
  for (d in c(5, 0.5, 0.1, 0.01)) {
    plot(emp_h, emp_2gamma, xlab = "distance, h", ylab = "2gamma(h)",
         type = "l", lty = 2, main = paste0(model, ", d=", d))
    lines(a1, 2 * sill1 - 2 * cov_models[[model]](a1, d) * sill1)
  }
}

## Compare all 3 with d = 0.05:
par(mfrow = c(1, 1))
plot(emp_h, emp_2gamma, xlab = "distance, h", ylab = "2gamma(h)",
     type = "l", lty = 2, main = "variograms, d=0.05")
model_cols <- c(3, 4, 5)
for (m in seq_along(cov_models)) {
  lines(a1, 2 * sill1 - 2 * cov_models[[m]](a1, 0.05) * sill1,
        col = model_cols[m])
}
legend(0.3, 3, c("nonparametric", "exponential", "Gaussian", "spherical"),
       col = c(1, 3, 4, 5), lty = c(2, 1, 1, 1))

### 8) Fitting a Pseudo-Likelihood model
## I'm using the model lambda_p ( s | s_1, ..., s_k) =
## mu + alpha x + beta y + gamma SUM_{i = 1 to k} exp{-a1 * z_i * D(s_i,s)}
## where s = (x,y), and where D means distance.
## So, if gamma is positive, then there is clustering; otherwise inhibition
d1 <- as.matrix(dist(cbind(x1, y1)))  ## matrix of distances between pts

f <- function(p) {
  ## Returns the negative pseudo log-likelihood
  ##   (approximate integral of lambda_p) - SUM_i log lambda_p(s_i)
  ## for the model
  ##   lambda_p(s) = mu + alpha x + beta y
  ##                 + gamma SUM_j exp{-a1 * z_j * D(s_j, s)}.
  ## p = (mu, alpha, beta, gamma, a1)
  ## Reads the script-level objects x1, y1, z1, d1, simx, simy.
  ## The value 99999 is returned as a penalty for inadmissible parameters.
  penalty <- 99999

  ## The linear background must be non-negative at all four corners of the
  ## unit square, and gamma must be non-negative.
  if (p[1] < 0 || p[1] + p[2] < 0 || p[1] + p[3] < 0 ||
      p[1] + p[2] + p[3] < 0 || p[4] < 0) {
    return(penalty)
  }

  ## Intensity at each observed point i, summing over the OTHER points j.
  ## The decay is scaled by the mark of the contributing point, z1[j], as in
  ## the model above and in lam2 below (this term previously used z1[i], the
  ## mark of the evaluation point, which disagreed with both).
  ## sweep(.., 2, z1, "*") multiplies column j of the distance matrix by z1[j].
  pair_term <- exp(-p[5] * sweep(d1, 2, z1, "*"))
  diag(pair_term) <- 0  ## a point does not contribute to its own intensity
  lam <- p[1] + p[2] * x1 + p[3] * y1 + p[4] * rowSums(pair_term)
  if (min(lam) < 0) {
    return(penalty)
  }

  ## Intensity at the uniform random locations (simx, simy); its mean
  ## approximates the integral of lambda_p over the unit-area window.
  sim_dist <- sqrt(outer(simx, x1, "-")^2 + outer(simy, y1, "-")^2)
  sim_term <- exp(-p[5] * sweep(sim_dist, 2, z1, "*"))
  lam2 <- p[1] + p[2] * simx + p[3] * simy + p[4] * rowSums(sim_term)

  integral_est <- mean(lam2)
  neg_loglik <- integral_est - sum(log(lam))
  cat(integral_est, neg_loglik, " ", p, "\n")
  ## the 1st column should be roughly n when it's done
  neg_loglik
}

## Random locations used by f() for the Monte Carlo integral.  They are drawn
## once so that f() is a deterministic function of p during the optimisation.
simx <- runif(155)
simy <- runif(155)

pstart <- c(1, 1, 1, 1, 1)
## NOTE(review): maxit = 10 stops optim() after very few iterations, so pend
## is only a rough improvement on pstart; raise it for a serious fit.
fit1 <- optim(pstart, f, control = list(maxit = 10))
pend <- fit1$par

### TO CHECK, COMPARE THE FOLLOWING:
f(pstart)
f(pend)
### the latter one should be less.
pend  ## interpret these parameters!!!

### 9) Plot the Model's Background Rate
par(mfrow = c(1, 3))
plot(c(0, 1), c(0, 1), type = "n", xlab = "x-coordinate", ylab = "y-coordinate",
     main = "background rate")
x2 <- seq(0.05, 0.95, length.out = 10)
y2 <- seq(0.05, 0.95, length.out = 10)
## Linear background mu + alpha x + beta y on the 10 x 10 grid:
## zz2 uses the fitted parameters, z3 the starting guess.
zz2 <- outer(pend[1] + pend[2] * x2, pend[3] * y2, "+")
z3 <- outer(pstart[1] + pstart[2] * x2, pstart[3] * y2, "+")
## Common colour scale so the two images are comparable.
zmin <- min(c(zz2, z3))
zmax <- max(c(zz2, z3))
image(x2, y2, zz2, col = gray((64:20) / 64), zlim = c(zmin, zmax), add = TRUE)
points(x1, y1, pch = 1, cex = 1 + 3 * z2)

######### LEGEND:
zrng <- zmax - zmin
zmid <- zmin + zrng / 2
plot(c(0, 10), c(zmid - 2 * zrng / 3, zmid + 2 * zrng / 3), type = "n",
     axes = FALSE, xlab = "", ylab = "")
zgrid <- seq(zmin, zmax, length.out = 100)
## zgrid = vector of 100 equally-spaced numbers spanning range of the values.
## Colour bar for the preceding image, labelled at min / max / mid.
image(c(-1:1), zgrid, matrix(rep(zgrid, 2), ncol = 100, byrow = TRUE),
      add = TRUE, col = gray((64:20) / 64))
text(2.5, zmin, as.character(signif(zmin, 2)), cex = 1)
text(2.5, zmax, as.character(signif(zmax, 2)), cex = 1)
text(2.5, zmid, as.character(signif(zmid, 2)), cex = 1)
text(4.5, zmid, "Values", srt = -90)

## Third panel: the background rate implied by the starting parameters.
plot(c(0, 1), c(0, 1), type = "n", xlab = "x-coordinate", ylab = "y-coordinate",
     main = "original guess")
image(x2, y2, z3, col = gray((64:20) / 64), zlim = c(zmin, zmax), add = TRUE)
points(x1, y1, pch = 1, cex = 1 + 3 * z2)

### 10) PLOT LAMBDA_p
par(mfrow = c(1, 3))
plot(c(0, 1), c(0, 1), type = "n", xlab = "x-coordinate", ylab = "y-coordinate",
     main = "lambda_p")
x2 <- seq(0.05, 0.95, length.out = 10)
y2 <- seq(0.05, 0.95, length.out = 10)

## Grid coordinates as 10 x 10 matrices: grid_x[i, j] = x2[i],
## grid_y[i, j] = y2[j].
grid_x <- matrix(x2, nrow = 10, ncol = 10)
grid_y <- matrix(y2, nrow = 10, ncol = 10, byrow = TRUE)

## Full conditional intensity on the grid: linear background plus one
## exponential term per observed point k, added in the order k = 1, ..., n.
## zz2 uses the fitted parameters (pend), zz3 the starting guess (pstart).
zz2 <- pend[1] + pend[2] * grid_x + pend[3] * grid_y
zz3 <- pstart[1] + pstart[2] * grid_x + pstart[3] * grid_y
for (k in c(1:n)) {
  dist_k <- sqrt((grid_x - x1[k])^2 + (grid_y - y1[k])^2)
  zz2 <- zz2 + pend[4] * exp(-pend[5] * z1[k] * dist_k)
  zz3 <- zz3 + pstart[4] * exp(-pstart[5] * z1[k] * dist_k)
}

## Common colour scale so the two images are comparable.
zmin <- min(c(zz2, zz3))
zmax <- max(c(zz2, zz3))
image(x2, y2, zz2, col = gray((64:20) / 64), zlim = c(zmin, zmax), add = TRUE)
points(x1, y1, pch = 1, cex = 1 + 3 * z2)

######### LEGEND:
zrng <- zmax - zmin
zmid <- zmin + zrng / 2
plot(c(0, 10), c(zmid - 2 * zrng / 3, zmid + 2 * zrng / 3), type = "n",
     axes = FALSE, xlab = "", ylab = "")
zgrid <- seq(zmin, zmax, length.out = 100)
## zgrid = vector of 100 equally-spaced numbers spanning range of the values.
image(c(-1:1), zgrid, matrix(rep(zgrid, 2), ncol = 100, byrow = TRUE),
      add = TRUE, col = gray((64:20) / 64))
text(2.5, zmin, as.character(signif(zmin, 2)), cex = 1)
text(2.5, zmax, as.character(signif(zmax, 2)), cex = 1)
text(2.5, zmid, as.character(signif(zmid, 2)), cex = 1)
text(4.5, zmid, "Values", srt = -90)

## Third panel: lambda_p implied by the starting parameters.
plot(c(0, 1), c(0, 1), type = "n", xlab = "x-coordinate", ylab = "y-coordinate",
     main = "original guess")
image(x2, y2, zz3, col = gray((64:20) / 64), zlim = c(zmin, zmax), add = TRUE)
points(x1, y1, pch = 1, cex = 1 + 3 * z2)