### NMST543 Spatial Statistics, exercise 10 ### ############################################### # Today we discuss some specifics of spatial regional data. library(spdep) library(maptools) # Available datasets: data(package="spdep") # Crime data - 49 neigbourhoods in Columbus (Ohio, USA); # residential burglaries and vehicle thefts per # thousand households in the neighbourhood (+covariates): data(columbus) columbus <- readShapePoly(system.file("etc/shapes/columbus.shp",package="spdep")[1]) plot(columbus,border="grey") coords <- coordinates(columbus) points(coords,pch=19) ### Defining the neighbours relation ### ######################################## # Defining relation of being neighbours: col.gal.nb plot(col.gal.nb,coords,add=TRUE) summary(col.gal.nb) is.symmetric.nb(col.gal.nb) # How many neigbours? print(cards <- card(col.gal.nb)) # Weights, we want to study the spatial autocorrelation; # normalized weights (sum up to 1): col.w <- nb2listw(col.gal.nb,style="W") col.w$weights # binary weights: col.b <- nb2listw(col.gal.nb,style="B") col.b$weights # second-order neighbours: col.lags <- nblag(col.gal.nb, 2) plot(col.lags[[2]], coords, add=TRUE, col="red", lty=2) summary(col.lags[[2]], coords) # k nearest neigbours (in Eukleidean space): library(RANN) # 4 nearest neighbours: col.k4.nb <- knn2nb(knearneigh(coords, 4)) is.symmetric.nb(col.k4.nb) plot(columbus,border="grey") plot(col.k4.nb,coords,arrows=TRUE,add=TRUE) # Find differences between two relations: col.k1.nb <- knn2nb(knearneigh(coords, 1)) col.k2.nb <- knn2nb(knearneigh(coords, 2)) diffs <- diffnb(col.k2.nb,col.k1.nb) plot(columbus, border="grey") plot(col.k1.nb,coords,add=TRUE) plot(diffs,coords,add=TRUE,col="red",lty=2) # Relation based on Euclidean distance; # two points are neighbours if closer than 0.5: col.d.nb <- dnearneigh(coords,0,0.5) is.symmetric.nb(col.d.nb) plot(columbus,border="grey") plot(col.d.nb,coords,add=TRUE) # Delaunay triangulation: col.tri.nb <- tri2nb(coords) plot(columbus,border="grey") plot(col.tri.nb,coords,add=TRUE) # no vertex lies inside the circumscribed circle # of any triangle; # maximizes the minimum angle in the triangles # (we do not want too sharp triangles); # dual to the Voronoi tesselation. ### Continuous data ### ####################### columbus$CRIME summary(columbus$CRIME) # We assume constant mean value and constant variance! # Moran's index for normalized weights, randomization assumption: moran.test(columbus$CRIME,listw=col.w) # Moran's index for normalized weights, normality assumption: moran.test(columbus$CRIME,listw=col.w,randomisation=FALSE) # Moran's index for binary weights: moran.test(columbus$CRIME,listw=col.b) # Moran's index for normalized weights given by 4 nearest neighbours: moran.test(columbus$CRIME,listw=nb2listw(col.k4.nb)) # Geary's index for normalized weights, randomization assumption: geary.test(columbus$CRIME,listw=col.w) # Geary's index for normalized weights, normality assumption: geary.test(columbus$CRIME,listw=col.b,randomisation=FALSE) # Geary's index for binary weights: geary.test(columbus$CRIME,listw=col.b) # Geary's index for normalized weights given by 4 nearest neighbours: geary.test(columbus$CRIME,listw=nb2listw(col.k4.nb)) ### Binary data ### ################### HICRIME <- cut(columbus$CRIME, breaks=c(0,35,80), labels=c("low","high")) plot(columbus) points(coords[HICRIME=="high",],col="red",pch=19) points(coords[HICRIME=="low",],col="blue",pch=19) # BB statistics, two tests for 'high' and 'low', # are they joined more frequently than expected? joincount.test(HICRIME, listw=col.w) # The same with binary weights: joincount.test(HICRIME, listw=col.b) # The same with different relation: joincount.test(HICRIME, listw=nb2listw(col.k4.nb)) # Note that the test are performed under the assumption of # 'nonfree sampling', i.e. using the hypergeometric sampling # scheme (fixed number of red and blue points). ### Regular grid ### #################### # Neighbours on a regular grid: nb7rt <- cell2nb(7, 7) nb7rt summary(nb7rt) # Positions of points: xyc <- attr(nb7rt, "region.id") xy <- matrix(as.integer(unlist(strsplit(xyc, ":"))), ncol=2, byrow=TRUE) plot(nb7rt, xy) # Number of neigbours: print(card(nb7rt)) # Toroidal situation (no edge-effects, every point # has 4 neighbours): nb7rtt <- cell2nb(7, 7, torus=TRUE) summary(nb7rtt) print(card(nb7rtt)) # Uncorrelated data on a regular grid: data7rt <- runif(49,0,1) moran.test(data7rt,nb2listw(nb7rt)) geary.test(data7rt,nb2listw(nb7rt)) # Binary data: HIdata <- cut(data7rt, breaks=c(0,0.6,1), labels=c("low","high")) plot(nb7rt, xy) points(xy[HIdata=="high",],col="red",pch=19) points(xy[HIdata=="low",],col="blue",pch=19) # Normalized weights: col.wd <- nb2listw(nb7rt,style="W") col.wd$weights # Binary weights: col.bd <- nb2listw(nb7rt,style="B") col.bd$weights # BB statistics, two tests for 'high' and 'low', # are they joined more frequently than expected? joincount.test(HIdata, listw=col.wd) # The same with binary weights: joincount.test(HIdata, listw=col.bd) ### Individual task: ### ######################## # Decide whether significant spatial autocorrelations # are present in the dataset 'afcon' with the neighbourhood # relation 'africa.rook.nb', or 'paper.nb', respectively; # the values themselves are recorded in the vector afcon$totcon. # Conflicts in Africa: data(afcon) ?afcon plot(africa.rook.nb, afxy) plot(diffnb(paper.nb, africa.rook.nb), afxy, col="red", add=TRUE) text(afxy, labels=attr(africa.rook.nb, "region.id"), pos=4, offset=0.4) HIdata <- cut(afcon$totcon, breaks=c(0,1000,6000), labels=c("low","high")) plot(africa.rook.nb, afxy) points(afcon[,1:2][HIdata=="high",],col="red",pch=19) points(afcon[,1:2][HIdata=="low",],col="blue",pch=19)