### NMST543 Spatial Statistics, exercise 6 ### ############################################## library(spatstat) ######################################### ### Point patterns on linear networks ### ######################################### # Multidimensional point pattern - street crimes in Chicago # (close to the University of Chicago - you may try # Google Maps to see that the road system is "really there") ?chicago X <- chicago uX <- unmark(chicago) plot(uX, chars=19, main="") uX plot(X, chars=19, cols=1:7, show.window=FALSE, main="") plot(split(chicago),mar.panel=c(0,0,0,0),chars=19) # How to treat such a dataset? # Our approach: the network is fixed, non-random; # positions and the number of observed points # on the network are random. plot(uX$domain, main="") summary(uX$domain) plot(uX, chars=19, main="", show.window=FALSE) # Setting up a point process on a linear network: # - finite number of line segments of finite length # (the whole network is contained in a compact set) # - any two segments can have at most one point in common, # and it has to be the end-point # - sigma-algebra of subsets of the linear network is the # trace of the Borel sigma-algebra in R^2 # - consider locally finite subsets of the linear network # - sigma-algebra on the system of locally finite subsets # of the network is defined similarly to the planar case. # In this approach we assume we observe the whole network, # not a part of a bigger network. # Hence we do not have to worry about edge-effects etc., # we do not miss any information. # Note: it is not appropriate to talk about stationarity! ### Estimating intensity ### ############################ # Constant intensity: mean number of point per unit length int.uX <- intensity(uX) int.uX intensity(as.ppp(uX)) # Non-constant intensity: kernel estimation plot(spiders) plot(density(spiders, 40), style="width", adjust=5) ?density.lpp # From now we consider first- and second-order homogeneity. # What does a Poisson process on the network look like? Xp <- rpoislpp(lambda=int.uX, L=uX$domain) Xp plot(Xp, chars=19, main="") par(mfrow=c(1,2), mar=c(0,0,2,0)) plot(uX, chars=19, main="Data", show.window=FALSE) plot(Xp, chars=19, main="Poisson", show.window=FALSE) dev.off() # Is it a Poisson process as we know it? No! plot(as.ppp(Xp), chars=19, main="") # Envelope tests can reveal the difference: set.seed(123456) env.pois <- envelope(as.ppp(Xp), fun=Kest, do.pwrong=TRUE, savepatterns=TRUE, savefuns=TRUE, nsim=3999) plot(env.pois) # the data curve leaves the envelope? yes attr(env.pois,"pwrong") # p-value of the global test = 0.0270 mad.test(env.pois) # p-value = 0.0005 dclf.test(env.pois) # p-value = 0.0005 set.seed(123457) env.pois2 <- envelope(as.ppp(Xp), fun=Kest, do.pwrong=TRUE, savepatterns=TRUE, savefuns=TRUE, nsim=3999) plot(env.pois2) # the data curve leaves the envelope? yes, but very little attr(env.pois2,"pwrong") # p-value of the global test = 0.0225 install.packages("devtools") library(devtools) install_github('myllym/spptest', username = 'myllym', ref = 'no_fastdepth') library(spptest) res <- rank_envelope(env.pois,alpha=0.05) # 95% envelope plot(res) # the data curve leaves the envelope # The problem is that we simulated realizations of the Poisson # process in plane, not on the network: env.pois.l <- envelope(as.ppp(Xp), simulate=expression(as.ppp(rpoislpp(lambda=int.uX, L=uX$domain))), fun=Kest, do.pwrong=TRUE, savepatterns=TRUE, savefuns=TRUE, nsim=3999) plot(env.pois.l) # the data curve leaves the envelope? attr(env.pois.l,"pwrong") # p-value of the global test = 0.0185 mad.test(env.pois.l) # p-value = 0.637 dclf.test(env.pois.l) # p-value = 0.862 res.l <- rank_envelope(env.pois.l,alpha=0.05) # 95% envelope plot(res.l) # the data curve does not leave the envelope # The result of the testing is that we do not reject. # However, is the K-function we used so far really an appropriate # characteristic of a point process on a linear network? ### K-functions on the linear network ### ######################################### # Problem: the Euclidean distance between a pair of points does not # really make sense when assessing their interaction! # We can define so-called "network distance" as the length # of the shortest path between the points along the network. # The corresponding characteristic is then the "network K-function" # linearK. # This illustrates the fact that for different application it may be relevant # to consider different variants of the K-function such as # - the directional K-function (one or two circular sectors instead of the disc) # - cyllindrical K-function (for 3D data) # - etc. ?linearK plot(linearK(Xp)) plot(linearK(uX)) # The original, uncorrected version (Okabe) of the network K-function was # (up to a constant) the mean number of pairs of points which are # network-closer than r. # Disadvantage of the Okabe network K-function: even for the Poisson process # the theoretical values depend on the geometry of the network. Xp2 <- rpoislpp(lambda=0.005, L=spiders$domain) plot(Xp2, chars=19) plot(linearK(Xp2,correction="none")) plot(linearK(Xp,correction="none")) # A solution to this problem is the so-called Ang geometric correction. # Define the "network r-neighbourhood" of a point as the set of all the # points of the network which are network-closer than r. For different # points of the network the size of the network r-neigbourhood (the total # length of the relevant segments) is different. # This cannot happen in the planar case since the area of the ball # with radius r is the same in all parts of the plane. # The geometric correction consists in weighting the contribution of each # pair of points x, y by the size of the network r-neigbourhood of point x # where r = network distance between x, y. # For a Poisson process the theoretical value of the corrected network K-function # is K_{net}(r) = r no matter the shape and geometry of the network. # Hence we can compare tendencies towards regularity or clustering # for patterns observed on different networks. plot(uX$domain, lwd=2) lineardisc(uX$domain, x=c(639,1190), r=300) Xp2 <- rpoislpp(lambda=0.005, L=spiders$domain) plot(linearK(Xp2,correction="Ang")) plot(linearK(Xp,correction="Ang")) ### Individual task: ### ######################## # Determine if the "chicago" dataset corresponds # to a homogeneous Poisson process.