### NMST543 Spatial Statistics, exercise 3 ###
##############################################

library(spatstat)

#########################################
### Assessing interpoint interactions ###
#########################################

# We have already talked about the K-function. We discuss some
# other tools today.

### Visual inspection ###
#########################

# Fry plot       - calculate pairwise differences and plot them;
# Stienen diagram - plot circles with diameter = distance
#                   to the nearest neighbour (disk not shaded
#                   if the nearest neighbour may not be observed
#                   due to edge-effects).

plot(redwood)
fryplot(redwood) # when zooming, it is clear that coordinates are rounded
stienen(redwood)

plot(cells)
fryplot(cells)
stienen(cells)

plot(japanesepines)
fryplot(japanesepines)
stienen(japanesepines)

### Pair-correlation function ###
#################################

# Two ways to estimate the pcf:
?pcf.ppp # kernel smoothing
?pcf.fv  # differentiation of (smoothed) estimate of K-function

?shapley
plot(unmark(shapley))

# Kernel smoothing (takes a while!):
g.kernel <- pcf(shapley)
g.kernel
plot(g.kernel)

# Use estimated values of K-function (note that for
# this large dataset only border correction is used):
K <- Kest(shapley)
g.K <- pcf(K)
plot(g.K)

# The previous estimate is not very nice, but we may specify
# the amount of smoothing applied before differentiation:
g.K2 <- pcf(K, spar = 0.5)
plot(g.K2)

# We have in fact specified a parameter passed to the function
# smooth.spline. It is used to smooth the estimated K-function
# or its transformation before numerical differentiation.
# For details, see the methods "a", ..., "d" in ?pcf.fv.
# Distinguishing regularity and clustering (note that
# the translation and Ripley corrections are available):
plot(pcf(redwood))
plot(pcf(cells))
plot(pcf(japanesepines))

# Some problems with kernel estimation of pcf:

# 1) estimates with r close to 0 are unreliable (presumably based on
#    just a few pairs of points observed at distance r; also note the
#    factor 1/r in the formula, see the lecture notes)
plot(pcf(japanesepines))

# Instead of 1/r for all terms we could use 1/d_{ij} for each pair
# of points individually, where d_{ij} is the interpoint distance.
# By doing this we avoid the singularity near 0:
plot(pcf(japanesepines, divisor = "d"))

# 2) edge effects: which correction should we use if the estimates differ?
plot(pcf(redwood))
plot(pcf(cells))

# 3) choice of the smoothing kernel and its bandwidth:
plot(pcf(redwood))
plot(pcf(redwood, bw = 0.001))
plot(pcf(redwood, bw = 0.003))
plot(pcf(redwood, bw = 0.005))
plot(pcf(redwood, bw = 0.007))
plot(pcf(redwood, bw = 0.009))
attr(pcf(redwood), "bw") # the bandwidth that was selected automatically

plot(pcf(redwood, kernel = "epanechnikov", bw = 0.009), ylim = c(0, 3.5))
plot(pcf(redwood, kernel = "triangular", bw = 0.009), ylim = c(0, 3.5))
plot(pcf(redwood, kernel = "gaussian", bw = 0.009), ylim = c(0, 3.5))

# Conclusion: choice of suitable bandwidth is essential,
# choice of the kernel type is usually not.

### Empty-space function F ###
##############################

# Non-parametric estimates with different edge-corrections:
# - Kaplan-Meier (black)
# - reduced sample or border correction (red)
# - Chiu-Stoyan (green)
Fj <- Fest(japanesepines)
plot(Fj)

# Theoretical value for Poisson process (blue): F(r) = 1 - exp(-lambda*pi*r^2)
plot(Fj, cbind(km, theo) ~ theo)

# Discussion: which values indicate regularity/clustering?
# Higher or lower than the values for Poisson process?
### Nearest-neighbour distance distribution function G ###
##########################################################

# Non-parametric estimates with different edge-corrections:
# - Kaplan-Meier (black)
# - reduced sample or border correction (red)
# - Hanisch (green)
Gj <- Gest(japanesepines)
plot(Gj)

# Theoretical value for Poisson process (blue): G(r) = 1 - exp(-lambda*pi*r^2)
plot(Gj, cbind(km, theo) ~ theo)

# Discussion: which values indicate regularity/clustering?
# Higher or lower than the values for Poisson process?

### J-function ###
##################

# Deviations from complete spatial randomness often affect the
# F- and G-function in opposite ways, therefore it is useful to define
# J(r) = (1-G(r)) / (1-F(r))

# Non-parametric estimates with different edge-corrections:
# - Kaplan-Meier (black)
# - Hanisch (red)
# - reduced sample or border correction (green)
Jj <- Jest(japanesepines)
plot(Jj)

# Theoretical value for Poisson process (blue): J(r) = 1
plot(Jj, cbind(km, theo) ~ theo) # not useful here!

# Discussion: which values indicate regularity/clustering?
# Higher or lower than the values for Poisson process?

# A rough approximation to J(r), ignoring all moments higher than
# the second moment, is J(r) - 1 =(approx) -lambda*(K(r) - pi*r^2).
# Hence, K(r) is in a way a second-order approximation to J(r).
# It should not be too surprising if the empirical J- and K-functions
# support the same conclusion about a dataset.

### Summary ###
###############

# Quick plotting of (some of) the summary statistics together:
plot(allstats(redwood), legend = FALSE)
plot(allstats(cells), legend = FALSE)
plot(allstats(japanesepines), legend = FALSE)

# Discussion: do all the functions above support the same conclusion?

### Warning!!! ###
##################

# Sometimes people in practice tend to use these summary statistics
# uncritically and without detailed considerations. However, it is
# important to realize that:
# 1.
#    the functions F, G, J and K above were defined and estimated under
#    the stationarity assumption,
# 2. the distribution of a point process is not fully determined by them,
# 3. if the distribution of the process is in fact not stationary,
#    the discrepancies between the empirical and the theoretical values
#    may not indicate the presence of interpoint interactions; they may
#    be caused e.g. by a non-constant intensity function.

# For illustration of 2. we consider the Baddeley-Silverman process
# which has the same K-function as the Poisson process:
?rcell
BS <- rcell(nx = 15)
plot(BS)
plot(Kest(BS))

### Individual task ###
#######################

# Discuss if and how the estimates of the K-, F- and G-functions depend on the
# intensity of the process. If the intensity increases/decreases while all
# the other properties of the process stay the same, how do the estimates change?
# Check this also empirically on the three datasets above using the function
# "rthin". Using that you can delete individual points of the pattern with given
# probability, independently of the others.