### NMST543 Spatial Statistics, exercise 2 ###
##############################################


library(spatstat)

###########################################
### K-function for stationary processes ###
###########################################

# K-function defined for stationary processes.

# lambda*K(r) = expected number of additional points within
# distance r > 0 from a typical point of the process.

?japanesepines
plot(japanesepines)
Kjap <- Kest(japanesepines) # non-parametric estimation
plot(Kjap)                  # plot with recommended range of r
plot(Kjap, legend=FALSE)


### Regularity vs. clustering ###
#################################

plot(redwood)
plot(cells)
plot(japanesepines)

plot(Kest(redwood), legend=FALSE)      
plot(Kest(cells), legend=FALSE)
plot(Kest(japanesepines), legend=FALSE)


### Discussion - edge-effects ###
#################################

# Edge-effects constitute a form of sampling bias (pairs of
# points with larger distance are less likely to be observed).

# Border correction (or minus sampling) discards part of the data.

# Alternatively, Horvitz-Thompson principle can be used to assign
# weights to observed pairs of points in order to remove the 
# sampling bias (Ripley's isotropic correction or translation correction).

# By default all the three corrections are used: 
#   iso    lty=1 (solid line), col=1 (black)
#   trans  lty=2 (dashed line), col=2 (red)
#   border lty=3 (dotted line), col=3 (green)

# Theoretical (benchmark) value for spatial Poisson process is K(r) = pi*r^2:
#   theo   lty=4 (dash-dotted line), col=4 (blue)

# The estimated functions do not differ much from the theoretical one.
# Conclusion: complete spatial randomness?

# Estimates of K(r) calculated on a finite discrete grid of r values
# (available in the vector Kjap$r). Estimated values are stored in
# Kjap$iso, Kjap$trans, Kjap$border (note also Kjap$theo).
Kjap$r
Kjap$theo
Kjap$iso

# One can of course choose just one type of correction:
Kjap_iso <- Kest(japanesepines,correction="isotropic")  
plot(Kjap_iso)
# Or more than one:
Kjap_tb <- Kest(japanesepines,correction=c("translate","border"))
plot(Kjap_tb)
# Also the uncorrected estimate can be computed (correction="none"):
plot(Kest(japanesepines,correction=c("none","translate","isotropic")))

# Plotting transformed values of the estimates:
plot(Kjap, trans ~ r)
plot(Kjap, iso - pi*r^2 ~ r)
plot(Kjap,cbind(iso,theo) ~ theo)
# so-called L-function, L(r) = L(r) = sqrt( K(r) / pi ):
plot(Kjap, sqrt(./pi) ~ r)


### Discussion - choosing edge-correction ###
#############################################

# Using SOME edge-correction is essential, WHICH one is usually not:
sim.pois <- rpoispp(200)
plot(Kest(sim.pois, correction="none"), legend="FALSE")
plot(Kest(sim.pois, correction=c("none","isotropic")), legend="FALSE")

# However, border correction is not suitable for patterns with low
# number of points - it discards too much information. Isotropic or
# translation correction more useful in such a case.

# Border correction does not even secure monotonicity of the estimates:
plot(Kest(rpoispp(20), correction="border"), legend=FALSE)

# For high number of points (thousands to tens of thousands) border correction
# usually works satisfactorily, amount of discarded information may be
# negligible (depending on the size of W and the scale of r).

# For huge datasets (milions of points) it is advisable from the computational
# point of view NOT to use any edge-correction. The bias should be negligible.

# Large discrepancies between the different estimates (different edge-corrections)
# indicate something unusual in the data, e.g. violation of the stationarity assumption.
sim.pois <- rpoispp(200)
plot(sim.pois, pch=20)
plot(Kest(sim.pois), legend=FALSE)
sim.pois2 <- rpoispp(lambda=function(x,y){400*x})
plot(sim.pois2, pch=20)
plot(Kest(sim.pois2), legend=FALSE)


### Discussion - variance of estimators ###
###########################################

# The variance of hat(K)(r) grows with r, for Poisson process:
Kp <- Kest(sim.pois)
plot(Kp, legend=FALSE)

env <- envelope(sim.pois)
plot(env, legend=FALSE)
# We apply transformation to get more clear picture:
plot(env, .-pi*r^2~r, legend=FALSE)

# This is the reason for using the L-function: L(r) = sqrt( K(r) / pi );
# for planar Poisson process K(r) = pi r^2, and hence L(r) = r.
plot(env, sqrt(./pi)~r, legend=FALSE)
# Centered L-function:
plot(env, sqrt(./pi)-r~r, legend=FALSE)

# The variance is stabilized now, the deviations from the theoretical values
# now have "same weight" for the whole range of values of r.


### Discussion - range of r values ###
######################################

# Estimates of K-function should not be used for analysis if r is "too big".
# The increments of hat(K(r)) are calculated from low number of observed
# pairs of points with high weights, i.e. estimation is unstable.

# Spatstat library uses a rule of thumb to decide what is "recommended range"
# of r values and refuses to plot longer range. It's not a bug, it's a feature.

# The description gives "Recommended range" and "Available range":
Kest(sim.pois,correction=c("none","isotropic"))
plot(Kest(sim.pois,correction=c("none","isotropic")), legend=FALSE)

# One may force the software to calculate estimates for large values of r:
pts <- 3*Kest(sim.pois,correction=c("none","isotropic"))$r
Kest(sim.pois,correction=c("none","isotropic"),r=pts)
plot(Kest(sim.pois,correction=c("none","isotropic"),r=pts), legend=FALSE)

# And then force it even harder to plot the estimates:
plot(Kest(sim.pois,correction=c("none","isotropic"),r=pts),xlim=c(0,0.75), legend=FALSE)


### Conclusion ###
##################

# 1) edge-corrections work well even in complicated situations,
#    it is appropriate to use them;

# 2) it is necessary to determine the "correct" range of r values
#    to be used in the analysis.