
### NMST543 Spatial Statistics, exercise 6 ###
##############################################


library(spatstat)

#########################################
### Point patterns on linear networks ###
#########################################

# Multidimensional point pattern - street crimes in Chicago
# (close to the University of Chicago - you may try 
# Google Maps to see that the road system is "really there")
?chicago
X <- chicago
uX <- unmark(chicago)

plot(uX, chars=19, main="")
uX

plot(X, chars=19, cols=1:7, show.window=FALSE, main="")
plot(split(chicago),mar.panel=c(0,0,0,0),chars=19)

# How to treat such a dataset?

# Our approach: the network is fixed, non-random;
#               positions and the number of observed points
#               on the network are random.
plot(uX$domain, main="")
summary(uX$domain)
plot(uX, chars=19, main="", show.window=FALSE)

# Setting up a point process on a linear network:
# - finite number of line segments of finite length
#   (the whole network is contained in a compact set)
# - any two segments can have at most one point in common,
#   and it has to be the end-point
# - sigma-algebra of subsets of the linear network is the
#   trace of the Borel sigma-algebra in R^2
# - consider locally finite subsets of the linear network
# - sigma-algebra on the system of locally finite subsets
#   of the network is defined similarly to the planar case.

# In this approach we assume we observe the whole network,
# not a part of a bigger network.
# Hence we do not have to worry about edge-effects etc.,
# we do not miss any information.

# Note: it is not appropriate to talk about stationarity!


### Estimating intensity ###
############################

# Constant intensity: mean number of point per unit length
int.uX <- intensity(uX)
int.uX
intensity(as.ppp(uX))

# Non-constant intensity: kernel estimation
plot(spiders)
plot(density(spiders, 40), style="width", adjust=5)
?density.lpp

# From now we consider first- and second-order homogeneity.
# (What is the precise meaning?)

# What does a Poisson process on the network look like?
Xp <- rpoislpp(lambda=int.uX, L=uX$domain)
Xp
plot(Xp, chars=19, main="")

par(mfrow=c(1,2), mar=c(0,0,2,0))
plot(uX, chars=19, main="Data", show.window=FALSE)
plot(Xp, chars=19, main="Poisson", show.window=FALSE)
dev.off()

# Is it a Poisson process as we know it? No!
plot(Xp, chars=19, main="Poisson", show.window=FALSE)
plot(as.ppp(Xp), chars=19, main="")

# Envelope tests can reveal the difference:
set.seed(12345)
env.pois <- envelope(as.ppp(Xp), fun=Kest, do.pwrong=TRUE, savepatterns=TRUE, savefuns=TRUE, nsim=3999)
plot(env.pois)           # the data curve leaves the envelope? yes, but very little
attr(env.pois,"pwrong")  # significance level of the global test = 0.0313
mad.test(env.pois)       # p-value = 0.00050
dclf.test(env.pois)      # p-value = 0.00075


library(GET)
res <- global_envelope_test(env.pois,alpha=0.05)  # 95% envelope
plot(res) # the data curve leaves the envelope, p-value = 0.006


# The problem is that we simulated realizations of the Poisson
# process in plane, not on the network:
env.pois.l <- envelope(as.ppp(Xp), simulate=expression(as.ppp(rpoislpp(lambda=int.uX, L=uX$domain))), fun=Kest, do.pwrong=TRUE, savepatterns=TRUE, savefuns=TRUE, nsim=3999)
plot(env.pois.l)           # the data curve leaves the envelope? no
attr(env.pois.l,"pwrong")  # significance level of the global test = 0.0198
mad.test(env.pois.l)       # p-value = 0.487
dclf.test(env.pois.l)      # p-value = 0.534

res.l <- global_envelope_test(env.pois.l,alpha=0.05)  # 95% envelope
plot(res.l) # the data curve does not leave the envelope, p-value = 0.089

# The result of the testing is that we do not reject.
# However, is the K-function we used so far really an appropriate
# characteristic of a point process on a linear network?


### K-functions on the linear network ###
#########################################

# Problem: the Euclidean distance between a pair of points does not
#          really make sense when assessing their interaction!

#          We can define so-called "network distance" as the length
#          of the shortest path between the points along the network.
#          The corresponding characteristic is then the "network K-function"
#          linearK.

# This illustrates the fact that for different application it may be relevant
# to consider different variants of the K-function such as
# - the directional K-function (one or two circular sectors instead of the disc)
# - cyllindrical K-function (for 3D data)
# - etc.

?linearK
plot(linearK(Xp))
plot(linearK(uX))

# The original, uncorrected version (Okabe) of the network K-function was
# (up to a constant) the mean number of pairs of points which are
# network-closer than r.

# Disadvantage of the Okabe network K-function: even for the Poisson process
# the theoretical values depend on the geometry of the network.
Xp2 <- rpoislpp(lambda=0.005, L=spiders$domain)
plot(Xp2, chars=19)
plot(linearK(Xp2,correction="none"), xlim=c(0,1000))
plot(linearK(Xp,correction="none"), xlim=c(0,1000))

# A solution to this problem is the so-called Ang geometric correction.
# Define the "network r-neighbourhood" of a point as the set of all the
# points of the network which are network-closer than r. For different
# points of the network the size of the network r-neigbourhood (the total
# length of the relevant segments) is different.

# This cannot happen in the planar case since the area of the ball
# with radius r is the same in all parts of the plane.

# The geometric correction consists in weighting the contribution of each
# pair of points x, y by the number of points on the boundary of the
# network r-neigbourhood of point x, where r = network distance between x, y.
# For a Poisson process the theoretical value of the corrected network K-function
# is K_{net}(r) = r no matter the shape and geometry of the network.
# Hence we can compare tendencies towards regularity or clustering
# for patterns observed on different networks.

plot(uX$domain, lwd=2)
lineardisc(uX$domain, x=c(639,1190), r=300)

Xp2 <- rpoislpp(lambda=0.005, L=spiders$domain)
plot(linearK(Xp2,correction="Ang"), xlim=c(0,1000))
plot(linearK(Xp,correction="Ang"), xlim=c(0,1000))


### Individual task: ###
########################

# Determine if the "chicago" dataset corresponds
# to a homogeneous Poisson process.
