
### NMST543 Spatial Statistics, exercise 5 ###
##############################################


library(spatstat)

# Today we discuss some basic approaches for fitting
# parametric models of point processes.


### Minimum contrast ###
########################

# Clustered point process models can be fitted to an observed
# point pattern using the minimum contrast method ("kppm" command).
# By default, the K-function is used.

# We fit a Thomas process model to the "redwood" dataset:
plot(redwood)
fit1 <- kppm(redwood, ~1, "Thomas")
fit1
plot(fit1)

# First argument of "kppm" is the point pattern;
# second argument is a formula for the logarithm of the intensity function
#   (~1 corresponds to stationarity);
# third argument specifies the type of interaction.

# Estimated parameters (mu computed from the total number of points,
# it does not appear in the formula for the K-function):
fit1$modelpar

# We can use different edge-correction or specify other settings for "Kest":
fit2 <- kppm(redwood, ~1, "Thomas", statargs=list(correction="translate"))
fit2$modelpar

# We fit a Matern cluster process model:
fitM <- kppm(redwood, ~1, "MatClust")
plot(fitM)
fitM$modelpar

# The result of the fitting function is an object of class "kppm".
# We can simulate easily from the fitted model (using simulate.kppm):
plot(simulate(fit1, nsim = 4))

# Also, envelopes corresponding to the fitted model can be computed
# (using envelope.kppm):
plot(envelope(fit1, Lest, nsim = 39))

# The minimum contrast method can be based on other characteristics such as
# the pair-correlation function:
fitp <- kppm(redwood, ~1, "Thomas", statistic = "pcf")
plot(fitp)
fitp$modelpar

# Detailed information on the settings of the estimation procedure
# (integration range, exponents p, q):
fitp$Fit

# We can specify these settings, e.g. based on some recommendation
# in the literature (Diggle's book):
fitp2 <- kppm(redwood, ~1, "Thomas", statistic = "pcf", q=1/2)
fitp2$modelpar
fitp2$Fit

fitp3 <- kppm(redwood, ~1, "Thomas", statistic = "pcf", q=1/2, rmax=0.15)
fitp3$modelpar
fitp3$Fit

# The minimum contrast method can be used also for other models for which
# an analytic formula for the (theoretical) K-function or pair-correlation
# function is known, as a function of some parameters, at least in an integral form.
# E.g. the log-Gaussian Cox process with an exponential covariance function:
?lgcp.estpcf  # min. contrast with the pair-correlation function
?lgcp.estK    # min. contrast with the K-function

# In this case we have to specify the initial value for the optimization procedure:
fitLGCP <- lgcp.estK(redwood, c(sigma2 = 0.1, alpha = 1))
fitLGCP$modelpar

fitLGCP2 <- lgcp.estpcf(redwood, c(sigma2 = 0.1, alpha = 1), rmin=0.01, rmax=0.25)
fitLGCP2$modelpar

### Composite likelihood & Palm likelihood ###
##############################################

# For cluster point processes also the composite likelihood method is available:
fitCL1 <- kppm(redwood, ~1, "Thomas", method="clik2")
fitCL1$modelpar

fitCL2 <- kppm(redwood, ~1, "Thomas", method="clik2", rmax=0.20)
fitCL2$modelpar

fitCL3 <- kppm(redwood, ~1, "Thomas", method="clik2", rmax=0.15)
fitCL3$modelpar

# The Palm likelihood method can be used, too:
?kppm


### Pseudolikelihood ###
########################

# Approximation to the MLE based on the Papangelou conditional intensity
# (for a Poisson process it is precisely MLE).

# We consider a finite point process with a density w.r.t. a unit-rate
# Poisson process, on a compact window, which has a "reasonable" form
# of the Papangelou conditional intensity.

# To estimate model parameters we use "ppm(X, ~ terms, V)".

# First argument is the point pattern;
# second argument describes the first-order terms in the potential;
# third argument is an object of class "interact" and describes
#   the second-order interactions.

# We fitt a Strauss process model to the "cells" dataset
# (assuming we know the interaction range r):
fitCELLS <- ppm(cells, ~1, Strauss(r = 0.1))
fitCELLS

# One can also consider inhomogeneity in the first-order terms,
# e.g. log-linear dependence on the coordinates:
fitCELLS2 <- ppm(cells, ~x + y, Strauss(r = 0.1))
fitCELLS2

# We can simulate from the fitted model using "rmh"
# (involves an MCMC chain):
Xsim <- rmh(fitCELLS)

par(mfrow=c(1,2), mar=c(1,1,1,1))
plot(cells, main = "Cells")
plot(Xsim, main = "Simulation")
dev.off()

# The estimated model is probably not completely wrong,
# however, the interaction distance could be higher:
min(nndist(cells))
par(mfrow=c(1,2))
plot(Kest(Xsim, correction="translate"))
plot(Kest(cells, correction="translate"))
dev.off()

# Plotting envelopes:
env1 <- envelope(fitCELLS, nsim = 99, do.pwrong=TRUE)
env1
plot(env1)

# The nuisance parameters such as the range of interactions "r"
# for the Strauss process cannot be estimated directly using "ppm".
# Even the theory considering estimation of such parameters is 
# not well developed or satisfactory.

# In some special cases we know the maximum likelihood estimate of
# the nuisance parameter. E.g. for the hard-core process (Strauss process
# with interaction parameter gamma=0) the nuisance parameter is the
# range "r" and its MLE is the minimum observed interpoint distance.

# Hence, it might be reasonable to the following for the cells dataset:
rhat <- min(nndist(cells))
rhat <- rhat * 0.99999
ppm(cells, ~1, Strauss(r = rhat))

# We can also use so-called profile pseudolikelihood (looking for the value
# of the nuisance parameter on some grid), where for each considered value of
# the nuisance parameter we maximize the pseudolikelihood w.r.t. the other,
# regular parameters.
data(simdat)                                     # simulated dataset
df <- data.frame(r = seq(0.05, 2, by = 0.025))   # we specify the values of "r"
df
pfit <- profilepl(df, Strauss, simdat, ~1)       # the actual estimation

# Summary:
pfit
plot(pfit)


# The best available parameter estimate:
pfit$fit
env2 <- envelope(pfit$fit, nsim = 99, do.pwrong=TRUE)
env2
plot(env2)



### Individual task: ###
########################

# Fit a Strauss process model to the "swedishpines" dataset and determine
# using the envelope and/or the deviation test (dclf.test, mad.test) whether
# the dataset follows the fitted model.
plot(swedishpines)
?swedishpines
