# NMSA407 Linear Regression: Tutorial

Numeric covariate: regression splines

Data Motorcycle

## Introduction

### Load used data and calculate basic summaries

``````data(Motorcycle, package = "mffSM")
``````
``````##   time haccel
## 1  2.4    0.0
## 2  2.6   -1.3
## 3  3.2   -2.7
## 4  3.6    0.0
## 5  4.0   -2.7
## 6  6.2   -2.7
``````
``````dim(Motorcycle)
``````
``````## [1] 133   2
``````
``````summary(Motorcycle)
``````
``````##       time           haccel
##  Min.   : 2.40   Min.   :-134.00
##  1st Qu.:15.80   1st Qu.: -54.90
##  Median :24.00   Median : -13.30
##  Mean   :25.55   Mean   : -25.55
##  3rd Qu.:35.20   3rd Qu.:   0.00
##  Max.   :65.60   Max.   :  75.00
``````

## Dependence of `haccel` on `time`

### Basis scatterplot

``````par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(haccel ~ time, data = Motorcycle, pch = 21, bg = "orange", col = "red3",
xlab = "Time [ms]", ylab = "Head acceleration [g]")
``````

### Basic scatterplot equipped with LOWESS

``````par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(haccel ~ time, data = Motorcycle, pch = 21, bg = "palegoldenrod", col = "orange",
xlab = "Time [ms]", ylab = "Head acceleration [g]")
lines(with(Motorcycle, lowess(time, haccel)), col = "red3", lwd = 2)
``````

## Regression spline (cubic B-spline)

### Knots of the spline

``````knots <- c(0, 11, 12, 13, 20, 30, 32, 34, 40, 50, 60)
inner <- knots[-c(1, length(knots))]
bound <- knots[c(1, length(knots))]
``````

### B-spline basis used here

• Function `bs` comes from package `splines`.
``````library("splines")
xgrid <- seq(knots[1], knots[length(knots)], length = 500)
Bgrid <- bs(xgrid, knots = inner, Boundary.knots = bound, degree = 3, intercept = TRUE)
``````
``````COL <- rainbow_hcl(ncol(Bgrid), c = 80)
par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(xgrid, Bgrid[,1], type = "l", col = COL[1], lwd = 2, xlab = "z", ylab = "B(z)",
xlim = range(knots), ylim = range(Bgrid, na.rm = TRUE))
for (j in 2:ncol(Bgrid)) lines(xgrid, Bgrid[,j], col = COL[j], lwd = 2)
points(knots, rep(0, length(knots)), pch = 21, bg = "blue", col = "skyblue", cex = 1.5)
``````

### B-spline model matrix

``````Data <- Motorcycle
#Data <- subset(Motorcycle, time <= knots[length(knots)])    ## remove 1 observation with time = 65.6
B <- bs(Data[, "time"], knots = inner, Boundary.knots = bound, degree = 3, intercept = TRUE)
``````
``````## Warning in bs(Data[, "time"], knots = inner, Boundary.knots = bound, degree = 3, : some 'x' values
## beyond boundary knots may cause ill-conditioned bases
``````
``````round(B[1:5,], 3)
``````
``````##          1     2     3     4 5 6 7 8 9 10 11 12 13
## [1,] 0.478 0.409 0.105 0.008 0 0 0 0 0  0  0  0  0
## [2,] 0.445 0.424 0.120 0.010 0 0 0 0 0  0  0  0  0
## [3,] 0.357 0.454 0.170 0.019 0 0 0 0 0  0  0  0  0
## [4,] 0.304 0.463 0.206 0.027 0 0 0 0 0  0  0  0  0
## [5,] 0.258 0.463 0.242 0.037 0 0 0 0 0  0  0  0  0
``````

### Fit

``````m1 <- lm(haccel ~ B - 1, data = Data)
summary(m1)
``````
``````##
## Call:
## lm(formula = haccel ~ B - 1, data = Data)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -71.857 -11.740  -0.268  10.356  55.249
##
## Coefficients:
##     Estimate Std. Error t value Pr(>|t|)
## B1   -11.614     83.252  -0.140    0.889
## B2    12.450     81.282   0.153    0.879
## B3   -13.988     45.345  -0.308    0.758
## B4     2.984     18.894   0.158    0.875
## B5     6.115     12.139   0.504    0.615
## B6  -237.283     18.850 -12.588  < 2e-16 ***
## B7    17.346     14.094   1.231    0.221
## B8    53.248     12.635   4.214 4.88e-05 ***
## B9     5.102     13.051   0.391    0.697
## B10   12.609     17.146   0.735    0.464
## B11  -21.738     23.825  -0.912    0.363
## B12   10.879     16.284   0.668    0.505
## B13    7.885     17.624   0.447    0.655
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22.61 on 120 degrees of freedom
## Multiple R-squared:  0.8447, Adjusted R-squared:  0.8278
## F-statistic: 50.19 on 13 and 120 DF,  p-value: < 2.2e-16
``````

### Coefficient of determination

``````m0 <- lm(haccel ~ 1, data = Data)
SSe <- deviance(m1)
SST <- deviance(m0)
R2 <- 1 - SSe/SST
R2adj <- 1 - (SSe/m1\$df.residual) / (SST/m0\$df.residual)
print(R2B)
``````
``````##        R2     R2adj
## 0.8009163 0.7810079
``````

### Fitted regression function

``````pm1 <- as.numeric(Bgrid %*% coef(m1))
pm1[1:10]
``````
``````##  [1] -11.613930 -10.842009 -10.104058  -9.399422  -8.727445  -8.087471  -7.478845  -6.900910
##  [9]  -6.353011  -5.834492
``````

### Scatterplot equiped by the fitted regression function

``````par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(haccel ~ time, data = Motorcycle, pch = 21, bg = "palegoldenrod", col = "orange",
xlab = "Time [ms]", ylab = "Head acceleration [g]")
lines(xgrid, pm1, col = "red3", lwd = 2)
points(knots, rep(min(Motorcycle[, "haccel"]), length(knots)), pch = 21, bg = "blue", col = "skyblue", cex = 1.5)
``````

### Basic residual plots

• Function `plotLM` comes from package `mffSM`.
``````library("mffSM")
plotLM(m1)
``````

### Residuals versus the covariate

``````lwsRes <- lowess(Motorcycle[, "time"], residuals(m1))
par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(residuals(m1) ~ Motorcycle[, "time"], pch = 21, col = "blue4", bg = "skyblue",
xlab = "Time", ylab = "Residuals")
lines(lwsRes[["x"]], lwsRes[["y"]], col = "red", lwd = 2)
``````