### --------------------------------------------------------------------------------------###
### Winter Term 2019/2020                                                                 ###
### NMSA407 - Linear Regression                                                           ###
###                                                                                       ###
### EXERCISE #6  Additivity and partial effects of covariates                             ###
###              (Simple linear regression models with interactions)                      ###
###                                                                                       ###
### --------------------------------------------------------------------------------------###


# In K4/K10/K11 you might not be able to load the package 'mffSM' because of a low 
# version of R software (< 3.2.0). In this case tyou can run R software from the 
# network disc L, where the version 3.2.2. is installed. The path is
#  L:\Statistika\R\bin\x64\Rgui.exe .                                              


### ======================
### Working directory
### ======================
setwd("H:/nmsa407_LinRegr/")

library("mffSM")

### ============================
### Data we will be working on
### ============================
data(Dris, package = "mffSM")
# help("Dris")

head(Dris)
summary(Dris)

### =======================================================
### As in one of the previous exercise classes we will be 
### interested in the dependence of yield on the amount of 
### magnesium yield ~ Mg but now we include also other 
### measurements to improve our model. 
### =======================================================

### Add transformations of Mg, Ca, N to the dataset 
Dris <- transform(Dris, lMg = log2(Mg), lMg2 = (log2(Mg))^2, lCa = log2(Ca), 
                  lCa2 = (log2(Ca))^2, lN = log2(N), lN2=(log2(N))^2)


### First, start with adding Ca in the form of log2(Ca)

### Let start with a simple additive model 
summary(m1_addit <- lm(yield ~ lMg + lCa, data = Dris))

### QUESTIONS:
### Interpret the estimated regression coefficients. 
### What is being tested on the rows started with 'lMg' and 'lCa'?
### Interpret the test on the last line of the output started with 'F-statistic'.

### Submodel 1: yield ~ lMg
m1_p1 = lm(yield ~ lMg, data = Dris)
anova(m1_p1,m1_addit)
summary(m1_addit)$coef[3,3]^2

### Submodel 2: yield ~ lCa
m1_p2 = lm(yield ~ lCa, data = Dris)
anova(m1_p2,m1_addit)
summary(m1_addit)$coef[2,3]^2

### Submodel 3: yield ~ 1
m1_p3 = lm(yield ~ 1, data = Dris)
anova(m1_p3,m1_addit)
summary(m1_addit)$fstat

### Compare the above output with the following model output:
summary(m1_addit)
summary(lm(yield ~ lMg, data = Dris))

### QUESTION:
### What is the difference in interpretation of the regression coefficients? 


### Interpret the following models that include also the quadratic term of lMg2.
summary(m20 <- lm(yield ~ lMg + lMg2, data = Dris))
summary(m21 <- lm(yield ~ lMg + lMg2 + lCa, data = Dris)) 
summary(m22 <- lm(yield ~ lMg + lMg2 + lCa + lCa2, data = Dris)) 

### The change of lMg by +log2(1.1) <=> the increase of Mg by 10%
eps <- 0.1;
delta <- log2(1+eps);
lMg.grid <- seq(min(Dris$lMg), max(Dris$lMg)-delta, length = 1000); 
effect.lMg <- function(b1,b2) b1*delta + b2*(2*delta*lMg.grid + delta^2); 

### Plotting the effect of lMg on the yield 
plot(lMg.grid, effect.lMg(coef(m20)['lMg'], coef(m20)['lMg2']), col = "blue", xlab = "log2(Mg)", ylab = "Change of expected yield", main = "The effect of 10%-increase of Mg on the yield", type="l", lwd=2, ylim=range(effect.lMg(coef(m20)['lMg'], coef(m20)['lMg2']), effect.lMg(coef(m21)['lMg'], coef(m21)['lMg2']), effect.lMg(coef(m22)['lMg'], coef(m22)['lMg2'])));
lines(lMg.grid, effect.lMg(coef(m21)['lMg'], coef(m21)['lMg2']), lwd=2, col="red")
lines(lMg.grid, effect.lMg(coef(m22)['lMg'], coef(m22)['lMg2']), lwd=2, col="yellow", lty="dotted") 
abline(h=0, lty="dotted")
legend("topright", col=c("blue", "red", "yellow"), lwd=c(2,2,2), lty=c("solid", "solid", "dotted"), legend=c("m20", "m21", "m22"), cex=1.2)
rug(log2(Dris$Mg+runif(nrow(Dris))-0.5));

### coefficients of the models: compare with the plot
coefs = rbind(c(coef(m20),0,0),c(coef(m21),0),coef(m22))
rownames(coefs) = c("m20","m21","m22")
colnames(coefs) = names(coef(m22))
coefs

### Plotting the effect of Mg on the yield 
Mg.grid <- seq(min(Dris$Mg), max(Dris$Mg)-1, length = 1000); 
effect.Mg <- function(b1,b2) b1*(log2(Mg.grid+1) - log2(Mg.grid)) + b2*((log2(1+Mg.grid))^2 - (log2(Mg.grid))^2); 

plot(Mg.grid, effect.Mg(coef(m20)['lMg'], coef(m20)['lMg2']), col = "blue", xlab = "Mg", ylab = "Change of expected yield", main = "The effect of 10%-increase of Mg on the yield", type="l", lwd=2, ylim=range(effect.Mg(coef(m20)['lMg'], coef(m20)['lMg2']), effect.Mg(coef(m21)['lMg'], coef(m21)['lMg2']), effect.Mg(coef(m22)['lMg'], coef(m22)['lMg2'])));
lines(Mg.grid, effect.Mg(coef(m21)['lMg'], coef(m21)['lMg2']), lwd=2, col="red")
lines(Mg.grid, effect.Mg(coef(m22)['lMg'], coef(m22)['lMg2']), lwd=2, col="yellow", lty="dotted")
abline(h=0, lty="dotted")
legend(15, 0.3, col=c("blue", "red", "yellow"), lwd=c(2,2,2), lty=c("solid", "solid", "dotted"), legend=c("m20", "m21", "m22"), cex=1.2)
rug(Dris$Mg+runif(nrow(Dris))-0.5);


### Assume that Ca and Mg influence only the expectation of 'yield' and the linear model holds. 
### How would you test that given 'Ca' the amount of 'yield' is independent of 'Mg'? 
### That is we would like to test that L(Y|Ca) is independent with L(Mg|Ca). 
### (Here L means "Law", i.e. the probability distribution.)
summary(m01 <- lm(yield ~ lCa, data = Dris)) 
summary(m02 <- lm(yield ~ lCa+lCa2, data = Dris)) 

# test of model H0: yield ~ lCa + lCa2 against H: yield ~ lCa + lCa2 + lMg + lMg2
anova(m02, m22)
# we reject H0, i.e. the larger model is needed

# test of model H0: yield ~ lCa against H: yield ~ lCa + lMg + lMg2
anova(m01, m21)
# again, we reject H0, i.e. yield depends on lMg given lCa

### Task: How would you test that given 'Mg' the amount of 'yield' is independent of 'Ca'? 


### Nest: we add variable lN into the model
summary(m211 <- lm(yield ~ lMg + lMg2 + lCa + lN, data = Dris)) 

### QUESTIONS:
### Can we say that the amount of 'yield' is independent of 'Ca'? 
### => un-conditional analysis, we ask directly about the relation of yield and lCa
summary(m01)
summary(m02)
### visualisation
plot(yield~lCa, data=Dris, col="orange", pch=21, bg = "red", main="Marginal relation of yield and lCa")
abline(m01, col="darkgreen", lwd=2)
lCa.grid = seq(min(Dris$lCa),max(Dris$lCa),length=1000)
lines(lCa.grid,m02$coef[1]+m02$coef[2]*lCa.grid+m02$coef[3]*lCa.grid^2, col="coral", lwd=3)
legend("topright", c("m01","m02"), lwd=c(2,3), col=c("darkgreen","coral"))

### Can we say that given 'Mg' and 'Ca' the amount of 'yield' is independent of 'N'? 
summary(m211)
anova(m21,m211)
  
### Can we say that given 'N' and 'Ca' the amount of 'yield' is independent of 'Mg'?  
summary(m011 <- lm(yield ~ lCa + lN, data = Dris)) 
anova(m011, m211)

### Interactions
### ------------
### A simple interaction model 
summary(m1_inter <- lm(yield ~ lMg + lCa + lMg:lCa, data = Dris))

### Interaction columns are given as the product of corresponding main effect columns
head(X<-model.matrix(m1_inter))
all.equal(X[,2]*X[,3],X[,4])
  
### visulalisation of the fitter regression surface - contours
lMg.grid <- seq(min(Dris$lMg), max(Dris$lMg), length = 100); 
lCa.grid <- seq(min(Dris$lCa), max(Dris$lCa), length = 100);  
Yhat = outer(lMg.grid,lCa.grid, function(x,y) predict(m1_inter,newdata=data.frame(lMg=x,lCa=y)))
contour(lMg.grid,lCa.grid,Yhat,xlab="log(Mg)",ylab="log(Ca)")
title("Contours of the fitted \n regression surface (with interactions)")
cols = gray.colors(nrow(Dris), start = 0.1, end = 0.5, gamma = 2.2, alpha = NULL)[round(rank(Dris$yield))]
points(Dris$lMg, Dris$lCa,cex=Dris$yield/3, col=cols,pch=16)
###
### bigger and lighter dots represent larger values of observed yield
 
### the same figure for the model without interactions 
Yhat = outer(lMg.grid,lCa.grid, function(x,y) predict(m1_addit,newdata=data.frame(lMg=x,lCa=y)))
contour(lMg.grid,lCa.grid,Yhat,xlab="log(Mg)",ylab="log(Ca)")
title("Contours of the fitted \n regression surface (without interactions)")
points(Dris$lMg, Dris$lCa,cex=Dris$yield/3, col=cols,pch=16)

### QUESTIONS: 
### What is the interpretation of the regression coefficients?
### How would you quantify the effect of Mg on yield (when taking Ca into account)? 

eps <- 0.1;
delta <- log2(1+eps);

par(mfrow = c(2, 1))
plot(lCa.grid, delta*coef(m1_inter)['lMg'] + delta*coef(m1_inter)['lMg:lCa'] * lCa.grid, col = "blue", xlab = "log2(Ca)", ylab = "Change of expected yield", main = "The effect of 10%-increase of Mg \n on the yield", type="l", lwd=2)
Ca.grid <- seq(min(Dris$Ca), max(Dris$Ca), length = 1000)
plot(Ca.grid, coef(m1_inter)['lMg'] * (log2(Mg.grid+1)-log2(Mg.grid)) + coef(m1_inter)['lMg:lCa'] * (log2(Mg.grid+1)-log2(Mg.grid)) * log2(Ca.grid), col = "blue", xlab = "Ca", ylab = "Change of expected yield", main = "The effect of increas of Mg by +1 \n on the yield", type="l", lwd=2)
par(mfrow = c(1, 1))

summary(m1_inter)
### test whether the interaction is significant
anova(m1_inter,m1_addit)
### test whether lCa is significant
anova(m1_inter,m10<-lm(yield~lMg, data = Dris))


###  Data from the American Association of University Professors
###  (AAUP) annual faculty salary survey of American colleges and
###  universities, 1995.
###
###  Data are from 1161 colleges obtained (mostly) by a census method.

### Data
data(AAUP, package = "mffSM")

head(AAUP)
summary(AAUP)

###  Model for dependence of salary of the associate professor [salary.assoc]
###  on the following variables:
###  * number of full professors [n.prof]
###  * number of associate professors [n.assoc]
###  * number of assistant professors [n.assist]
###  * type of the institution [type]
###
### Our main aim will be to have a model which sufficiently well
### expresses E(salary.assoc|...) such that inference concerning
### its dependence on considered factors can be done.

### Create a subset dataset containing only the needed
### or otherwise interesting variables
aaup <- subset(AAUP, select = c("FICE", "name", "state", "type", "n.prof", "n.assoc", "n.assist", "salary.assoc"))
summary(aaup)

### Descriptive statistics for the quantitative variables to be analyzed,
### also categorized by type

TABS <- list()
for (v in c("salary.assoc", "n.prof", "n.assoc", "n.assist")){
  MeanType <- tapply(aaup[, v], aaup[, "type"], mean, na.rm = TRUE)
  MedType  <- tapply(aaup[, v], aaup[, "type"], median, na.rm = TRUE)
  SDType   <- tapply(aaup[, v], aaup[, "type"], sd, na.rm = TRUE)    
  Q1Type   <- tapply(aaup[, v], aaup[, "type"], quantile, prob = 0.25, na.rm = TRUE)
  Q3Type   <- tapply(aaup[, v], aaup[, "type"], quantile, prob = 0.75, na.rm = TRUE)    
  IQRType  <- tapply(aaup[, v], aaup[, "type"], IQR, na.rm = TRUE)  
  NAsType <- tapply(is.na(aaup[, v]), aaup[, "type"], sum)
  
  Mean <- mean(aaup[, v], na.rm = TRUE)
  Med  <- median(aaup[, v], na.rm = TRUE)  
  SD   <- sd(aaup[, v], na.rm = TRUE)
  Q1   <- quantile(aaup[, v], prob = 0.25, na.rm = TRUE)
  Q3   <- quantile(aaup[, v], prob = 0.75, na.rm = TRUE)  
  IQR  <- IQR(aaup[, v], na.rm = TRUE)
  NAs <- sum(is.na(aaup[, v]))

  TABS[[v]] <- data.frame(Mean   = round(c(Mean, MeanType), 1),
                          SD     = round(c(SD, SDType), 1),
                          Median = round(c(Med, MedType), 1),
                          Q1     = round(c(Q1, Q1Type), 1),
                          Q3     = round(c(Q3, Q3Type), 1),
                          IQR    = round(c(IQR, IQRType), 1),
                          NAs    = round(c(NAs, NAsType), 1))
  rownames(TABS[[v]]) <- c("All", "I", "IIA", "IIB")
  colnames(TABS[[v]])[7] <- "NA's"
}  

print(TABS)


### Frequency table for the categorical variable
table(aaup[, "type"])
prop.table(table(aaup[, "type"]))
round(prop.table(table(aaup[, "type"])) * 100, 1)


### FYI only ###
### LaTeX tables. Perhaps some additional editing
### towards getting aesthetically sufficiently nice
### tables is still needed!
###
###  TO BE EXPLORED IN MORE DETAIL AT HOME
###  (IF BEING INTERESTED).
###
###  DURING THE EXERCISE CLASS, ONLY QUICKLY RUN IT
###  AND ADMIRE THE OUTPUT.
 
library("xtable")

### All choices default
print(xtable(TABS[["salary.assoc"]]))

### Some settings changed
print(xtable(TABS[["salary.assoc"]], align = c("l", rep("r", 7)), digits = c(0, rep(1, 6), 0)),
      floating = FALSE, hline.after = c(-1, -1, 0, 1, 4, 4))
print(xtable(TABS[["n.prof"]], align = c("l", rep("r", 7)), digits = c(0, rep(1, 6), 0)),
      floating = FALSE, hline.after = c(-1, -1, 0, 1, 4, 4))
print(xtable(TABS[["n.assoc"]], align = c("l", rep("r", 7)), digits = c(0, rep(1, 6), 0)),
      floating = FALSE, hline.after = c(-1, -1, 0, 1, 4, 4))
print(xtable(TABS[["n.assist"]], align = c("l", rep("r", 7)), digits = c(0, rep(1, 6), 0)),
      floating = FALSE, hline.after = c(-1, -1, 0, 1, 4, 4))


### Frequency table
(tTAB <- table(aaup[, "type"]))
(pTAB <- format(round(prop.table(tTAB) * 100, 1), nsmall = 1))
(tpTAB <- paste(tTAB, " (", pTAB, "\\%)", sep = ""))
names(tpTAB) <- names(tTAB)
print(tpTAB)

texTAB <- paste("\\begin{tabular}{rrr}\n",
                "\\hline\\hline\n",
                paste(paste("\\textbf{", names(tpTAB), "}", sep = ""), collapse = " & "), "\\\\\n",
                "\\hline\n",
                paste(tpTAB, collapse = " & "), "\\\\\n",
                "\\hline\\hline\n",
                "\\end{tabular}\n\n", sep = "")
cat(texTAB)
### End of LaTeX corner.
##################################################################################################


### Scatterplot matrix for quantitative variables at hand
palette(c("darkblue", "red3", "olivedrab", rainbow_hcl(5)))
scatterplotMatrix(~salary.assoc + n.prof + n.assoc + n.assist,
                  reg.line = FALSE, smooth = FALSE, spread = TRUE, diagonal = "histogram", data = aaup, pch = 16, cex=0.25)

### Boxplots of salary.assoc by type
COL3 <- rainbow_hcl(3)
plot(salary.assoc ~ type, data = aaup, col = COL3, xlab = "Type of institution", ylab = "Salary associate professor [USD 100]")

### Scatterplots of Y versus all quantitative x's while distinguishing the points by type
XLAB <- c("Num. of professors", "Num. of assoc. prof.", "Num. of assistants")
names(XLAB) <- c("n.prof", "n.assoc", "n.assist")

BCOL3 <- rainbow_hcl(3)
COL3 <- c("red4", "darkgreen", "blue4")
PCH3 <- c(21, 22, 23)
names(COL3) <- names(BCOL3) <- names(PCH3) <- levels(aaup[, "type"])
layout(matrix(c(rep(1:3, each = 1), 4), nrow = 1))
for (v in c("n.prof", "n.assoc", "n.assist")){
  plot(formula(paste("salary.assoc ~", v)), data = aaup, pch = PCH3[type], col = COL3[type], bg = BCOL3[type],
       xlab = XLAB[v], ylab = "Salary [USD 100]", cex=1.25)
}
plot.new()
legend(0.1, 0.9, legend = levels(aaup[, "type"]), title = "Type", pch = PCH3, col = COL3, pt.bg = BCOL3, cex=1.25)
par(mfrow = c(1, 1))

### QUESTIONS:
### In case we would consider each quantitative regressor separately, 
###    would a simple line be a useful model?
### Is it likely that type of institution must be taken into account?
### Do you expect normal and/or homoscedastic errors?


### In the following, we will fit several models,
### not all necessarily including all considered covariates.
### To make those models comparable (by F-tests etc.) we
### create a dataset containing only those institutions
### for which all needed variables are available.
### Consequently, all fitted models will be based on
### the same dataset.
Data <- subset(aaup, complete.cases(aaup[, c("salary.assoc", "n.prof", "n.assoc", "n.assist", "type")]))
nrow(Data)
nrow(aaup)

### Which institutions are lost?
whichREM <- with(aaup, !complete.cases(aaup[, c("salary.assoc", "n.prof", "n.assoc", "n.assist", "type")]))
subset(aaup, whichREM)


### Simple additive model to start with ... 
summary(m1orig <- lm(salary.assoc ~ type + n.prof + n.assoc + n.assist, data = Data));

### QUESTIONS: 
### Interpret the estimated regression coefficients. What do the p-values in the last column of the table tell us?

# for instance, the interpretation of the coefficient at n.prof in the two models:
summary(lm(salary.assoc~n.prof, data = Data))
summary(m1orig)

### Based on the above output compare the salary of assoc. professor 
### at type IIA school and type IIB school. 
  
m1orig
###  let's take the conditional expectations for:
###  (arbitrary but fixed numbers follow)
n.p = 10                      # number of professors
n.a1 = -17.2*pi               # number of associate professors
n.a2 = 19                     # number of assistant professors
## point estimates of the conditional expectations for type IIA and type IIB schools
lIIA = c(1,1,0,n.p,n.a1,n.a2)
lIIB = c(1,0,1,n.p,n.a1,n.a2)
lIIA - lIIB
## the difference does not depend on the the number of professors, assoc. profs, 
### or assist. profs at all
## <= A consequence of additivity

l <- c(0,1,-1,0,0,0);
LSest(m1orig, l)

### Can you explain how is it possible that the Std. error in the above output 
### is smaller than Std. Errors in summary(m1orig) at rows starting by 'typeIIA' 
### and 'typeIIB'?

summary(m1orig)$coef

### compare with the following output
vcov(m1orig)
cov2cor(vcov(m1orig))
sqrt(l%*%vcov(m1orig)%*%l)
sqrt(vcov(m1orig)[2,2]+vcov(m1orig)[3,3]-2*vcov(m1orig)[2,3])
    
### Can we conclude that hiring one more professor would increase 
### the salary of associate professors? 

coef(m1orig)
### To assess significance, test of a submodel
m_sub = lm(salary.assoc ~ type + n.assoc + n.assist, data = Data)
anova(m_sub,m1orig)
summary(m1orig)
### alternatively, directly testing about the regression coefficient
### one-sided test
### H0: beta_n.prof <= 0 against H1: beta_n.prof > 0
LSest(m1orig,L=c(0,0,0,1,0,0),alternative="greater")

###  (Overall) medians of n.prof, n.assist are 40,
###  for n.assoc it is 38 (which is roughly 40 as well).
TABS

###
###  To get better interpretation of the regression coefficients
###  we create new covariates n.prof40, n.assoc40, n.assist40
###  obtained from subtracting 40 from the original ones.
###  "40" versions will then be used in models.
###
###  Which regression coefficients become influenced by this
###  transformation?

Data <- transform(Data, n.prof40 = n.prof - 40,
                        n.assoc40 = n.assoc - 40,
                        n.assist40 = n.assist - 40)

### Simple additive model with transformed variables 
m1 <- lm(salary.assoc ~ type + n.prof40 + n.assoc40 + n.assist40, data = Data)
summary(m1)

### QUESTIONS: 
### Interpret the estimated regression coefficients. 
### Which parameters have different interpretation in models m1 and m1orig?

summary(m1orig)$coef
summary(m1)$coef

### Model m1 is just a reparametrization of m1orig
l = c(1,0,0,-40,-40,-40)
# intercept in m1orig
coef(m1)%*%l
coef(m1orig)[1]
LSest(m1,L=l)
LSest(m1orig,L=c(1,0,0,0,0,0))

# the whole model transformed
(L = unname(rbind(l,cbind(0,diag(5))))) 
LSest(m1,L=L)  
summary(m1orig)$coef

### What is being tested here? 
m0 <- lm(salary.assoc ~ n.prof40 + n.assoc40 + n.assist40, data = Data)
anova(m0, m1);

### Let consider the contrast sum ("contr.sum:) for the varaible  'type'
m1B <- lm(salary.assoc ~ type +n.prof40 + n.assoc40 + n.assist40, data = Data, contrasts = list(type = "contr.sum"))
summary(m1B);

### QUESTION: Interpret the regression coefficients on the rows that start 
### with 'type1' and 'type2' 
C <- contr.sum(3);
rownames(C) <- levels(Data$type);
colnames(C) <- names(coef(m1B))[2:3];
C

### Compare the following two summary outputs 
summary(m1B);
summary(m1);

# compare the output below with the two summaries above
(L = cbind(1,C,0,0,0))
LSest(m1B,L=L)

## in simple (one-way) ANOVA, these regression coefficients were the same as
## the estimated group-specific means, and mean of group-specific means was the intercept.
## with additional covariates this no longer holds true unconditionally

(means = tapply(Data$salary.assoc, Data$type, mean))
### compare with the marginal model, where only type is taken into account
(coefs = lm(salary.assoc ~ type, data = Data)$coef)
### in the marginal model we indeed get that the regression coefficients are modeled
### by the estimated group-specific means 
cbind(1,contr.treatment(3))%*%coefs
  
### Still, the two models m1 and m1B are equivalent
(C2 = cbind(1,contr.treatment(3)))
(Cf = solve(C2)%*%L)
(Cf = unname(rbind(Cf,cbind(0,0,0,diag(3)))))
LSest(m1B,L=Cf)
## which is the same as the original output
summary(m1)$coef

### Basic diagnostic plots 
plotLM(m1)


### Slightly more complicated model?
summary(m2 <- lm(salary.assoc ~ type + n.prof40*n.assoc40 + n.assist40, data = Data)) 

### Based on the model m2 describe what is the effect of the numbers of professors 
### on the salary of associate professors?

## let's take, for instance, the conditional expectations for:
n.p = 10             # number of professors
n.a1 = 100*pi        # number of associate professors
n.a2 = -15           # number of assistant professors
ty = levels(Data$type)[2] # type of school
coef(m2)%*%c(1,1,0,0,n.a1,n.a2,0) + n.p*(coef(m2)[4]+n.a1*coef(m2)[7])
#
predict(m2,newdata=data.frame(type = ty, n.prof40 = n.p, n.assoc40 = n.a1, n.assist40 = n.a2))
## the relation is now more complex than in the additive model

### Compare the following two outputs 
### the only difference is the shift in covariates by 40
summary(m2)
summary(m2orig <- lm(salary.assoc ~ type + n.prof*n.assoc + n.assist, data = Data)) 

### models are still equivalent. For instance, coefficient with n.prof and m2orig 
m2$coef[4] - 40*m2$coef[7]
m2orig$coef[4]
# or the intercept of m2orig 
m2$coef %*% c(1,0,0,rep(-40,3),40^2)
m2orig$coef[1]

### Try to add the quadratic term of number of professors. 
summary(m3 <- lm(salary.assoc ~ type + n.prof40*n.assoc40 + I(n.prof40^2) + n.assist40, data = Data))

### Based on the model m3 describe what is the effect of the numbers of professors on the salary of associate professors? 

coef(m3)
#
n.p = 10
n.a1 = 100
n.a2 = -15
ty = levels(Data$type)[2]
#
coef(m3)%*%c(1,1,0,0,n.a1,0,n.a2,0) + n.p*(coef(m3)[4]+n.a1*coef(m3)[8]+ n.p*coef(m3)[6])
predict(m3,newdata=data.frame(type = ty, n.prof40 = n.p, n.assoc40 = n.a1, n.assist40 = n.a2))