#####
##### NMST440: Advanced Aspects of the R Environment
#####
##### ---------------------------------------------------------------------
#####
##### Writing a function to repeat
##### one routine analysis many times with different variables
#####
##### ---------------------------------------------------------------------
#####
##### Arnošt Komárek
##### https://www2.karlin.mff.cuni.cz/~komarek
##### komarek@karlin.mff.cuni.cz
#####
##### ======================================================================
### Start from an empty workspace: remove all objects listed by ls().
rm(list = ls())
### Directory with the tutorial files. It ends with "/" and is pasted
### together with file names further below.
### NOTE: this is a machine-specific path -- adjust it to your own copy of the tutorial.
ROOT <- "/home/komarek/teach/mff_2021/nmst440_AdvRko/Tutorial05/"
### Make ROOT the working directory so that relative paths like "./Data/..." work.
setwd(ROOT)
### Load illustrative dataset
### (subset of a dataset used also for Project 1 at Advanced Regression exercise class).
### It is assumed that the data file nelsNE2.RData is stored
### in a subdirectory 'Data' of the working directory (variable ROOT above).
### ----------------------------------------------------------------
### load() restores the objects saved in the file and returns their names,
### print() shows which objects have just been created.
print(load("./Data/nelsNE2.RData"))
### Dimensions and the first few rows of the loaded dataset
dim(nelsNE2)
head(nelsNE2)
### Brief description of variables
print(varlabels2)
### Our ultimate task is to repeat (with different variables)
### the following type of analysis and provide results
### in the form of (many) nice tables to be included
### in a LaTeX document
### ----------------------------------------------------
### $$$$$ ANALYZIS $$$$$ ###
### Keep only the two variables of interest and only the rows
### where both of them are observed.
DataNoNA <- na.omit(nelsNE2[, c("f2.sco.math", "fam.comp")])
dim(DataNoNA)
### Group sizes (an assignment wrapped in parentheses is also printed).
(N <- with(DataNoNA, table(fam.comp)))
#
### Overall characteristics of the response:
### mean, standard deviation, standard error of the mean, median, quartiles.
(grMn <- with(DataNoNA, mean(f2.sco.math)))
(grSD <- with(DataNoNA, sd(f2.sco.math)))
(grSE <- grSD / sqrt(sum(N)))
(grMed <- with(DataNoNA, median(f2.sco.math)))
### The argument of quantile() is 'probs'; it is spelled out in full
### rather than relying on partial argument matching.
(grQs <- with(DataNoNA, quantile(f2.sco.math, probs = c(0.25, 0.75))))
#
### The same characteristics calculated separately for each level of fam.comp.
(Mn <- with(DataNoNA, tapply(f2.sco.math, fam.comp, mean)))
(SD <- with(DataNoNA, tapply(f2.sco.math, fam.comp, sd)))
(SE <- SD / sqrt(N))
(Med <- with(DataNoNA, tapply(f2.sco.math, fam.comp, median)))
### Extra arguments of tapply() (here 'probs') are passed on to quantile().
(Qs <- with(DataNoNA, tapply(f2.sco.math, fam.comp, quantile, probs = c(0.25, 0.75))))
#
### Two-sample t-test comparing the response between the groups
### (the formula interface of t.test() assumes that fam.comp has exactly two levels;
### by default the Welch version, var.equal = FALSE, is calculated).
t.test(f2.sco.math ~ fam.comp, data = DataNoNA)
### $$$$$ END OF ANALYZIS $$$$$ ###
### Tables with a simpler structure
### formatted in LaTeX or html syntax can easily be created
### using the R package xtable
### ------------------------------------------------------------
library("xtable")
### For inclusion in reports, it is useful to format
### reported numbers
### --------------------------------------------------
### round() cuts the number to 2 decimal places, format() with nsmall = 2
### then returns a character string that always shows (at least) 2 decimals.
format(round(4.56565, 2), nsmall = 2)
### Small functions to format numbers, p-values and confidence intervals
### for use in LaTeX tables
### --------------------------------------------------------------------
###
### Check the file formatOut.R. It contains code of three small functions
### designed to format (i) P-values, (ii) numbers, (iii) confidence intervals
### for use in a LaTeX code. Formatted numbers and confidence intervals
### appear in bold if the P-value is smaller than 0.05.
### Note that all backslashes from the LaTeX code must be doubled in the R code.
###
### Read the formatting functions from formatOut.R
### (ROOT already ends with "/", so the file name is simply appended).
source(paste0(ROOT, "formatOut.R"))
### Check the code and try to understand it.
### (printing a function shows its source code)
print(formatPval)
print(formatNum)
print(formatCI)
### Usage of formatPval
pvals <- c(0.0008, 0.0239, 0.0512)
pv1 <- formatPval(pvals, bold = FALSE)
print(pv1) ## it is a vector of characters
cat(pv1, "\n") ## cat "evaluates" the character vector, "\n" provides "Enter" to go to the next line
## --> useful to let R write the LaTeX code, see below
paste(pv1, collapse = " & ") ## char. vector -> single character with & placed between elements of the original vector
cat(paste(pv1, collapse = " & "), "\\\\ \n") ## basically a line in the LaTeX table
### one more illustration showing creation of a row in the LaTeX table
pv2 <- formatPval(pvals, bold = TRUE)
print(pv2)
cat(paste(pv2, collapse = " & "), "\\\\\n")
### and even one more illustration showing creation of a row in the LaTeX table
estims <- c(-1.2334, 0.5678, 1e-7)
fest1 <- formatNum(estims, digits = 2)
print(fest1)
cat(paste(fest1, collapse = " & "), "\\\\\n")
### one more... (now the P-values are supplied as well)
fest2 <- formatNum(estims, pvals, digits = 2)
print(fest2)
cat(paste(fest2, collapse = " & "), "\\\\\n")
### and one more... (estimate and its P-value glued together in one table cell;
### paste0() glues the pieces without any separator)
paste0(fest2, ", P: ", pv2)
cat(paste(paste0(fest2, ", P: ", pv2), collapse = " & "), "\\\\\n")
### formatted confidence intervals
formatCI(c(-0.2909, 0.8656), pval = 0.15, digits = 2)
formatCI(c(-0.2909, -0.8656), pval = 0.01, digits = 2)
### --------------------------------------------------------------------------------
### Now let's go back to our main task, which is to repeat the analysis shown
### at the beginning of this script for different combinations of two variables
### and report its results in the form of "nice" tables included in a "nice" document.
### Namely, we want to repeat it for 16 combinations of variables
### "sco.math", "sco.sci", "sco.soc", "sco.read" (y-variables) and
### "fam.comp", "gender", "f2.menrol", "f2.arrest" (x-variables)
### --------------------------------------------------------------------------------
### SOLUTION 1:
### Copy-paste 16-times code between ### $$$$$ ANALYZIS $$$$$ ###
### and ### $$$$$ END OF ANALYZIS $$$$$ ###
### and then manually replace original "f2.sco.math" and "fam.comp"
### by 16 requested combinations. Run it and manually copy-paste results into
### the final document (being prepared by LaTeX, MS Word, ...)
###
### --> This was a joke...
### SOLUTION 2:
### Write a function which, for given combination of variables (arguments of the function)
### (a) performs calculations, (b) creates the LaTeX code of a "nice" table
### filled by (formatted) calculated numbers.
###
### Such function is available in the 'funTabDescr.R' script.
### Good ideas to follow (my recommendations...) when writing such a function are
### (1) think about arguments of the function (items that you may need to change when
### calling the function later on),
### (2) create (standard) variables in the R global environment (.GlobalEnv in search())
### named in the same way as function arguments, i.e., as if you do everything without
### using the function. Assign to those variables some combination of possible values.
### You will do this if you uncomment the eight rows below the ## TESTING row
### in the funTabDescr.R script.
### (3) start writing the body of the function as if you do a single analysis
### (where later, the function arguments are set to values from point (2)). At the same time,
### after each row of the code/sequence of few rows, check manually whether the code
### really does what you intend.
### (4) at the very end, place something like funTabDescr <- function(...){ before the developed code
### and } at the end of the developed code.
### ---------------------------------
### Some more remarks regarding "local" and "global" variables in R. If you, within the function call,
### ask for a value of a variable named 'x', R will first look for it within the function itself (local 'x').
### If local 'x' exists it is used, if not, R looks for it in the environment where the function was defined,
### which is the global environment for functions created at the top level of a script. If it is not found
### there, R further searches for it in the other environments listed by the search() command
### Print the search path, i.e., the ordered list of environments and attached packages.
search()
### After .GlobalEnv, you usually find a list package:XX, package:YY etc. (plus a few other things)
### that corresponds to attached packages. Consequently, if there exist 'local' x and also 'global' x,
### the local one is used (this is the same as in most other programming languages). Nevertheless,
### if no 'local' x exists, but 'global' one does, the 'global' x is automatically used
### (this is different from, e.g., PASCAL, in C/C++ global variables to be used locally
### must be explicitly declared etc.).
###
### It can be noted that the same strategy is also used if you ask for, let's say,
### a function named "t.test". If it is defined within your other function (locally), this local version is used;
### if it is created by you in the .GlobalEnv, this version is used. Otherwise, the first
### instance of 't.test' found on the list from search() is used.
###
### Main consequence of the above behavior when writing functions that just repeat some piece
### of code with only few items that vary is that (in contrast to most low level languages like PASCAL),
### you do not have to include all values that the function needs among its arguments.
### Only those items that are to change are sufficient as function arguments. All other
### needed values are automatically taken from the global environment.
### Your task now is to go manually (after you uncomment code below ## TESTING)
### through the code in funTabDescr.R.
### Next to calculation of needed numbers
### (code between ### $$$$$ ANALYZIS $$$$$ ### and
### ### $$$$$ END OF ANALYZIS $$$$$ ###),
### it also shows how to let R write the LaTeX code of
### a table that involves calculated numbers.
### --------------------------------------------------------
### Now, once you understand the code inside the funTabDescr.R
### function, we can use it. Do not forget to comment again
### the rows below ## TESTING at the beginning of the script.
### Function to create (nice) table with requested results
### -------------------------------------------------------
### Read the definition of funTabDescr()
### (ROOT already ends with "/", so the file name is simply appended).
source(paste0(ROOT, "funTabDescr.R"))
### Use it for one combination of x and y variables
tab01 <- funTabDescr(
  "f2.sco.math", "fam.comp",
  yLab = "Score in mathematics", xLab = "Family composition",
  data = nelsNE2, digits = 2
)
### The whole returned object, then its two components used in this script:
### "tab" and "tex" (the latter is sent through cat() to show the LaTeX code).
print(tab01)
print(tab01[["tab"]])
cat(tab01[["tex"]])
### Now, to include it in the LaTeX document,
### you can either Copy-Paste it from the R console to the LaTeX code
### or you can store the table LaTeX code in a separate file and then
### use the \input{} command in LaTeX to place it (without manual Copy-Paste)
### in the LaTeX document.
### To do so, R function sink() which redirects all output to some file can be used.
### The code below assumes that a subdirectory 'Tabs' exists in the working directory (getwd()).
### LaTeX file tab01.tex is created within this directory.
### ROOT already ends with "/", so "Tabs/..." is appended without a leading slash
### (otherwise the path would contain a doubled separator).
sink(paste0(ROOT, "Tabs/tab01.tex")) ### redirect the R output into file
cat(tab01[["tex"]])
sink() ### direct the R output back to R console
### Use function many times
### -------------------------
### Below, the for loop is used to call the funTabDescr function
### for all combinations of requested x- and y-variables.
### LaTeX tables are stored again in the 'Tabs' subdirectory of the working directory,
### file names are derived from the variable names.
### Variables to be combined. The y-variables are given without the "f2." prefix,
### it is added inside the loop when funTabDescr() is called.
yVars <- c("sco.math", "sco.sci", "sco.soc", "sco.read")
xVars <- c("fam.comp", "gender", "f2.menrol", "f2.arrest")
### Labels are named by the variables so that they can be picked up by name in the loop.
yLabs <- paste(c("Math", "Science", "Social science", "Reading"), "score")
names(yLabs) <- yVars
print(yLabs)
xLabs <- c("Family composition", "Gender", "Math enrollment past 2 years", "Arrested")
names(xLabs) <- xVars
print(xLabs)
### One table (and one .tex file) for each of the 4 x 4 combinations.
for (yv in yVars) {
  for (xv in xVars) {
    tab <- funTabDescr(
      paste0("f2.", yv), xv,
      yLab = yLabs[yv], xLab = xLabs[xv],
      data = nelsNE2, digits = 2
    )
    ### File name is derived from the variable names, e.g., tab_sco.math_gender.tex.
    ### ROOT already ends with "/", hence no leading slash before "Tabs".
    sink(paste0(ROOT, "Tabs/tab_", yv, "_", xv, ".tex"))
    cat(tab[["tex"]])
    sink()
  }
}
### -----------------------------------------------------------
### And now, explore supplied 'sampleReport.tex' to see
### how to include created tables in the final document.