# rm(list=ls());
#
library(copula);
#
# Enable the use of more cores of the processor
library(doMC);
#
# Number of detected cores
n.cores.det <- detectCores();
#
# Number of cores to be used
registerDoMC(cores=n.cores.det);
# registerDoMC(cores=1);

# Function to differentiate symbolically (higher-order derivatives of an expression),
# e.g. DD(expression(x^3), "x", 2) gives 3 * (2 * x)
DD <- function(expr, name, order = 1) {
  if(order < 1) stop("'order' must be >= 1")
  if(order == 1) D(expr, name)
  else DD(D(expr, name), name, order - 1)
}
#
# Formula-like interface for the test
test.effect <- function(formula, data, given, B=99, family.assumed="frank", verbose=FALSE){
  #
  # formula ... formula of the form cbind(Y1,Y2) ~ X1 + X2 + X1*X2 + I(X1^2) + X3, where
  #             Y1 and Y2 are response variables already marginally adjusted for the effect of the regressors
  #
  # data ... dataset with the variables Y1, Y2, X1, X2, X3, ...
  #
  # given ... the covariates which influence the dependence structure under the null hypothesis,
  #           e.g. given=~X1+X3
  #
  # B ... number of permutations to be done for the permutation test
  #
  # verbose ... if set to TRUE, more information will be printed (default is FALSE)
  #
  # family.assumed ... assumed parametric form of the dependence structure. At the
  #     moment only the following families of copulas (with the given link functions g_R) are possible:
  #       frank   ... Frank copula with g_{R}(x) = x
  #       clayton ... Clayton copula with g_{R}(x) = exp(x)
  #       gumbel  ... Gumbel copula with g_{R}(x) = exp(x) + 1
  #
  #
  # Known issues: If 'given' is not empty, then one cannot use 'poly' or 'bs' in the formula.
  cl <- match.call()
  mf <- match.call(expand.dots = FALSE)
  m <- match(c("formula", "data"), names(mf), 0L)
  mf <- mf[c(1L, m)]
  mf[[1L]] <- quote(stats::model.frame)
  mf <- eval(mf, parent.frame())
  #
  mt <- attr(mf, "terms")
  # Response
  U <- model.response(mf, "numeric")
  if(ncol(U) != 2){
    stop("The response vector U is not two-dimensional.")
  }
  xz <- model.matrix(mt, mf);
  n <- nrow(xz);  # sample size (needed already when building the interaction columns)
  #
  #
  # Used covariates
  used.covariates <- attributes(mt)$term.labels[is.element(attributes(mt)$term.labels, colnames(data))];
  qqq.inter <- FALSE;
  # browser();
  # Are there any Z?
  if(!missing(given)){
    # browser();
    if(length(attributes(given)) == 0){
      stop("Argument 'given' should be in the formula form, e.g. given=~X1.")
    }
    given <- attributes(terms(given))$term.labels;  # getting the conditioning regressors
    # Yes, there is Z.
    x.covariates <- setdiff(used.covariates, given)
    # browser();
    if(length(x.covariates) == 0){
      stop("Next to the regressors used for conditioning (Z), there is no remaining regressor for testing the effect.")
    }
    # Matching a vector of patterns
    vec.grep <- function(pattern, x, ...){
      iii <- c();
      for(j in 1:length(pattern)){
        iii <- c(iii, grep(pattern[j], x, ...));
      }
      return(unique(iii))
    }
    #
    # vec.grep(x.covariates, colnames(attributes(mt)$factors), value=T)
    z.regres <- setdiff(colnames(attributes(mt)$factors),
                        vec.grep(x.covariates, colnames(attributes(mt)$factors), value=T))
    if(length(z.regres) == 0){
      cat(sep="\n\n");
      print("Warning: There are no Z used in the formula for the dependence!");
      cat(sep="\n\n");
      X <- as.matrix(xz[,-1]);
      Z <- as.matrix(xz[,1]);
    }else{
      Z <- as.matrix(xz[,-vec.grep(x.covariates, colnames(xz))]);
      indices.x <- vec.grep(given, colnames(xz))
      X <- as.matrix(xz[,-c(1, indices.x)]);
      #
      # browser();
      #
      # Handling possible interactions
      regressors <- colnames(attributes(mt)$factors)
      # Indices of interactions
      iii <- intersect(vec.grep(given, regressors), vec.grep(x.covariates, regressors))
      # Are there interactions between X and Z?
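      # Worked illustration of the bookkeeping below (assumed example, not taken from the
      # original script): for a call such as
      #   test.effect(cbind(Y1,Y2) ~ X1 + X2 + X1*X2, given=~X2, data=DATA)
      # the conditioning term is given = "X2" and x.covariates = "X1", so Z collects the
      # intercept and the X2 column, X collects the X1 column, and the interaction X1:X2
      # is split in the block below into its Z-part (Zint = X2) and its X-part (Xint = X1);
      # the tested interaction column is rebuilt later as Zint * Xint.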
      if(length(iii) > 0){
        qqq.inter <- TRUE
        #
        inter.tab <- as.matrix(attributes(mt)$factors[,iii]);
        # browser();
        inter.tabZ <- as.matrix(inter.tab[vec.grep(given, rownames(inter.tab)),]);
        inter.tabX <- as.matrix(inter.tab[vec.grep(x.covariates, rownames(inter.tab)),]);
        #
        # browser()
        Zint <- matrix(NA, ncol=ncol(inter.tabZ), nrow=n)
        Xint <- matrix(NA, ncol=ncol(inter.tabZ), nrow=n)
        #
        for(j in 1:ncol(inter.tabZ)){
          Zint[,j] <- apply(as.matrix(Z[,match(rownames(inter.tabZ)[as.logical(inter.tabZ[,j])], colnames(Z))]), 1, prod)
          Xint[,j] <- apply(as.matrix(X[,match(rownames(inter.tabX)[as.logical(inter.tabX[,j])], colnames(X))]), 1, prod)
        }
        colnames(Zint) <- colnames(inter.tabZ)
        colnames(Xint) <- colnames(inter.tabZ);
        #
      }else{
        # There are no interactions between X and Z.
        qqq.inter <- FALSE;
      }
    }
  }else{
    # There are no Z.
    X <- as.matrix(xz[,-1]);
    Z <- as.matrix(xz[,1]);
  }
  # Adding the column name, which is not kept if the dimension of X is 1
  if(ncol(X) == 1){
    colnames(X) <- colnames(xz)[2];
  }
  # browser();
  #
  #
  #
  #
  U1 <- U[,1];
  U2 <- U[,2];
  n <- length(U1);
  #
  dX <- ncol(X);
  dZ <- ncol(Z);
  #
  if(qqq.inter){
    ZintX <- Zint * Xint;
    XZ <- cbind(X, ZintX);
  }else{
    XZ <- X;
  }
  dXZ <- ncol(XZ);
  # browser();
  if(n != nrow(X) | n != nrow(Z)){
    stop("Lengths of vectors do not agree.")
  }
  if(family.assumed == "frank"){
    log.dcopula.e <- expression(log((eta0)*(1-exp(-(eta0)))*exp(-(eta0)*(u+v))/((1-exp(-(eta0))) - (1 - exp(-(eta0)*u))*(1 - exp(-(eta0)*v)))^2));
    link <- function(x) x;
    link_inv <- function(x) x;
    # log.dcop <- function(u, theta) log(dCopula(copula=frankCopula(theta), u=u));
    init.par <- rep(0.1, dZ);
    UPPER <- rep(20, dZ);
    LOWER <- rep(-20, dZ);
  }
  if(family.assumed == "clayton"){
    log.dcopula.e <- expression(log((exp(eta0)+1)*(u*v)^(-exp(eta0)-1)*(u^(-exp(eta0))+v^(-exp(eta0))-1)^(-2-1/exp(eta0))));
    link <- function(x) exp(x);
    link_inv <- function(x) log(x);
    # log.dcop <- function(u, theta) log(dCopula(copula=claytonCopula(theta), u=u));
    init.par <- rep(0.1, dZ);
    UPPER <- rep(20, dZ);
    LOWER <- rep(-20, dZ);
  }
  if(family.assumed == "gumbel"){
    log.dcopula.e <- expression(log(exp(-((-log(u))^(exp(eta0)+1) + (-log(v))^(exp(eta0)+1))^(1/(exp(eta0)+1))) * (u*v)^(-1) * ((-log(u))^(exp(eta0)+1) + (-log(v))^(exp(eta0)+1))^(-2+2/(exp(eta0)+1))*(log(u)*log(v))^((exp(eta0)+1)-1) * (1 + ((exp(eta0)+1)-1)*(((-log(u))^(exp(eta0)+1)) + (-log(v))^(exp(eta0)+1))^(-1/(exp(eta0)+1)))));
    link <- function(x) exp(x)+1;
    link_inv <- function(x) log(x-1);
    # log.dcop <- function(u, theta) log(dCopula(copula=gumbelCopula(theta), u=u));
    init.par <- rep(0.1, dZ);
    UPPER <- rep(20, dZ);
    LOWER <- rep(-20, dZ);
  }
  loglik0 <- function(eta0, u, v) eval(log.dcopula.e);
  # loglik <- function(eta0, eta1, eta2, u, v, x) eval(log.dcopula.e);
  # loglik2 <- function(eta0, eta1, eta2, u, v, x) eval(log.dcopula.e2);
  # Expression for the derivative of the assumed model density with respect to theta (x is dropped)
  skor.e0 <- DD(log.dcopula.e, "eta0");
  skor.e0.u <- DD(skor.e0, "u");
  skor.e0.v <- DD(skor.e0, "v");
  # Assumed Fisher information
  I.e00 <- DD(skor.e0, "eta0");
  # Derivative of the model with respect to theta (x is dropped)
  skor.theta <- function(eta0, u, v) eval(skor.e0);
  skor.theta.u <- function(eta0, u, v) eval(skor.e0.u);
  skor.theta.v <- function(eta0, u, v) eval(skor.e0.v);
  # skor2.v <- function(eta0, eta1, eta2, u, v, x) eval(skor.e2.v);
  I.theta <- function(eta0, u, v) (-1)*eval(I.e00);
  #
  # Calculation of the test statistic
  U1 <- rank(U1)/(n+1);
  U2 <- rank(U2)/(n+1);
  U <- cbind(U1, U2);
  UU1 <- outer(U1, U1, "<");
  UU2 <- outer(U2, U2, "<");
  UU12 <- UU1 * UU2;
  # browser();
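  # Rough sketch of the construction below (an informal reading of the code, not a formal
  # statement): the copula parameter of observation i is driven by the linear predictor
  #   eta_i = Z_i' psi + XZ_i' alpha,   with H0: alpha = 0,
  # where XZ collects the tested columns (X and, if present, the X:Z interaction columns).
  # Under H0 the nuisance parameter psi is estimated by maximizing the log copula density
  # evaluated at the rank-based pseudo-observations; the statistic RST2 computed further
  # below is a quadratic form in the score for alpha, standardized by a variance estimate
  # (the rows Zi) that also accounts for the use of ranks.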
  # Test statistic calculation
  # log.dens <- function(theta) log.dcop(u=U, theta=theta);
  # fce <- function(theta) (-1)*sum(log.dcop(u=U, theta=theta));
  fce <- function(eta) {
    ret <- (-1)*sum(loglik0(eta0=Z%*%eta, u=U[,1], v=U[,2]));
    qqq <- as.logical(abs(ret) > 1e10) | is.nan(ret);
    ret[qqq] <- 1e10;
    return(ret);
  }
  grad.fce <- function(eta) {
    ret <- (-1)*colSums(as.vector(skor.theta(eta0=Z%*%eta, u=U[,1], v=U[,2]))*Z);
    qqq <- as.logical(abs(ret) > 1e10) | is.nan(ret);
    ret[qqq] <- 1e10;
    return(ret);
  }
  #
  #
  optim.H0 <- optim(par=init.par, fce, lower=LOWER, upper=UPPER, method="L-BFGS-B", gr=grad.fce)
  # optim(par=init.par, fce, gr=grad.fce, lower=LOWER, upper=UPPER, method="L-BFGS-B")
  if(verbose){
    print("Searching for the estimates of the nuisance parameters:")
    print(optim.H0);
    cat(sep="\n\n")
  }
  # browser();
  eta0.tilde <- optim.H0$par;
  if(any(abs(eta0.tilde - UPPER) < 1e-6) | any(abs(eta0.tilde - LOWER) < 1e-6)){
    print(optim.H0);
    stop("The estimate of the nuisance parameter was found on the border of the assumed parametric space.")
  }
  #
  # browser();
  nu0 <- Z%*%eta0.tilde
  skor.theta.n <- as.vector(skor.theta(nu0, U[,1], U[,2]));
  UiXZ <- skor.theta.n * XZ;
  UiZ <- skor.theta.n * Z;
  UA <- colSums(as.matrix(UiXZ));
  if(verbose){
    print("Score vector:");
    print(UA);
    # browser();
    cat(sep="\n\n")
  }
  # Estimates of the blocks of the information matrix
  I.theta.n <- as.vector(I.theta(nu0, U[,1], U[,2]));
  I.theta.nZ <- I.theta.n*Z;
  IBB <- t(Z)%*%(I.theta.nZ);
  IAB <- t(XZ)%*%(I.theta.nZ);
  if(verbose){
    print("Estimate of I_{\\alpha, \\psi}");
    print(IAB);
    cat(sep="\n\n");
    print("Estimate of I_{\\psi, \\psi}");
    print(IBB);
    cat(sep="\n\n");
  }
  #
  UU1 <- t(UU1 + diag(n));
  UU2 <- t(UU2 + diag(n));
  #
  skor.theta.u.n <- as.vector(skor.theta.u(nu0, U[,1], U[,2]));
  skor.theta.v.n <- as.vector(skor.theta.v(nu0, U[,1], U[,2]));
  #
  skor.theta.u.n.Z <- skor.theta.u.n * Z;
  skor.theta.v.n.Z <- skor.theta.v.n * Z;
  skor.theta.u.n.XZ <- skor.theta.u.n * XZ;
  skor.theta.v.n.XZ <- skor.theta.v.n * XZ;
  #
  V.uZ <- matrix(NA, nrow=n, ncol=dZ);
  V.vZ <- matrix(NA, nrow=n, ncol=dZ);
  V.uXZ <- matrix(NA, nrow=n, ncol=dXZ);
  V.vXZ <- matrix(NA, nrow=n, ncol=dXZ);
  #
  for(j in 1:dZ){
    V.uZ[,j] <- 1/n*colSums(skor.theta.u.n.Z[,j] * UU1);
    V.vZ[,j] <- 1/n*colSums(skor.theta.v.n.Z[,j] * UU2);
  }
  for(j in 1:dXZ){
    V.uXZ[,j] <- 1/n*colSums(skor.theta.u.n.XZ[,j] * UU1);
    V.vXZ[,j] <- 1/n*colSums(skor.theta.v.n.XZ[,j] * UU2);
  }
  #
  # browser();
  #
  # Calculating Zi to estimate the variance
  UiV <- solve(IBB)%*%t(UiZ + V.uZ + V.vZ);
  Zi <- UiXZ + V.uXZ + V.vXZ - t(IAB%*%UiV);
  #
  # Test statistic
  RST2 <- sum(UA*solve(var(Zi), UA))/n;
  ret <- rep(NA, 2);
  ret[1] <- 1 - pchisq(RST2, df=dXZ);
  # print(var(Zi))
  # browser();
  #
  # Permutation version of the test
  if(B > 0){
    RST2.perm <- foreach(l = 1:B, .combine="c", .inorder=FALSE, .verbose=FALSE) %dopar% {
      qqq <- sample(1:n);
      Xp <- X[qqq,];
      #
      if(qqq.inter){
        Xintp <- Xint[qqq,];
        ZintXp <- Zint*Xintp;
        XpZ <- cbind(Xp, ZintXp);
      }else{
        XpZ <- Xp;
      }
      # browser();
      UiXZ <- skor.theta.n * XpZ;
      UA <- colSums(as.matrix(UiXZ));
      #
      IAB <- t(XpZ)%*%(I.theta.nZ);
      #
      skor.theta.u.n.XZ <- as.matrix(skor.theta.u.n * XpZ);
      skor.theta.v.n.XZ <- as.matrix(skor.theta.v.n * XpZ);
      #
      for(j in 1:dXZ){
        V.uXZ[,j] <- 1/n*colSums(skor.theta.u.n.XZ[,j] * UU1);
        V.vXZ[,j] <- 1/n*colSums(skor.theta.v.n.XZ[,j] * UU2);
      }
      #
      Zi <- UiXZ + V.uXZ + V.vXZ - t(IAB%*%UiV);
      sum(UA*solve(var(Zi), UA))/n;
    }
    #
    ret[2] <- 1/(B+1) * (1 + sum(RST2.perm >= RST2));
  }
  #
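  # Note on the permutation p-value computed above (a reading of the code): the rows of X
  # (and of Xint when interactions are present) are permuted while Z, the pseudo-observations
  # and the nuisance estimate eta0.tilde stay fixed, the statistic is recomputed B times, and
  # ret[2] = (1 + #{permuted statistics >= RST2}) / (B + 1).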
  tisk <- function(){
    print("------------------------------------------------")
    print(date());
    print("Formula: ")
    print(formula);
    cat(sep="\n\n")
    #
    if(ncol(Z) > 1){
      print("Testing for a partial effect of a covariate X given Z.")
      cat(sep="\n\n")
      print("Terms corresponding to the fixed covariate Z : ");
      print(colnames(Z));
      print("Terms corresponding to the covariate X : ");
      print(colnames(X));
      # browser();
      if(qqq.inter){
        print("Terms corresponding to the interactions between X and Z : ");
        print(colnames(xz)[iii+1]);
      }
      if(ncol(xz) - ncol(X) - ncol(Z) - length(iii) > 0){
        print("Warning: Some of the terms in the formula are not included in the test.")
      }
      # browser();
    }else{
      print("Testing for any effect of X (Z is not given).")
      print("Terms corresponding to the covariate X : ");
      print(colnames(X));
      if(ncol(xz) - ncol(X) - ncol(Z) > 0){
        print("Warning: Some of the terms in the formula are not included in the test.")
      }
    }
    cat(sep="\n\n")
    #
    print(paste("Sample size             :", n));
    print(paste("Number of permutations  :", B));
    print(paste("Assumed family          :", family.assumed));
    print(paste("Value of test statistic :", signif(RST2, 3)));
    print(paste("Degrees of freedom      :", dXZ));
    print(paste("P-value (asymptotic)    :", signif(ret[1], 4)));
    print(paste("P-value (permutation)   :", ret[2]));
    #
    print("------------------------------------------------")
  }
  tisk();
  return(ret);
}
#
#
#
# Some examples
n <- 400;
#
# library(copula);
# Y <- rCopula(n, frankCopula(8));
# Y1 <- Y[,1];
# Y2 <- Y[,2];
#
# # Independence copula
# Y1 <- runif(n);
# Y2 <- runif(n);
#
# # Bivariate normal with rho=0.5
# Y1 <- rnorm(n);
# Y2 <- sqrt((1-0.5^2))*rnorm(n) + 0.5*Y1;
X1 <- runif(n)-0.5;
X2 <- runif(n)-0.5;
X3 <- runif(n)-0.5;
X4 <- runif(n)-0.5;
#
# Bivariate normal whose correlation depends on X1
Y1 <- rnorm(n);
Y2 <- sqrt((1-X1^2))*rnorm(n) + abs(X1)*Y1;
DATA <- data.frame(Y1, Y2, X1, X2, X3, X4)
set.seed(200)
# test.effect(cbind(Y1, Y2)~X1, data=DATA, B=999, family.assumed="frank", verbose=T)
test.effect(cbind(Y1, Y2)~X1, given=~X2, data=DATA, B=999, family.assumed="frank", verbose=T)
# test.effect(cbind(Y1, Y2)~poly(X1,2), data=DATA, B=999, family.assumed="frank", verbose=T)
# test.effect(cbind(Y1, Y2)~poly(X1,2), data=DATA, B=999, family.assumed="clayton", verbose=T)
# test.effect(cbind(Y1, Y2)~poly(X1,2), data=DATA, B=999, family.assumed="gumbel", verbose=T)
#
# Warning: Using the function poly together with 'given' might not work properly:
# test.effect(cbind(Y1, Y2)~poly(X1,2)+X2, given=~X2, data=DATA, B=99, family.assumed="frank", verbose=T)  # does not work properly
# test.effect(cbind(Y1, Y2)~X1, given=~X2, data=DATA, B=99, family.assumed="frank", verbose=T)
# test.effect(cbind(Y1, Y2)~poly(X1,2), given=~poly(X2,3), data=DATA, B=99, family.assumed="frank", verbose=T)
# test.effect(cbind(Y1, Y2)~ X2 + I(X2^2) + X1*X2*X3 + X1 + I(X1^2) + X4 + X3*X4, data=DATA, B=99, family.assumed="frank", verbose=T)
# test.effect(cbind(Y1, Y2)~ X2 + I(X2^2) + X1*X2*X3 + X1 + I(X1^2) + X4 + X3*X4, given=~X1+X3, data=DATA, B=0, family.assumed="frank", verbose=T)
# test.effect(cbind(Y1, Y2)~ X2 + I(X2^2) + X1*X2*X3 + X1 + I(X1^2) + X4 + X3*X4, given=~X1+X3, data=DATA, B=999, family.assumed="frank", verbose=T)
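#
# Additional illustration (not part of the original set of examples; it only assumes the
# objects DATA and the function test.effect defined above). In the data-generating model
# used here the dependence between Y1 and Y2 varies with X1 but not with X2, so these calls
# probe the test when the tested covariate has no effect; they are commented out like the
# other optional examples.
# test.effect(cbind(Y1, Y2)~X2, data=DATA, B=199, family.assumed="frank", verbose=FALSE)
# test.effect(cbind(Y1, Y2)~X2, given=~X1, data=DATA, B=199, family.assumed="frank", verbose=FALSE)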