# Complete case analysis binormal.CCA <- function(Xo, alpha=0.05){ X.cc <- na.omit(Xo); # complete cases Sigma <- var(X.cc); mu <- apply(X.cc, 2, mean); n0 <- nrow(X.cc); kvant <- qt(1-alpha/2, df=n0-1) IS <- matrix(mu, 2, 2) + matrix(c(-1,1), 2, 2, byrow=T)*kvant*sqrt(diag(Sigma))/sqrt(n0); colnames(IS) <- c("lower", "upper") # browser(); res <- list(muhat=cbind(mu, IS), Sigma2hat=Sigma); return(res); } # Available cases analysis binormal.ACA <- function(Xo, alpha=0.05){ nr <- colSums(!is.na(Xo)); (Sigma <- var(Xo, na.rm = TRUE, use="pairwise.complete")); (mu <- apply(Xo, 2, mean, na.rm=TRUE)); kvant <- qt(1-alpha/2, df=nr-1) IS <- matrix(mu, 2, 2) + matrix(c(-1,1), 2, 2, byrow=T)*matrix(kvant, 2, 2)*sqrt(diag(Sigma_CCA))/sqrt(nr); colnames(IS) <- c("lower", "upper") # browser(); res <- list(muhat=cbind(mu, IS), Sigma2hat=Sigma); return(res); } # Simple imputation via the estimate of the conditional expectation binormal.imp <- function(Xo, alpha=0.05){ fit.12 <- lm(Xo[,1]~Xo[,2]); X1hat <- predict(fit.12, data.frame(Xo[,2])); fit.21 <- lm(Xo[,2]~Xo[,1]); X2hat <- predict(fit.21, data.frame(Xo[,1])); # Filling data with imputed values X.imp <- Xo; X.imp[is.na(X.imp[,1]),1]<- X1hat[is.na(X.imp[,1])]; X.imp[is.na(X.imp[,2]),2]<- X2hat[is.na(X.imp[,2])]; (mu <- apply(X.imp, 2, mean)); (Sigma <- var(X.imp)); # browser(); res <- list(muhat=mu, Sigma2hat=Sigma); return(res); } # Multiple imputation via the estimate of the conditional expectation binormal.imp.mult <- function(Xo, alpha=0.05, M=20){ fit.12 <- lm(Xo[,1]~Xo[,2]); X1hat <- predict(fit.12, data.frame(Xo[,2])); fit.21 <- lm(Xo[,2]~Xo[,1]); X2hat <- predict(fit.21, data.frame(Xo[,1])); n0 <- nrow(na.omit(Xo)); # number of observed pairs # Filling data with imputed values sigma.12 <- sqrt(sum(fit.12$residuals^2)/(n0-2)); sigma.21 <- sqrt(sum(fit.21$residuals^2)/(n0-2)); # Filling data with imputed values na.X <- sum(is.na(Xo[,1])); na.Y <- sum(is.na(Xo[,2])); M <- 20; # Number of imputations mu.imp.M <- matrix(NA, nrow=M, ncol=2); Sigma.imp.M <- matrix(NA, nrow=M, ncol=4); for(i in 1:M){ X.imp <- Xo; X.imp[is.na(X.imp[,1]),1] <- X1hat[is.na(X.imp[,1])] + sigma.12 * rnorm(na.X); X.imp[is.na(X.imp[,2]),2] <- X2hat[is.na(X.imp[,2])] + sigma.21 * rnorm(na.Y); mu.imp.M[i,] <- apply(X.imp, 2, mean); Sigma.imp.M[i,] <- as.vector(var(X.imp)); } (mu <- colMeans(mu.imp.M)); (Sigma <- matrix(colMeans(Sigma.imp.M), 2, 2)); # Q: What can be said about mu_imp_mult in comparison with mu_imp ? # # Confidence interval construction # Variance estimation of mu_imp_mult VM <- colMeans(Sigma.imp.M[,c(1,4)])/n; # Estimation of expectation of conditional variance BM <- diag(var(mu.imp.M)); # Estimation of variance of conditional expectation var.mult.imp <- VM + BM; IS <- matrix(mu, 2, 2) + matrix(c(-1,1), 2, 2, byrow=T) * matrix(qt(0.975, df=nr-1), 2, 2) * sqrt(var.mult.imp); colnames(IS) <- c("lower", "upper") # browser(); res <- list(muhat=cbind(mu, IS), Sigma2hat=Sigma); return(res); } # # linear model with only Y missing -- coincides with CCA simply using lm() function linear.EM1 <- function(X, Y, print.iter=FALSE){ qqq <- is.na(Y); # missing values at the first compoenent # Number of missing values nm <- sum(qqq); n0 <- length(Y) - nm; # Initial estimates fit <- lm(Y~X); XX <- model.matrix(fit); # bhat <- fit$coef; n.iter <- 10; # n0 <- nrow(X.cc); n1 <- n - n0; loglik <- function(bhat){ sum((Y - XX%*%bhat)^2) } bhat <- fit$coef; loglik.old <-loglik(bhat); Yimp <- Y; for(i in 1:n.iter){ Yhat <- XX%*%bhat; # predicted Y # Yhat2 <- sum(fit$residuals^2)/(n0) - Yhat^2; # predicted Y^2 Yimp[qqq] <- Yhat[qqq]; fit <- lm(Yimp~X); loglik.new <- loglik(bhat); bhat <- fit$coef; if(print.iter) print(c(i, loglik.new)); # # # Stopping rule # if(abs(loglik.new-loglik.old) < 1e-6){ # break; # }else{ # loglik.old <- loglik.new; # } } res <- list(bhat=bhat, loglik=loglik.new); return(res); } binormal.EM <- function(Xo, print.iter=FALSE){ qqq.X1 <- is.na(Xo[,1]); # missing values at the first compoenent qqq.X2 <- is.na(Xo[,2]); # missing values at the second compoenent qqq.oX1 <- as.logical((!qqq.X1)*(qqq.X2)) # only X1 is observed qqq.oX2 <- as.logical((!qqq.X2)*(qqq.X1)) # only X2 is observed # Number of missing values nm.X1 <- sum(qqq.X1); nm.X2 <- sum(qqq.X2); # Initial estimates X.cc <- na.omit(Xo); # complete cases Sigma_CCA <- var(X.cc); mu_CCA <- apply(X.cc, 2, mean); # 5. EM-algorithm mu_EM <- mu_CCA; Sigma_EM <- Sigma_CCA; n.iter <- 100; n0 <- nrow(X.cc); n1 <- n - n0 - nm.X1 n2 <- n - n0 - nm.X2 loglik <- function(mu, Sigma){ Mu_hat <- matrix(mu, n0, 2, byrow=TRUE); # Contribution of complete cases l0 <- -n0*log(det(Sigma)) - sum(diag((X.cc-Mu_hat)%*%solve(Sigma)%*%t((X.cc-Mu_hat)))); # Contribution when only X1 is observed l1 <- -n1*log(Sigma[1,1]) - sum((Xo[qqq.oX1,1] - mu[1])^2)/Sigma[1,1] # Contribution when only X2 is observed l2 <- -n2*log(Sigma[1,1]) - sum((Xo[qqq.oX2,2] - mu[2])^2)/Sigma[2,2]; l0+l1+l2 } loglik.old <-loglik(mu_EM, Sigma_EM); for(i in 1:n.iter){ beta <- Sigma_EM[1,2]/c(Sigma_EM[2,2], Sigma_EM[1,1]); Yhat <- mu_EM[2] + beta[2]*(Xo[,1] - mu_EM[1]); # predicted Y Xhat <- mu_EM[1] + beta[1]*(Xo[,2] - mu_EM[2]); # predicted X rho <- (Sigma_EM[1,2])/sqrt(Sigma_EM[1,1]*Sigma_EM[2,2]); # correlation coefficient Yhat2 <- Sigma_EM[2,2]*(1 - rho^2) + Yhat^2; # predicted Y^2 Xhat2 <- Sigma_EM[1,1]*(1 - rho^2) + Xhat^2; # predicted X^2 XX <- Xo; XX[qqq.X1,1] <- Xhat[qqq.X1]; XX[qqq.X2,2] <- Yhat[qqq.X2]; mu_EM <- colMeans(XX); XX2 <- Xo^2; XX2[qqq.X1,1] <- Xhat2[qqq.X1]; XX2[qqq.X2,2] <- Yhat2[qqq.X2]; diag(Sigma_EM) <- colMeans(XX2) - mu_EM^2; Sigma_EM[1,2] <- mean(XX[,1]*XX[,2]) - mu_EM[1] * mu_EM[2]; Sigma_EM[2,1] <- Sigma_EM[1,2]; loglik.new <- loglik(mu_EM, Sigma_EM); if(print.iter) print(c(i, loglik.new)); # # Stopping rule if(abs(loglik.new-loglik.old) < 1e-6){ break; }else{ loglik.old <- loglik.new; } } res <- list(muhat=mu_EM, Sigma2hat=Sigma_EM, loglik=loglik.new); return(res); } binormal.analysis <- function(Xo){ qqq.X1 <- is.na(Xo[,1]); # missing values at the first compoenent qqq.X2 <- is.na(Xo[,2]); # missing values at the second compoenent qqq.oX1 <- as.logical((!qqq.X1)*(qqq.X2)) # only X1 is observed qqq.oX2 <- as.logical((!qqq.X2)*(qqq.X1)) # only X2 is observed # Number of missing values nm.X1 <- sum(qqq.X1); nm.X2 <- sum(qqq.X2); # 1. Complete case analysis X.cc <- na.omit(Xo); # complete cases Sigma_CCA <- var(X.cc); mu_CCA <- apply(X.cc, 2, mean); # 2. Available case analysis (Sigma_AC <- var(Xo, na.rm = TRUE, use="pairwise.complete")); (mu_AC <- apply(Xo, 2, mean, na.rm=TRUE)); # 3. Imputation (simple) fit.12 <- lm(Xo[,1]~Xo[,2]); X1hat <- predict(fit.12, data.frame(Xo[,2])); fit.21 <- lm(Xo[,2]~Xo[,1]); X2hat <- predict(fit.21, data.frame(Xo[,1])); # Filling data with imputed values X.imp <- Xo; X.imp[is.na(X.imp[,1]),1]<- X1hat[is.na(X.imp[,1])]; X.imp[is.na(X.imp[,2]),2]<- X2hat[is.na(X.imp[,2])]; Sigma_imp <- var(X.imp); mu_imp <- apply(X.imp, 2, mean); # 5. Multiple imputation sigma.12 <- sqrt(sum(fit.12$residuals^2)/(n-2)); sigma.21 <- sqrt(sum(fit.21$residuals^2)/(n-2)); # Filling data with imputed values na.X <- sum(is.na(Xo[,1])); na.Y <- sum(is.na(Xo[,2])); M <- 20; # Number of imputations mu.imp.M <- matrix(NA, nrow=M, ncol=2); Sigma.imp.M <- matrix(NA, nrow=M, ncol=4); for(i in 1:M){ X.imp <- Xo; X.imp[is.na(X.imp[,1]),1] <- X1hat[is.na(X.imp[,1])] + sigma.12 * rnorm(na.X); X.imp[is.na(X.imp[,2]),2] <- X2hat[is.na(X.imp[,2])] + sigma.21 * rnorm(na.Y); mu.imp.M[i,] <- apply(X.imp, 2, mean); Sigma.imp.M[i,] <- as.vector(var(X.imp)); } mu_imp_mult <- colMeans(mu.imp.M); Sigma_imp_mult <- matrix(colMeans(Sigma.imp.M), 2, 2); # 5. EM-algorithm # mu_EM <- mu_AC; # Sigma_EM <- Sigma_AC; mu_EM <- mu_imp; Sigma_EM <- Sigma_imp; n.iter <- 100; n0 <- nrow(X.cc); n1 <- n - n0 - nm.X1 n2 <- n - n0 - nm.X2 loglik <- function(mu, Sigma){ Mu_hat <- matrix(mu, n0, 2, byrow=TRUE); # Contribution of complete cases l0 <- -n0*log(det(Sigma)) - sum(diag((X.cc-Mu_hat)%*%solve(Sigma)%*%t((X.cc-Mu_hat)))); # Contribution when only X1 is observed l1 <- -n1*log(Sigma[1,1]) - sum((Xo[qqq.oX1,1] - mu[1])^2)/Sigma[1,1] # Contribution when only X2 is observed l2 <- -n2*log(Sigma[1,1]) - sum((Xo[qqq.oX2,2] - mu[2])^2)/Sigma[2,2]; l0+l1+l2 } loglik.old <-loglik(mu_EM, Sigma_EM); for(i in 1:n.iter){ beta <- Sigma_EM[1,2]/c(Sigma_EM[2,2], Sigma_EM[1,1]); Yhat <- mu_EM[2] + beta[2]*(Xo[,1] - mu_EM[1]); # predicted Y Xhat <- mu_EM[1] + beta[1]*(Xo[,2] - mu_EM[2]); # predicted X rho <- (Sigma_EM[1,2])/sqrt(Sigma_EM[1,1]*Sigma_EM[2,2]); # correlation coefficient Yhat2 <- Sigma_EM[2,2]*(1 - rho^2) + Yhat^2; # predicted Y^2 Xhat2 <- Sigma_EM[1,1]*(1 - rho^2) + Xhat^2; # predicted X^2 XX <- Xo; XX[qqq.X1,1] <- Xhat[qqq.X1]; XX[qqq.X2,2] <- Yhat[qqq.X2]; mu_EM <- colMeans(XX); XX2 <- Xo^2; XX2[qqq.X1,1] <- Xhat2[qqq.X1]; XX2[qqq.X2,2] <- Yhat2[qqq.X2]; diag(Sigma_EM) <- colMeans(XX2) - mu_EM^2; Sigma_EM[1,2] <- mean(XX[,1]*XX[,2]) - mu_EM[1] * mu_EM[2]; Sigma_EM[2,1] <- Sigma_EM[1,2]; loglik.new <- loglik(mu_EM, Sigma_EM); print(c(i, loglik.new)); # # Stopping rule if(abs(loglik.new-loglik.old) < 1e-6){ break; }else{ loglik.old <- loglik.new; } } res <- list(); res[[1]] <- list(muhat=mu_CCA, Sigma2hat=Sigma_CCA); res[[2]] <- list(muhat=mu_AC, Sigma2hat=Sigma_AC); res[[3]] <- list(muhat=mu_imp, Sigma2hat=Sigma_imp); res[[4]] <- list(muhat=mu_imp_mult, Sigma2hat=Sigma_imp_mult); res[[5]] <- list(muhat=mu_EM, Sigma2hat=Sigma_EM, loglik=loglik.new); names(res) <- c("CCA", "AC", "Imp", "Mult.imp.", "EM"); return(res); } EM2 <- function(Xo){ qqq.X1 <- is.na(Xo[,1]); # missing values at first compoenent qqq.X2 <- is.na(Xo[,2]); # missing values at first compoenent qqq.oX1 <- as.logical((!qqq.X1)*(qqq.X2)) # only X1 is observed qqq.oX2 <- as.logical((!qqq.X2)*(qqq.X1)) # only X2 is observed # Number of missing values nm.X1 <- sum(qqq.X1); nm.X2 <- sum(qqq.X2); # Initial estimates X.cc <- na.omit(Xo); # complete cases Sigma_EM <- var(X.cc); mu_EM <- mean(X.cc); n.iter <- 100; n0 <- nrow(X.cc); n1 <- n - n0 - nm.X1 n2 <- n - n0 - nm.X2 loglik <- function(mu, Sigma){ Mu_hat <- matrix(mu, n0, 2, byrow=TRUE); # browser(); # Contribution of complete cases l0 <- -n0*log(det(Sigma)) - sum(diag((X.cc-Mu_hat)%*%solve(Sigma)%*%t((X.cc-Mu_hat)))); # Contribution when only X1 is observed l1 <- -n1*log(Sigma[1,1]) - sum((Xo[qqq.oX1,1] - mu)^2)/Sigma[1,1] # Contribution when only X2 is observed l2 <- -n2*log(Sigma[1,1]) - sum((Xo[qqq.oX2,2] - mu)^2)/Sigma[2,2]; l0+l1+l2 } # browser(); loglik.old <- loglik(mu_EM, Sigma_EM); for(i in 1:n.iter){ beta <- Sigma_EM[1,2]/c(Sigma_EM[2,2], Sigma_EM[1,1]); Yhat <- mu_EM + beta[2]*(Xo[,1] - mu_EM); # predicted Y Xhat <- mu_EM + beta[1]*(Xo[,2] - mu_EM); # predicted X rho <- (Sigma_EM[1,2])/sqrt(Sigma_EM[1,1]*Sigma_EM[2,2]); # correlation coefficient Yhat2 <- Sigma_EM[2,2]*(1 - rho^2) + Yhat^2; # predicted Y^2 Xhat2 <- Sigma_EM[1,1]*(1 - rho^2) + Xhat^2; # predicted X^2 XX <- Xo; XX[qqq.X1,1] <- Xhat[qqq.X1]; XX[qqq.X2,2] <- Yhat[qqq.X2]; mu_EM <- mean(XX); XX2 <- Xo^2; XX2[qqq.X1,1] <- Xhat2[qqq.X1]; XX2[qqq.X2,2] <- Yhat2[qqq.X2]; diag(Sigma_EM) <- colMeans(XX2) - mu_EM^2; Sigma_EM[1,2] <- mean(XX[,1]*XX[,2]) - mu_EM * mu_EM; Sigma_EM[2,1] <- Sigma_EM[1,2]; loglik.new <- loglik(mu_EM, Sigma_EM); # print(c(i, mu_EM, as.numeric(Sigma_EM), loglik.new)); # # Stopping rule if(abs(loglik.new-loglik.old) < 1e-6){ break; }else{ loglik.old <- loglik.new; } } res <- list(muhat=mu_CCA, Sigma2hat=Sigma_CCA, loglik=loglik.new); # names(res) <- c("CCA", "AC", "Imp", "Mult.imp.", "EM"); return(res); } # print(vysl <- EM2(Xo)); # mlest(Xo)