### ### PURPOSE: Function to draw empirical CDF's together with a value of ### the Kolmogorov-Smirnov statistic ### ### PROGRAMMER: Arnost Komarek ### ### LOG: 20110415 created ### upravena verze 8.12.2017 ### ============================================================================ figKS <- function(y, group, lwd = 2, col = c("blue4", "red3")){ ## y ... observed values} ## group ... group indicator (factor with two levels) if (!is.factor(group)) group <- factor(group) LEV <- levels(group) if (length(LEV) != 2) stop("Only implemented for comparison of two groups.") if (length(col) == 1) col <- rep(col, 2) KSobj <- ks.test(y[group == LEV[1]], y[group == LEV[2]]) KS <- KSobj$stat KS.pval <- KSobj$p.value if (KS.pval < 0.001) KS.pval <- "P < 0.001" else KS.pval <- paste("P = ", format(round(KS.pval, 3), nsmall=3), sep="") EDF <- list(ecdf(y[group == LEV[1]]), ecdf(y[group == LEV[2]])) Knots <- c(knots(EDF[[1]]), knots(EDF[[2]])) Knots <- Knots[order(Knots)] Differences <- abs(EDF[[1]](Knots) - EDF[[2]](Knots)) KS.stat <- max(Differences) KS.knot <- Knots[which.max(Differences)] rval1 <- data.frame(Knot=Knots, Diff=Differences) rval2 <- c(KS.knot, KS.stat, EDF[[1]](KS.knot), EDF[[2]](KS.knot)) names(rval2) <- c("knot", "D", "F1", "F2") plot(EDF[[1]], verticals=TRUE, do.points=FALSE, xlim=range(y, na.rm=TRUE), col=col[1], lwd=lwd, main="", xlab="y", ylab="CDF") plot(EDF[[2]], verticals=TRUE, do.points=FALSE, col=col[2], lwd=lwd, add=TRUE) text(min(y, na.rm=TRUE), 0.9, labels = paste("D = ", format(round(rval2["D"], 3), nsmall=3), sep=""), pos=4) text(min(y, na.rm=TRUE), 0.85, labels = KS.pval, pos=4) lines(rep(rval2["knot"], 2), rval2[c("F1", "F2")], col="green4", lwd=lwd) abline(v=rval2["knot"],col="green4",lty=2) legend("bottomright", legend=LEV, lty=1, lwd=2, col=col[1:2], ) return(rval2) } #### funkce simuluj: simuluj <- function(n, m, scenar, alfa=0.05, nopak=1000){ # nopak ... pocet opakovani # n ... rozsah prvniho vyberu # m ... rozsah druheho vyberu phodnoty <- list(); phodnoty$ttest <- numeric(nopak); phodnoty$welch <- numeric(nopak); phodnoty$wilcox <- numeric(nopak); phodnoty$ks <- numeric(nopak); if(scenar=="normal-posun"){ generuj.x <- function(n) rnorm(n); generuj.y <- function(m) rnorm(m) + 1; } if(scenar=="normal-hetero"){ generuj.x <- function(n) rnorm(n); generuj.y <- function(m) sqrt(3)*rnorm(m); } if(scenar=="normal-exp"){ generuj.x <- function(n) rnorm(n); generuj.y <- function(m) rexp(m)-1; } if(scenar=="normal-exp2"){ generuj.x <- function(n) rnorm(n); generuj.y <- function(m) rexp(m)-qexp(0.5); } for(i in 1:nopak){ X <- generuj.x(n); # prvni vyber Y <- generuj.y(m); # druhy vyber phodnoty$ttest[i] <- t.test(X, Y, var.equal=TRUE)$p.value; phodnoty$welch[i] <- t.test(X, Y)$p.value; phodnoty$wilcox[i] <- wilcox.test(X, Y)$p.value; phodnoty$ks[i] <- ks.test(X, Y)$p.value; } # Odhad skutecne hladiny testu pri predepsane hladine 0.05 # vysl <- c(mean(phodnoty.ttest <= 0.05), mean(phodnoty.welch <= 0.05), # mean(phodnoty.wilcox <= 0.05), mean(phodnoty.ks <= 0.05)); fce <- function(x) mean(x <= alfa) vysl <- unlist(lapply(phodnoty, fce)); names(vysl) <- c("t-test", "welch-test", "wilcoxon", "K-S") return(vysl); }