Solution to Task 1 of Exercise 3
Back to course page
load(url('http://www.karlin.mff.cuni.cz/~pesta/NMST531/data/km_all.RData'))
all<- all[order(all$time,-all$delta),]
d <-sum(all$delta == 1 & !duplicated(all$time))
d1 <- sum(all$delta == 1 & all$type ==1 & !duplicated(all$time))
n <-length(all$time)
n1 <- sum(all$type==1)
dt <- table(all$time[all$delta==1], all$type[all$delta==1])
# columns for the output table
dj <- dt[,1]+dt[,2]
d1j <- dt[,1]
tj <- as.numeric(rownames(dt))
yj <- rev(1:n)[all$delta==1 & !duplicated(all$time)]
y1j <- c(); for(i in 1:d) y1j <- c(y1j,sum(all$time>=tj[i] & all$type==1))
data <- as.data.frame(cbind(tj, d1j, dj, y1j, yj), row.names=1:d)
data$ej <- dj * y1j/yj
data$rozd <- d1j - data$ej
data
## tj d1j dj y1j yj ej rozd
## 1 0.030 1 1 50 101 0.4950495 0.50495050
## 2 0.493 1 1 49 100 0.4900000 0.51000000
## 3 0.658 0 1 48 99 0.4848485 -0.48484848
## 4 0.822 0 1 48 98 0.4897959 -0.48979592
## 5 0.855 1 1 48 97 0.4948454 0.50515464
## 6 1.184 1 1 47 96 0.4895833 0.51041667
## 7 1.283 1 1 46 95 0.4842105 0.51578947
## 8 1.414 0 1 45 94 0.4787234 -0.47872340
## 9 1.480 1 1 45 93 0.4838710 0.51612903
## 10 1.776 1 1 44 92 0.4782609 0.52173913
## 11 2.138 1 1 43 91 0.4725275 0.52747253
## 12 2.500 1 2 42 90 0.9333333 0.06666667
## 13 2.763 1 1 41 88 0.4659091 0.53409091
## 14 2.993 1 1 40 87 0.4597701 0.54022989
## 15 3.224 1 1 39 86 0.4534884 0.54651163
## 16 3.322 0 1 38 85 0.4470588 -0.44705882
## 17 3.421 1 1 38 84 0.4523810 0.54761905
## 18 3.816 0 1 37 83 0.4457831 -0.44578313
## 19 4.178 1 1 37 82 0.4512195 0.54878049
## 20 4.737 0 1 35 80 0.4375000 -0.43750000
## 21 4.934 0 1 35 78 0.4487179 -0.44871795
## 22 5.033 0 1 35 77 0.4545455 -0.45454545
## 23 5.691 1 1 35 76 0.4605263 0.53947368
## 24 5.757 0 1 34 75 0.4533333 -0.45333333
## 25 5.855 0 1 34 74 0.4594595 -0.45945946
## 26 5.987 0 1 33 72 0.4583333 -0.45833333
## 27 6.151 0 1 33 71 0.4647887 -0.46478873
## 28 6.217 0 1 33 70 0.4714286 -0.47142857
## 29 6.941 1 1 33 68 0.4852941 0.51470588
## 30 8.651 0 1 30 65 0.4615385 -0.46153846
## 31 8.711 0 1 30 64 0.4687500 -0.46875000
## 32 8.882 2 2 30 63 0.9523810 1.04761905
## 33 10.329 0 1 27 59 0.4576271 -0.45762712
## 34 11.480 1 2 27 58 0.9310345 0.06896552
## 35 11.513 1 1 26 56 0.4642857 0.53571429
## 36 12.007 0 1 25 55 0.4545455 -0.45454545
## 37 12.237 0 1 24 52 0.4615385 -0.46153846
## 38 12.796 1 1 24 50 0.4800000 0.52000000
## 39 15.461 0 1 21 44 0.4772727 -0.47727273
## 40 15.757 0 1 21 43 0.4883721 -0.48837209
## 41 16.480 0 1 21 42 0.5000000 -0.50000000
## 42 16.711 0 1 20 40 0.5000000 -0.50000000
## 43 17.237 0 1 19 37 0.5135135 -0.51351351
## 44 18.092 0 1 19 34 0.5588235 -0.55882353
## 45 20.066 1 1 19 31 0.6129032 0.38709677
## 46 23.158 0 1 16 27 0.5925926 -0.59259259
## 47 56.086 0 1 3 4 0.7500000 -0.75000000
sum(data$rozd) # numerator of the logrank test statistic
## [1] -2.169765
# Comparison with survdiff
library(survival)
(s <- survdiff(Surv(time,delta)~type,data=all))
## Call:
## survdiff(formula = Surv(time, delta) ~ type, data = all)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## type=1 50 22 24.2 0.195 0.382
## type=2 51 28 25.8 0.182 0.382
##
## Chisq= 0.4 on 1 degrees of freedom, p= 0.537
sqrt(s$chisq * s$var[1])
## [1] 2.169765