Solution to Task 1 of Exercise 3


Back to course page


load(url('http://www.karlin.mff.cuni.cz/~pesta/NMST531/data/km_all.RData'))

all<- all[order(all$time,-all$delta),]
d <-sum(all$delta == 1 & !duplicated(all$time))
d1 <- sum(all$delta == 1 & all$type ==1 & !duplicated(all$time))
n <-length(all$time)
n1 <- sum(all$type==1)
dt <- table(all$time[all$delta==1], all$type[all$delta==1])

# columns for the output table
dj <- dt[,1]+dt[,2]
d1j <- dt[,1]
tj <- as.numeric(rownames(dt))
yj <- rev(1:n)[all$delta==1 & !duplicated(all$time)]
y1j <- c(); for(i in 1:d) y1j <- c(y1j,sum(all$time>=tj[i] & all$type==1))
data <- as.data.frame(cbind(tj, d1j, dj, y1j, yj), row.names=1:d)
data$ej <- dj * y1j/yj
data$rozd <- d1j - data$ej
data
##        tj d1j dj y1j  yj        ej        rozd
## 1   0.030   1  1  50 101 0.4950495  0.50495050
## 2   0.493   1  1  49 100 0.4900000  0.51000000
## 3   0.658   0  1  48  99 0.4848485 -0.48484848
## 4   0.822   0  1  48  98 0.4897959 -0.48979592
## 5   0.855   1  1  48  97 0.4948454  0.50515464
## 6   1.184   1  1  47  96 0.4895833  0.51041667
## 7   1.283   1  1  46  95 0.4842105  0.51578947
## 8   1.414   0  1  45  94 0.4787234 -0.47872340
## 9   1.480   1  1  45  93 0.4838710  0.51612903
## 10  1.776   1  1  44  92 0.4782609  0.52173913
## 11  2.138   1  1  43  91 0.4725275  0.52747253
## 12  2.500   1  2  42  90 0.9333333  0.06666667
## 13  2.763   1  1  41  88 0.4659091  0.53409091
## 14  2.993   1  1  40  87 0.4597701  0.54022989
## 15  3.224   1  1  39  86 0.4534884  0.54651163
## 16  3.322   0  1  38  85 0.4470588 -0.44705882
## 17  3.421   1  1  38  84 0.4523810  0.54761905
## 18  3.816   0  1  37  83 0.4457831 -0.44578313
## 19  4.178   1  1  37  82 0.4512195  0.54878049
## 20  4.737   0  1  35  80 0.4375000 -0.43750000
## 21  4.934   0  1  35  78 0.4487179 -0.44871795
## 22  5.033   0  1  35  77 0.4545455 -0.45454545
## 23  5.691   1  1  35  76 0.4605263  0.53947368
## 24  5.757   0  1  34  75 0.4533333 -0.45333333
## 25  5.855   0  1  34  74 0.4594595 -0.45945946
## 26  5.987   0  1  33  72 0.4583333 -0.45833333
## 27  6.151   0  1  33  71 0.4647887 -0.46478873
## 28  6.217   0  1  33  70 0.4714286 -0.47142857
## 29  6.941   1  1  33  68 0.4852941  0.51470588
## 30  8.651   0  1  30  65 0.4615385 -0.46153846
## 31  8.711   0  1  30  64 0.4687500 -0.46875000
## 32  8.882   2  2  30  63 0.9523810  1.04761905
## 33 10.329   0  1  27  59 0.4576271 -0.45762712
## 34 11.480   1  2  27  58 0.9310345  0.06896552
## 35 11.513   1  1  26  56 0.4642857  0.53571429
## 36 12.007   0  1  25  55 0.4545455 -0.45454545
## 37 12.237   0  1  24  52 0.4615385 -0.46153846
## 38 12.796   1  1  24  50 0.4800000  0.52000000
## 39 15.461   0  1  21  44 0.4772727 -0.47727273
## 40 15.757   0  1  21  43 0.4883721 -0.48837209
## 41 16.480   0  1  21  42 0.5000000 -0.50000000
## 42 16.711   0  1  20  40 0.5000000 -0.50000000
## 43 17.237   0  1  19  37 0.5135135 -0.51351351
## 44 18.092   0  1  19  34 0.5588235 -0.55882353
## 45 20.066   1  1  19  31 0.6129032  0.38709677
## 46 23.158   0  1  16  27 0.5925926 -0.59259259
## 47 56.086   0  1   3   4 0.7500000 -0.75000000
sum(data$rozd) # numerator of the logrank test statistic
## [1] -2.169765
# Comparison with survdiff
library(survival)
(s <- survdiff(Surv(time,delta)~type,data=all))
## Call:
## survdiff(formula = Surv(time, delta) ~ type, data = all)
## 
##         N Observed Expected (O-E)^2/E (O-E)^2/V
## type=1 50       22     24.2     0.195     0.382
## type=2 51       28     25.8     0.182     0.382
## 
##  Chisq= 0.4  on 1 degrees of freedom, p= 0.537
sqrt(s$chisq * s$var[1])
## [1] 2.169765