###############################################################################
# Iterative Nelson's estimator (INE) for left-truncated and right-censored data.
# See the end of this file for an example.
#Reference:
#Pan, W. and Chappell, R. (1998)
#  ``A Nonparametric Estimator of Survival Functions
#    for Arbitrarily Truncated and Censored Data".
#  Lifetime Data Analysis, 4, 187-202.
#Author: Wei Pan, June 2001
#        weip@biostat.umn.edu
#        Division of Biostat, Univ. of Minnesota-Minneapolis
#        Minneapolis, MN 55455
###############################################################################

###############################################################################
#Function: calculating INE
#Input:
#  lT -- a vector containing left-truncation times
#  Y  -- a vector containing the minimum of event and censoring times
#  delta -- a vector containing the indicator of whether Y is an
#           event time (if delta=1) or a censoring time (if delta=0)
#  maxIter -- maximum number of iterations in calculating INE
#  Tol -- tolerance for convergence of the algorithm: the maximum
#         difference of estimated survival probabilities across all
#         time points between two consecutive iterations
#Output:
#  A list containing two components:
#    time -- a vector of time points
#    surv -- a vector of corresponding survival probabilities
###############################################################################
#NOTE: using the starting value as ENE or not does NOT matter!
#      8/24/04, Wei Pan
#NOTE: earlier versions missed that there may be a positive prob mass
#      at +infty!!
#THANKS A LOT to John Fieberg for helping find the bugs!!!
###############################################################################

ine.surv<-function(lT, Y, delta, maxIter=3000, Tol=1e-6, debug=F) {

  if (length(lT)!=length(Y) || length(lT)!=length(delta))
    cat("Error with data: lT, Y and delta must have an equal length!\n")
  if (any(lT<0) || any(Y<0))
    cat("ERROR: truncation and event/censoring times have to be non-negative!\n")

  N<-length(lT)
  dt0<-rep(0, sum(delta))   #locations of the jumps of probability mass
  n<-1                      #number of jumps of probability mass

  if (sum(delta)==0){
    cat("WARNING: All observations are censored!\n")
    list(time=0, surv=1)
  } else {
    #the distinct event times are the candidate jump points:
    YY<-sort(Y[delta==1])
    dt0[1]<-YY[1]
    if (length(YY)>1)
      for(i in 2:length(YY))
        if (YY[i]>dt0[n]) { n<-n+1; dt0[n]<-YY[i] }

    ## is the largest obs right-censored?
    if (any(delta==0) && max(Y[delta==1]) < max(Y[delta==0])) {
      #then there is a positive prob mass at +infty, represented by 1e+10:
      n<-n+1
      dt0[n]<-1e+10
    }
    dt<-dt0[1:n]

    #starting value: a uniform prob mass over the n jump points
    #(ENE, as commented out below, could be used instead; see the NOTE above):
    newdp<-rep(1/n, n)
    #d0<-R0<-rep(0, n)
    #for(k in 1:n){
    # d0[k]<-sum(delta[Y==dt[k]])
    # R0[k]<-length(Y[dt[k]>=lT & dt[k]<=Y])
    # if (k==1) newdp[k]<-1-exp(-d0[k]/R0[k])
    # else newdp[k]<-exp(-sum(d0[1:(k-1)]/R0[1:(k-1)])) -
    #      exp(-sum(d0[1:k]/R0[1:k]))
    # if (k==n && R0[k]==0) newdp[k]<-1-sum(newdp[1:(k-1)])
    #}

    lambda<-rep(0, n)
    d<-R<-rep(0, N)
    dp<-rep(0, n)
    iter<-0
    while (iter < maxIter && max(abs(newdp-dp))>Tol){
      iter<-iter+1
      if (debug) cat("Iter=", iter, newdp[1:3], "\n")
      dp<-newdp
      for(i in 1:N){
        d[i]<-sum(dp[(dt>=Y[i] & delta[i]==0) | (dt==Y[i] & delta[i]==1)])
        R[i]<-sum(dp[dt>=lT[i]])
      }
      for(k in 1:n){
        lambda[k]<-sum(dp[k]/d[(dt[k]>=Y & delta==0) | (dt[k]==Y & delta==1)])
        if (any(dt[k] < lT)) lambda[k]<-lambda[k] + sum(dp[k]/R[dt[k] < lT])
      }
      if (debug) cat("At iter=", iter, " d=", d, "\n")
      if (debug) cat("At iter=", iter, " lambda=", lambda, "\n")
      M<-sum(lambda)
      S0<-1
      for(k in 1:n){
        S1<-S0*exp(-lambda[k]/M)
        newdp[k]<-S0-S1
        S0<-S1
        M<-M - lambda[k]
      }
      if (debug) cat("At iter=", iter, " newdp=", newdp, "\n")
    }
    if (iter>=maxIter)
      cat("WARNING: No convergence after ", iter, " iterations!\n")

    #sink("ine.res2")
    #cat("Iter=", iter, "\n")
    #for(k in 1:n) cat(dt[k], newdp[k], "\n")
    #sink()

    surv<-rep(1, n)
    for(k in 1:n) surv[k]<-1-sum(newdp[1:k])
    if (dt[1]==0) list(time=dt[1:n], surv=surv)
    else list(time=c(0, dt[1:n]), surv=c(1, surv))
  } #else (i.e. having events)
}
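##A minimal usage sketch for ine.surv() (the numbers and the object name
## `fit' below are only for illustration, not from the original example at
## the end of this file):
#lT<-c(0.0, 0.5, 0.2, 1.0, 0.3)        #left-truncation times
#Y<-c(2.1, 1.7, 0.9, 2.5, 1.1)         #min(event, censoring) times, each > lT
#delta<-c(1, 0, 1, 1, 0)               #1 = event observed, 0 = right-censored
#fit<-ine.surv(lT, Y, delta)
#cbind(time=fit$time, surv=fit$surv)   #the estimated survival curve (a step function)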
###############################################################################
#Function: using the nonparametric bootstrap (resampling observations)
#          to calculate point-wise percentile confidence intervals
#          of the survival probabilities estimated by INE
#Input:
#  lT -- a vector containing left-truncation times
#  Y  -- a vector containing the minimum of event and censoring times
#  delta -- a vector containing the indicator of whether Y is an
#           event time (if delta=1) or a censoring time (if delta=0)
#  maxIter -- maximum number of iterations in calculating INE
#  Tol -- tolerance for convergence of the algorithm: the maximum
#         difference of estimated survival probabilities across all
#         time points between two consecutive iterations
#      -- the above parameters are passed on to ine.surv()
#  B -- number of bootstrap replications
#  alpha -- the confidence level is (1 - alpha)
#  outf -- if =T, the results are also written to a file named
#          "INEb"B (with B replaced by its numerical value), which
#          contains B+1 columns: the first is the time points, the
#          second is the estimated survival prob's based on the given
#          data set, and the others are B-1 estimated survival prob's
#          based on B-1 bootstrap samples
#Output:
#  A list containing six components:
#    time -- a vector of time points
#    surv -- a vector of corresponding survival probabilities
#    lowbd -- lower bounds of the point-wise CI's
#    upbd -- upper bounds of the point-wise CI's
#    B, alpha -- as given in the input
###############################################################################

bt.ine.surv<-function(lT, Y, delta, B=200, alpha=0.05, maxIter=3000, Tol=1e-6,
                      debug=F, outf=F) {

  res0<-ine.surv(lT, Y, delta, maxIter=maxIter, Tol=Tol)
  if (debug) cat("****** Time=", res0$time, "\n")
  if (debug) cat("****** Surv=", res0$surv, "\n")

  n<-length(res0$time)
  if (res0$time[n]<1e+10){
    #extend the time grid to 1e+10 (representing +infty) if needed:
    time<-c(res0$time, 1e+10)
    n<-n+1
    Ss<-Ssort<-matrix(1, nrow=n, ncol=B)
    Ss[, 1]<-c(res0$surv, 0)
  } else {
    time<-res0$time
    Ss<-Ssort<-matrix(1, nrow=n, ncol=B)
    Ss[, 1]<-res0$surv
  }

  N<-length(lT)
  for(b in 2:B){
    if (debug) cat("b=", b, "\n")
    indx<-sample(1:N, N, replace=T)
    res<-ine.surv(lT[indx], Y[indx], delta[indx], maxIter=maxIter, Tol=Tol)
    if (debug) cat("** Time=", res$time, "\n")
    if (debug) cat("** Surv=", res$surv, "\n")

    #map the bootstrap estimates onto the common grid of time points:
    n1<-length(res$time)
    i<-1
    k<-1
    while (k<=n1 && i<=n){
      while (i<=n && time[i] < res$time[k]){
        Ss[i, b]<-Ss[i-1, b]
        i<-i+1
      }
      if (time[i] == res$time[k]){ Ss[i, b]<-res$surv[k]; i<-i+1 }
      k<-k+1
    }
    if (i<=n) for(j in i:n) Ss[j, b]<-Ss[i-1, b]
    if (debug) cat("$$$ Time=", time, "\n")
    if (debug) cat("$$$ Surv=", Ss[,b], "\n")
  }

  for(i in 1:n) Ssort[i,]<-sort(Ss[i,])
  low<-floor(B*alpha/2)
  if (low<1) low<-1
  up<-ceiling(B*(1-alpha/2))
  if (up>B) up<-B

  if (outf==T) {
    sink(paste("INEb", B, sep=""))
    for(i in 1:n) cat(time[i], Ss[i, ], "\n")
    sink()
  }

  list(time=time, surv=Ss[, 1], lowbd=Ssort[, low], upbd=Ssort[, up],
       B=B, alpha=alpha)
}
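##A self-contained sketch of a bootstrap CI call on simulated data (the
## simulation scheme and the values of N and B are only for illustration):
#set.seed(1)
#N<-40
#lT<-runif(N, 0, 1)                    #left-truncation times
#T0<-lT + rexp(N)                      #event times, generated to exceed lT
#C<-lT + rexp(N, rate=0.5)             #censoring times, also exceeding lT
#Y<-pmin(T0, C)
#delta<-as.numeric(T0<=C)
#bt.fit<-bt.ine.surv(lT, Y, delta, B=100, alpha=0.05)
#cbind(time=bt.fit$time, surv=bt.fit$surv,
#      lower=bt.fit$lowbd, upper=bt.fit$upbd)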
###############################################################################
#Function: drawing a survival curve based on the given (time, surv) pairs
#Input:
#  time -- time points
#  surv -- corresponding survival probabilities
#  New  -- create a new plot if New=T; otherwise, add onto the current plot
#  All other parameters are the same as in the plot() function
#Output:
#  no return value, but a curve is drawn
###############################################################################

draw.ine.surv<-function(time, surv, New=F, lty=2, lwd=2, xlim=c(0, 5),
                        xlab="Time", ylab="Cumulative Survival Probability", ...)
{
  if (New)
    plot(0, 0, type="n", xlim=xlim, ylim=c(0,1), las=1, xlab=xlab, ylab=ylab, ...)
  n<-length(time)
  #draw the step function: a horizontal piece, then the drop at the next time point
  for(i in seq_len(n-1)){
    lines(time[i:(i+1)], c(surv[i], surv[i]), lty=lty, lwd=lwd, ...)
    lines(c(time[i+1], time[i+1]), c(surv[i], surv[i+1]), lty=lty, lwd=lwd, ...)
  }
  lines(c(time[n], time[n]+1), c(surv[n], surv[n]), lty=lty, lwd=lwd, ...)
}

##Examples:
##Read in a data set: the first three columns
## contain lT, Y and delta respectively:
#dat<-read.table("deer.dat2")
#ine.fit<-ine.surv(dat$V1, dat$V2, dat$V3)
#draw.ine.surv(ine.fit$time, ine.fit$surv, xlim=range(c(dat$V1, dat$V2)),
#              New=T, lty=1)

#bt.ine.fit<-bt.ine.surv(dat$V1, dat$V2, dat$V3, B=200, alpha=0.05, outf=T)
#draw.ine.surv(bt.ine.fit$time, bt.ine.fit$surv, xlim=range(c(dat$V1, dat$V2)),
#              New=T, lty=1)
#draw.ine.surv(bt.ine.fit$time, bt.ine.fit$lowbd, lty=2)
#draw.ine.surv(bt.ine.fit$time, bt.ine.fit$upbd, lty=2)

##Or, based on the output file of the above call bt.ine.surv(..., outf=T),
## one can obtain CI's for other alpha levels:
#dat1<-read.table("INEb200")
#n<-length(dat1$V1)
#B<-200
## B should be consistent with the suffix of the file name, here "INEb200"
#Ss<-matrix(0, nrow=n, ncol=B)
#for(i in 1:n) Ss[i, ]<-sort(as.numeric(dat1[i, -1]))
#alpha<-0.1
##or any other value
#low<-floor(B*alpha/2)
#if (low<1) low<-1
#up<-ceiling(B*(1-alpha/2))
#if (up>B) up<-B
#time<-dat1$V1
#lowbd<-Ss[, low]
#upbd<-Ss[, up]
#draw.ine.surv(time, dat1$V2, xlim=range(time), New=T, lty=1)
##Recall that the second column of INEb200 contains the estimates for the original data
#draw.ine.surv(time, lowbd, lty=2)
#draw.ine.surv(time, upbd, lty=2)
##then you may try another value of alpha:
#alpha<-0.2
#low<- ..... so on
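##A possible helper (the name `eval.ine.surv' is just a suggestion): evaluate
## the estimated survival curve returned by ine.surv() or bt.ine.surv() at
## arbitrary time points t, using the same step-function convention drawn by
## draw.ine.surv(), i.e. surv[k] applies on [time[k], time[k+1]):
#eval.ine.surv<-function(time, surv, t)
#  surv[findInterval(t, time)]   #findInterval() gives the index of the last time[k] <= t
##e.g., survival probabilities at times 0.5, 1 and 2 from a fitted curve `fit':
#eval.ine.surv(fit$time, fit$surv, c(0.5, 1, 2))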