###############################################################################
# Iterative Nelson's estimator (INE) for left-truncated and right-censored data.
# See the end of this file for an example.
#Reference:
#Pan, W. and Chappell, R. (1998)
#  ``A Nonparametric Estimator of Survival Functions
#    for Arbitrarily Truncated and Censored Data".
#  Lifetime Data Analysis, 4, 187-202.
#Author: Wei Pan, June 2001
#        weip@biostat.umn.edu
#        Division of Biostat, Univ. of Minnesota-Minneapolis
#        Minneapolis, MN 55455
###############################################################################

###############################################################################
#Function: calculating INE
#Input:
#  lT -- a vector containing left-truncation times
#  Y  -- a vector containing the minimum of event and censoring times
#  delta -- a vector containing the indicator of whether Y is an
#           event time (if delta=1) or a censoring time (if delta=0)
#  maxIter -- maximum number of iterations in calculating INE
#  Tol -- tolerance for convergence of the algorithm: the maximum
#         difference of estimated survival probabilities across all
#         time points between two consecutive iterations
#Output:
#  A list containing two components:
#    time -- a vector of time points
#    surv -- a vector of corresponding survival probabilities
###############################################################################
#NOTE: using the starting value as ENE or not does NOT matter!
#      8/24/04, Wei Pan
#NOTE: earlier versions missed that there may be a positive prob mass
#      at +infty!!
#THANKS A LOT to John Fieberg for helping find the bugs!!!
###############################################################################

ine.surv<-function(lT, Y, delta, maxIter=3000, Tol=1e-6, debug=F) {

  if (length(lT)!=length(Y) || length(lT)!=length(delta))
    cat("Error with data: lT, Y and delta must have an equal length!\n")
  if (any(lT<0) || any(Y<0))
    cat("ERROR: truncation and event/censoring times have to be non-negative!\n")

  N<-length(lT)
  dt0<-rep(0, sum(delta))   #locations of the jumps of probability mass
  n<-1                      #number of jumps of probability mass

  if (sum(delta)==0){
    cat("WARNING: All observations are censored!\n")
    list(time=0, surv=1)
  } else {
    #the distinct event times are the candidate jump points:
    YY<-sort(Y[delta==1])
    dt0[1]<-YY[1]
    if (length(YY)>1)
      for(i in 2:length(YY))
        if (YY[i]>dt0[n]) { n<-n+1; dt0[n]<-YY[i] }

    ## is the largest obs right-censored?
    if (any(delta==0) && max(Y[delta==1]) < max(Y[delta==0])) {
      #then there is a positive prob mass at +infty, represented by 1e+10:
      n<-n+1
      dt0[n]<-1e+10
    }
    dt<-dt0[1:n]

    #starting value: a uniform prob mass over the n jump points
    #(ENE, as commented out below, could be used instead; see the NOTE above):
    newdp<-rep(1/n, n)
    #d0<-R0<-rep(0, n)
    #for(k in 1:n){
    # d0[k]<-sum(delta[Y==dt[k]])
    # R0[k]<-length(Y[dt[k]>=lT & dt[k]<=Y])
    # if (k==1) newdp[k]<-1-exp(-d0[k]/R0[k])
    # else newdp[k]<-exp(-sum(d0[1:(k-1)]/R0[1:(k-1)])) -
    #      exp(-sum(d0[1:k]/R0[1:k]))
    # if (k==n && R0[k]==0) newdp[k]<-1-sum(newdp[1:(k-1)])
    #}

    lambda<-rep(0, n)
    d<-R<-rep(0, N)
    dp<-rep(0, n)
    iter<-0
    while (iter < maxIter && max(abs(newdp-dp))>Tol){
      iter<-iter+1
      if (debug) cat("Iter=", iter, newdp[1:3], "\n")
      dp<-newdp
      for(i in 1:N){
        d[i]<-sum(dp[(dt>=Y[i] & delta[i]==0) | (dt==Y[i] & delta[i]==1)])
        R[i]<-sum(dp[dt>=lT[i]])
      }
      for(k in 1:n){
        lambda[k]<-sum(dp[k]/d[(dt[k]>=Y & delta==0) | (dt[k]==Y & delta==1)])
        if (any(dt[k] < lT)) lambda[k]<-lambda[k] + sum(dp[k]/R[dt[k] < lT])
      }
      if (debug) cat("At iter=", iter, " d=", d, "\n")
      if (debug) cat("At iter=", iter, " lambda=", lambda, "\n")
      M<-sum(lambda)
      S0<-1
      for(k in 1:n){
        S1<-S0*exp(-lambda[k]/M)
        newdp[k]<-S0-S1
        S0<-S1
        M<-M - lambda[k]
      }
      if (debug) cat("At iter=", iter, " newdp=", newdp, "\n")
    }
    if (iter>=maxIter)
      cat("WARNING: No convergence after ", iter, " iterations!\n")

    #sink("ine.res2")
    #cat("Iter=", iter, "\n")
    #for(k in 1:n) cat(dt[k], newdp[k], "\n")
    #sink()

    surv<-rep(1, n)
    for(k in 1:n) surv[k]<-1-sum(newdp[1:k])
    if (dt[1]==0) list(time=dt[1:n], surv=surv)
    else list(time=c(0, dt[1:n]), surv=c(1, surv))
  } #else (i.e. having events)
}
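##A minimal usage sketch for ine.surv() (the numbers and the object name
## `fit' below are only for illustration, not from the original example at
## the end of this file):
#lT<-c(0.0, 0.5, 0.2, 1.0, 0.3)        #left-truncation times
#Y<-c(2.1, 1.7, 0.9, 2.5, 1.1)         #min(event, censoring) times, each > lT
#delta<-c(1, 0, 1, 1, 0)               #1 = event observed, 0 = right-censored
#fit<-ine.surv(lT, Y, delta)
#cbind(time=fit$time, surv=fit$surv)   #the estimated survival curve (a step function)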
###############################################################################
#Function: using the nonparametric bootstrap (resampling observations)
#          to calculate point-wise percentile confidence intervals
#          of the survival probabilities estimated by INE
#Input:
#  lT -- a vector containing left-truncation times
#  Y  -- a vector containing the minimum of event and censoring times
#  delta -- a vector containing the indicator of whether Y is an
#           event time (if delta=1) or a censoring time (if delta=0)
#  maxIter -- maximum number of iterations in calculating INE
#  Tol -- tolerance for convergence of the algorithm: the maximum
#         difference of estimated survival probabilities across all
#         time points between two consecutive iterations
#      -- the above parameters are passed on to ine.surv()
#  B -- number of bootstrap replications
#  alpha -- the confidence level is (1 - alpha)
#  outf -- if =T, the results are also written to a file named
#          "INEb"B (with B replaced by its numerical value), which
#          contains B+1 columns: the first is the time points, the
#          second is the estimated survival prob's based on the given
#          data set, and the others are B-1 estimated survival prob's
#          based on B-1 bootstrap samples
#Output:
#  A list containing six components:
#    time -- a vector of time points
#    surv -- a vector of corresponding survival probabilities
#    lowbd -- lower bounds of the point-wise CI's
#    upbd -- upper bounds of the point-wise CI's
#    B, alpha -- as given in the input
###############################################################################

bt.ine.surv<-function(lT, Y, delta, B=200, alpha=0.05, maxIter=3000, Tol=1e-6,
                      debug=F, outf=F) {

  res0<-ine.surv(lT, Y, delta, maxIter=maxIter, Tol=Tol)
  if (debug) cat("****** Time=", res0$time, "\n")
  if (debug) cat("****** Surv=", res0$surv, "\n")

  n<-length(res0$time)
  if (res0$time[n]<1e+10){
    #extend the time grid to 1e+10 (representing +infty) if needed:
    time<-c(res0$time, 1e+10)
    n<-n+1
    Ss<-Ssort<-matrix(1, nrow=n, ncol=B)
    Ss[, 1]<-c(res0$surv, 0)
  } else {
    time<-res0$time
    Ss<-Ssort<-matrix(1, nrow=n, ncol=B)
    Ss[, 1]<-res0$surv
  }

  N<-length(lT)
  for(b in 2:B){
    if (debug) cat("b=", b, "\n")
    indx<-sample(1:N, N, replace=T)
    res<-ine.surv(lT[indx], Y[indx], delta[indx], maxIter=maxIter, Tol=Tol)
    if (debug) cat("** Time=", res$time, "\n")
    if (debug) cat("** Surv=", res$surv, "\n")

    #map the bootstrap estimates onto the common grid of time points:
    n1<-length(res$time)
    i<-1
    k<-1
    while (k<=n1 && i<=n){
      while (i<=n && time[i] < res$time[k]){
        Ss[i, b]<-Ss[i-1, b]
        i<-i+1
      }
      if (time[i] == res$time[k]){ Ss[i, b]<-res$surv[k]; i<-i+1 }
      k<-k+1
    }
    if (i<=n) for(j in i:n) Ss[j, b]<-Ss[i-1, b]
    if (debug) cat("$$$ Time=", time, "\n")
    if (debug) cat("$$$ Surv=", Ss[,b], "\n")
  }

  for(i in 1:n) Ssort[i,]<-sort(Ss[i,])
  low<-floor(B*alpha/2)
  if (low<1) low<-1
  up<-ceiling(B*(1-alpha/2))
  if (up>B) up<-B

  if (outf==T) {
    sink(paste("INEb", B, sep=""))
    for(i in 1:n) cat(time[i], Ss[i, ], "\n")
    sink()
  }

  list(time=time, surv=Ss[, 1], lowbd=Ssort[, low], upbd=Ssort[, up],
       B=B, alpha=alpha)
}
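##A self-contained sketch of a bootstrap CI call on simulated data (the
## simulation scheme and the values of N and B are only for illustration):
#set.seed(1)
#N<-40
#lT<-runif(N, 0, 1)                    #left-truncation times
#T0<-lT + rexp(N)                      #event times, generated to exceed lT
#C<-lT + rexp(N, rate=0.5)             #censoring times, also exceeding lT
#Y<-pmin(T0, C)
#delta<-as.numeric(T0<=C)
#bt.fit<-bt.ine.surv(lT, Y, delta, B=100, alpha=0.05)
#cbind(time=bt.fit$time, surv=bt.fit$surv,
#      lower=bt.fit$lowbd, upper=bt.fit$upbd)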
###############################################################################
#Function: drawing a survival curve based on the given (time, surv) pairs
#Input:
#  time -- time points
#  surv -- corresponding survival probabilities
#  New  -- create a new plot if New=T; otherwise, add onto the current plot
#  All other parameters are the same as in the plot() function
#Output:
#  no return value, but a curve is drawn
###############################################################################

draw.ine.surv<-function(time, surv, New=F, lty=2, lwd=2, xlim=c(0, 5),
                        xlab="Time", ylab="Cumulative Survival Probability", ...)
{
  if (New)
    plot(0, 0, type="n", xlim=xlim, ylim=c(0,1), las=1, xlab=xlab, ylab=ylab, ...)
  n<-length(time)
  #draw the step function: a horizontal piece, then the drop at the next time point
  for(i in seq_len(n-1)){
    lines(time[i:(i+1)], c(surv[i], surv[i]), lty=lty, lwd=lwd, ...)
    lines(c(time[i+1], time[i+1]), c(surv[i], surv[i+1]), lty=lty, lwd=lwd, ...)
  }
  lines(c(time[n], time[n]+1), c(surv[n], surv[n]), lty=lty, lwd=lwd, ...)
}

##Examples:
##Read in a data set: the first three columns
## contain lT, Y and delta respectively:
#dat<-read.table("deer.dat2")
#ine.fit<-ine.surv(dat$V1, dat$V2, dat$V3)
#draw.ine.surv(ine.fit$time, ine.fit$surv, xlim=range(c(dat$V1, dat$V2)),
#              New=T, lty=1)

#bt.ine.fit<-bt.ine.surv(dat$V1, dat$V2, dat$V3, B=200, alpha=0.05, outf=T)
#draw.ine.surv(bt.ine.fit$time, bt.ine.fit$surv, xlim=range(c(dat$V1, dat$V2)),
#              New=T, lty=1)
#draw.ine.surv(bt.ine.fit$time, bt.ine.fit$lowbd, lty=2)
#draw.ine.surv(bt.ine.fit$time, bt.ine.fit$upbd, lty=2)

##Or, based on the output file of the above call bt.ine.surv(..., outf=T),
## one can obtain CI's for other alpha levels:
#dat1<-read.table("INEb200")
#n<-length(dat1$V1)
#B<-200
## B should be consistent with the suffix of the file name, here "INEb200"
#Ss<-matrix(0, nrow=n, ncol=B)
#for(i in 1:n) Ss[i, ]<-sort(as.numeric(dat1[i, -1]))
#alpha<-0.1
##or any other value
#low<-floor(B*alpha/2)
#if (low<1) low<-1
#up<-ceiling(B*(1-alpha/2))
#if (up>B) up<-B
#time<-dat1$V1
#lowbd<-Ss[, low]
#upbd<-Ss[, up]
#draw.ine.surv(time, dat1$V2, xlim=range(time), New=T, lty=1)
##Recall that the second column of INEb200 contains the estimates for the original data
#draw.ine.surv(time, lowbd, lty=2)
#draw.ine.surv(time, upbd, lty=2)
##then you may try another value of alpha:
#alpha<-0.2
#low<- ..... so on
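##A possible helper (the name `eval.ine.surv' is just a suggestion): evaluate
## the estimated survival curve returned by ine.surv() or bt.ine.surv() at
## arbitrary time points t, using the same step-function convention drawn by
## draw.ine.surv(), i.e. surv[k] applies on [time[k], time[k+1]):
#eval.ine.surv<-function(time, surv, t)
#  surv[findInterval(t, time)]   #findInterval() gives the index of the last time[k] <= t
##e.g., survival probabilities at times 0.5, 1 and 2 from a fitted curve `fit':
#eval.ine.surv(fit$time, fit$surv, c(0.5, 1, 2))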