# Comparing two samples based on the logspline estimates of their 
# distribution functions 
# Note it needs the logspline package of Kooperberg, C.
# Author: Wei Pan, weip@biostat.umn.edu
# INPUT: 1) datafile: name of the file containing the data
#        2) B: number of bootstrap replications
#        3) range.x: range of the survival time to be considered
runLogspline<-function(datafile, nkt=5){
x0<-seq(0,2,length=101)
  dat<-read.table(datafile)
  nseq<-1:length(dat$V2)
  ind<-nseq[dat$V2>=9999]
  fit0<-logspline.fit(interval=cbind(dat$V1[-ind], dat$V2[-ind]),
      right=dat$V1[ind], lbound=0, ubound=3, nknots=nkt, penalty=3)
  sink(paste("LGSPLN1/survKnts", nkt, sep=""), append=T)
  cat(1-plogspline(x0, fit0))
  cat("\n")
  sink()
}


test2Logspline<-function(datafile, B=500, nkt=4, ubd=3, 
                         range.x=c(0,2), debug=F){
x0<-seq(0,range.x[2],length=101*range.x[2]/2)
  dat<-read.table(datafile)
  n<-length(dat$V2)
  nseq<-1:length(dat$V2)
  sam1<-nseq[dat$V3==0]
  sam2<-nseq[dat$V3==1]
  LC1<-dat$V1[sam1]
  RC1<-dat$V2[sam1]
  LC2<-dat$V1[sam2]
  RC2<-dat$V2[sam2]
  n1<-length(LC1)
  nseq1<-1:length(LC1)
    anyind1<-any(RC1>=9999)
    if (anyind1) ind1<-nseq1[RC1>=9999]
  n2<-length(LC2)
  nseq2<-1:length(LC2)
    anyind2<-any(RC2>=9999)
    if (anyind2) ind2<-nseq2[RC2>=9999]
  pen1<-log(length(LC1))*2/3
  pen2<-log(length(LC2))*2/3
  if (anyind1) 
     fit1<-logspline.fit(interval=cbind(LC1[-ind1], RC1[-ind1]),
      right=LC1[ind1], lbound=0, ubound=ubd, nknots=nkt, penalty=pen1)
  else
     fit1<-logspline.fit(interval=cbind(LC1, RC1),
      lbound=0, ubound=ubd, nknots=nkt, penalty=pen1)
  y1<-plogspline(x0, fit1)
  if (anyind2) 
     fit2<-logspline.fit(interval=cbind(LC2[-ind2], RC2[-ind2]),
       right=LC2[ind2], lbound=0, ubound=ubd, nknots=nkt, penalty=pen2)
  else
     fit2<-logspline.fit(interval=cbind(LC2, RC2),
        lbound=0, ubound=ubd, nknots=nkt, penalty=pen2)
  y2<-plogspline(x0, fit2)
  KSstat0<-max(abs(y1-y2))
  Dstat0<-sum(y1-y2)*(x0[2]-x0[1])
if (debug==T){
#   cat("stat0=",stat0,"\n")
   plot(x0, 1-y1, type="l")
   lines(x0, 1-y2, lty=2)
   }

  #bootstrap
  b<-1
  KSngt<-KSnlt<-0
  Dngt<-Dnlt<-0
  for (b in 1:B){
    subsam1<-sample(1:n, n1, replace=T)
    subsam2<-sample(1:n, n2, replace=T)
    lc1<-dat$V1[subsam1]
    rc1<-dat$V2[subsam1]
    lc2<-dat$V1[subsam2]
    rc2<-dat$V2[subsam2]
    anyind1<-any(rc1>=9999)
    if (anyind1) ind1<-nseq1[rc1>=9999]
    anyind2<-any(rc2>=9999)
    if (anyind2) ind2<-nseq2[rc2>=9999]
    if (anyind1)
        fit1<-logspline.fit(interval=cbind(lc1[-ind1], rc1[-ind1]),
          right=lc1[ind1], lbound=0, ubound=ubd, nknots=nkt, penalty=pen1)
    else
        fit1<-logspline.fit(interval=cbind(lc1, rc1),
          lbound=0, ubound=ubd, nknots=nkt, penalty=pen1)
    y1<-plogspline(x0, fit1)
    if (anyind2)
        fit2<-logspline.fit(interval=cbind(lc2[-ind2], rc2[-ind2]),
          right=lc2[ind2], lbound=0, ubound=ubd, nknots=nkt, penalty=pen2)
    else
        fit2<-logspline.fit(interval=cbind(lc2, rc2),
           lbound=0, ubound=ubd, nknots=nkt, penalty=pen2)
    y2<-plogspline(x0, fit2)
    KSstat<-max(abs(y1-y2))
    Dstat<-sum(y1-y2)*(x0[2]-x0[1])
if (debug==T){
#cat("  ", stat, " ");
plot(x0, 1-y1, type="l")
lines(x0, 1-y2, lty=2)
   }
    if (KSstat0<KSstat) KSnlt<-KSnlt+1
    else if (KSstat0>KSstat) KSngt<-KSngt+1
    if (Dstat0<Dstat) Dnlt<-Dnlt+1
    else if (Dstat0>Dstat) Dngt<-Dngt+1
    }

if (debug==F){
  sink(paste(paste("LGSPLN1/2TestKnts", nkt, sep=""),paste("B",B,sep=""),
       sep=""), append=T)
  cat(KSnlt," ", KSngt," ", KSstat0, 
      " ", Dnlt," ",Dngt," ", Dstat0,"\n")
  sink()
  }

}