# Comparing two samples based on the logspline estimates of their # distribution functions # Note it needs the logspline package of Kooperberg, C. # Author: Wei Pan, weip@biostat.umn.edu # INPUT: 1) datafile: name of the file containing the data # 2) B: number of bootstrap replications # 3) range.x: range of the survival time to be considered runLogspline<-function(datafile, nkt=5){ x0<-seq(0,2,length=101) dat<-read.table(datafile) nseq<-1:length(dat$V2) ind<-nseq[dat$V2>=9999] fit0<-logspline.fit(interval=cbind(dat$V1[-ind], dat$V2[-ind]), right=dat$V1[ind], lbound=0, ubound=3, nknots=nkt, penalty=3) sink(paste("LGSPLN1/survKnts", nkt, sep=""), append=T) cat(1-plogspline(x0, fit0)) cat("\n") sink() } test2Logspline<-function(datafile, B=500, nkt=4, ubd=3, range.x=c(0,2), debug=F){ x0<-seq(0,range.x[2],length=101*range.x[2]/2) dat<-read.table(datafile) n<-length(dat$V2) nseq<-1:length(dat$V2) sam1<-nseq[dat$V3==0] sam2<-nseq[dat$V3==1] LC1<-dat$V1[sam1] RC1<-dat$V2[sam1] LC2<-dat$V1[sam2] RC2<-dat$V2[sam2] n1<-length(LC1) nseq1<-1:length(LC1) anyind1<-any(RC1>=9999) if (anyind1) ind1<-nseq1[RC1>=9999] n2<-length(LC2) nseq2<-1:length(LC2) anyind2<-any(RC2>=9999) if (anyind2) ind2<-nseq2[RC2>=9999] pen1<-log(length(LC1))*2/3 pen2<-log(length(LC2))*2/3 if (anyind1) fit1<-logspline.fit(interval=cbind(LC1[-ind1], RC1[-ind1]), right=LC1[ind1], lbound=0, ubound=ubd, nknots=nkt, penalty=pen1) else fit1<-logspline.fit(interval=cbind(LC1, RC1), lbound=0, ubound=ubd, nknots=nkt, penalty=pen1) y1<-plogspline(x0, fit1) if (anyind2) fit2<-logspline.fit(interval=cbind(LC2[-ind2], RC2[-ind2]), right=LC2[ind2], lbound=0, ubound=ubd, nknots=nkt, penalty=pen2) else fit2<-logspline.fit(interval=cbind(LC2, RC2), lbound=0, ubound=ubd, nknots=nkt, penalty=pen2) y2<-plogspline(x0, fit2) KSstat0<-max(abs(y1-y2)) Dstat0<-sum(y1-y2)*(x0[2]-x0[1]) if (debug==T){ # cat("stat0=",stat0,"\n") plot(x0, 1-y1, type="l") lines(x0, 1-y2, lty=2) } #bootstrap b<-1 KSngt<-KSnlt<-0 Dngt<-Dnlt<-0 for (b in 1:B){ subsam1<-sample(1:n, n1, replace=T) subsam2<-sample(1:n, n2, replace=T) lc1<-dat$V1[subsam1] rc1<-dat$V2[subsam1] lc2<-dat$V1[subsam2] rc2<-dat$V2[subsam2] anyind1<-any(rc1>=9999) if (anyind1) ind1<-nseq1[rc1>=9999] anyind2<-any(rc2>=9999) if (anyind2) ind2<-nseq2[rc2>=9999] if (anyind1) fit1<-logspline.fit(interval=cbind(lc1[-ind1], rc1[-ind1]), right=lc1[ind1], lbound=0, ubound=ubd, nknots=nkt, penalty=pen1) else fit1<-logspline.fit(interval=cbind(lc1, rc1), lbound=0, ubound=ubd, nknots=nkt, penalty=pen1) y1<-plogspline(x0, fit1) if (anyind2) fit2<-logspline.fit(interval=cbind(lc2[-ind2], rc2[-ind2]), right=lc2[ind2], lbound=0, ubound=ubd, nknots=nkt, penalty=pen2) else fit2<-logspline.fit(interval=cbind(lc2, rc2), lbound=0, ubound=ubd, nknots=nkt, penalty=pen2) y2<-plogspline(x0, fit2) KSstat<-max(abs(y1-y2)) Dstat<-sum(y1-y2)*(x0[2]-x0[1]) if (debug==T){ #cat(" ", stat, " "); plot(x0, 1-y1, type="l") lines(x0, 1-y2, lty=2) } if (KSstat0KSstat) KSngt<-KSngt+1 if (Dstat0Dstat) Dngt<-Dngt+1 } if (debug==F){ sink(paste(paste("LGSPLN1/2TestKnts", nkt, sep=""),paste("B",B,sep=""), sep=""), append=T) cat(KSnlt," ", KSngt," ", KSstat0, " ", Dnlt," ",Dngt," ", Dstat0,"\n") sink() } }