/* MLE under the increasing failure rate (i.e. nondecreasing hazard) for left truncated and interval censored data. Gradient Projection Method used to maximize the log-likelihood , the projection to feasible region is done by using Greatest Convex Minorant to get increasing failure rate! It can handle any left-truncated and interval-censored data. ***** For Increasing Hazard Rate *************************** it should support data set with size < 10000 entries; but default is 1000; modify MAX_N_Obs if necessary. Author: Wei Pan, Division of Biostatistics, U of Minnesota April 30, 96 weip@biostat.umn.edu Reference: Pan, W. and Chappell, R. (1998) "Estimating survival curves with left-truncated and interval-censored data under monotone hazards". Biometrics, 54, 1053--1060. Usage: ***Input: A data file name. Data file format: Ti Li Ri corresponding to left truncation time and two endpoints of censoring interval, e.g. that 0 1 2 1 2 99999 2 3 3 represents there are three obs: one is censored b/w 1 and 2 with left truncation time at 0; the second is right censored at 2 with left truncation time at 1; the third is exactly observed at 3. Output data file format: A_i B_i h_i corresponding to an estimated hazard h_i in [A_i, B_i). ***Output: A result file. format: ai bi hi which represents that the estimated hazard rate is hi in (ai, bi). ***Use: 1) compile it: cc sourceFile -o execFile -lm 2) execFile DataFileName 3) the output will be generated in a data file */ #include #include #include #include #include #include #define MAX_N_Obs 1000 static int comp(f1,f2) double *f1, *f2; { if (*f1 < *f2) return(-1); else if (*f1 > *f2) return(1); else return(0); } /* L_infinite distance */ double Linf_dist(a,b,n) double *a,*b; int n; { int i; double s1, s=0; for(i=1; i<=n; i++) { s1 = (s1=a[i]-b[i])>0? s1 : -s1; if (s < s1) s=s1; } return(s); } /* make a Greatest Convex Minorant from interval hazards from output of N-C-P. March 14, 96 */ gcm(q,nh,n) double *q, *nh; int n; { int i,j,k,last; double min_slope, slope; nh[0]=q[0]=0; for(i=2; i<=n; i++){ nh[i] += nh[i-1]; } i=0; while(i1; i--){ nh[i] -= nh[i-1]; } } #define TB_EPSILON 0.000001 #define TB_MAX_ITER 5000 #define TB_MIN_ITER 5 void plwp(INfname) char *INfname; { char OUTfname[100]; char OUTfname2[80]; int i,j,k,n,N, iter; double Tolerence=0.0001, PL, PL2, start,min; double Delta, L0, Li, dLgd, stepsize; /* Amiju stepsize */ double LT[MAX_N_Obs], LC[MAX_N_Obs], RC[MAX_N_Obs], lr[2*MAX_N_Obs], d1[2*MAX_N_Obs], d2[2*MAX_N_Obs], dL[2*MAX_N_Obs], gd[2*MAX_N_Obs], Y[MAX_N_Obs], /* obs'ed pts */ dt[2*MAX_N_Obs], /* width of [pi,qi] */ w[2*MAX_N_Obs], /* weightof [pi,qi] in isotonic regression*/ p[2*MAX_N_Obs],q[2*MAX_N_Obs],h[2*MAX_N_Obs],nh[3*MAX_N_Obs]; double *tp1=&(LT[1]), *tp2=&(LC[1]), *tp3=&(RC[1]); FILE *fp; /* struct rusage ru; */ /* resource utilization, such as time */ fp=fopen(INfname, "r"); N=0; while ((iter=fscanf(fp, "%lf%lf%lf",tp1++,tp2++,tp3++))>0){ N++; if (LC[N]==RC[N]) RC[N] += TB_EPSILON; /* notice that tp1~3 point to LT,LC,RC */ /* Ease the case for "double censoring" */ if (N>=MAX_N_Obs) perror("too large data set!"); } if (iter<0 && iter!=EOF) perror("reading data file!"); fclose(fp); j=1; for(i=1; i<=N; i++){ lr[j++]=LT[i]; lr[j++]=LC[i]; lr[j++]=RC[i]; if (LC[i]==LT[i]) Y[i]=RC[i]; else Y[i]=LC[i]; } tp1=&(lr[1]); qsort(tp1, 3*N,sizeof(double), comp); n=0; for(i=1; i<3*N; i++) { if (lr[i]==lr[i+1]) continue; /* may 23, 96 for(j=1;j<=N;j++) { if (lr[i]>=LT[j] && lr[i+1]<=Y[j]) { */ n=n+1; p[n]=lr[i]; q[n]=lr[i+1]; dt[n]=q[n]-p[n]; if (n>=MAX_N_Obs*2) perror("too large data set (i.e. bins)!"); } for(j=1; j<=n; j++) { nh[j]=h[j]=1.0/n; w[j]=j; /* weight of gcm (isotonic regression */ } /* compute L0=logL(h0) */ L0=0; for(i=1; i<=N; i++){ d1[i]=0; for (j=1; j<=n; j++){ if (p[j]>=LT[i] && q[j]<=LC[i]) d1[i] += nh[j]*dt[j]; else if (p[j]>=LC[i]) break; } d2[i]=0; for (; j<=n; j++){ if (p[j]>=LC[i] && q[j]<=RC[i]) d2[i] += nh[j]*dt[j]; else if (p[j]>=RC[i]) break; } if (d2[i]=LT[i] && q[j]<=RC[i]) { if (p[j]>=LC[i]){ dL[j] += (d2[i]/(1-d2[i])); } else dL[j] -= 1; } else if (p[j]>=RC[i]) break; } for(j=1; j<=n; j++) dL[j] *= dt[j]; for(j=1; j<=n; j++){ nh[j]=h[j]+dL[j]; /* ??? if (nh[j]<0) nh[j]=0; */ } /* Add CVM to get the projection P(h[j]+df[j]) */ gcm(w,nh,n); /*there might be negative ones! */ for(i=0;i<=n;i++){ if (nh[i]<0) nh[i]=0; if (nh[i]>9999) nh[i]=9999; gd[i]=nh[i]-h[i]; /* direction pi=P(xi+df(xi))-xi */ /* printf(" nh=%f ", nh[i]); */ } /**************Amiju Stepsize *****************/ dLgd = 0; for(i=1; i<=n; i++) dLgd += dL[i]*gd[i]; stepsize=1; for(k=1;1;k++){ for(j=1; j<=n; j++) nh[j]=h[j]+stepsize*gd[j]; /* compute logL(nh) */ Li=0; for(i=1; i<=N; i++){ d1[i]=0; for (j=1; j<=n; j++){ if (p[j]>=LT[i] && q[j]<=LC[i]) d1[i] += nh[j]*dt[j]; else if (p[j]>=LC[i]) break; } d2[i]=0; for (; j<=n; j++){ if (p[j]>=LC[i] && q[j]<=RC[i]) d2[i] += nh[j]*dt[j]; else if (p[j]>=RC[i]) break; } if (d2[i]= stepsize*dLgd/4) break; stepsize /= 2; if (stepsize=TB_MIN_ITER) break; */ if ((Delta=Li-L0)=TB_MIN_ITER) break; else for(j=1; j<=n; j++) h[j]=nh[j]; L0=Li; /* for(i=1; i<=5; i++) printf("nh[%d]=%f ",i,nh[i]); printf("\n"); */ } /* if ((fp=fopen("IFR/speed","a"))==NULL) perror("writing output data!"); getrusage(RUSAGE_SELF, &ru); fprintf(fp, "%i %ld %ld %ld %ld\n",iter, ru.ru_utime.tv_sec,ru.ru_utime.tv_usec, ru.ru_stime.tv_sec,ru.ru_stime.tv_usec); fclose(fp); */ printf("\n\n************RESULT:(after %d iterations)\n\n",iter); /* Sat Oct 19 19:00:54 CDT 1996 */ strcpy(OUTfname, ""); strcat(OUTfname, INfname); sprintf(OUTfname2,"IfrRes%lf",Tolerence); strcat(OUTfname,OUTfname2); if ((fp=fopen(OUTfname, "w"))==NULL) perror("writing output data!"); /* combine together the intervals with same hazards . May 22, 96 */ min=nh[1]; fprintf(fp,"%f ", p[1]); for(j=2; j<=n; j++){ if (nh[j]>min){ fprintf(fp,"%lf %lf\n",q[j-1],min); fprintf(fp,"%lf ", p[j]); min=nh[j]; } } fprintf(fp,"%lf %lf\n",q[n],min); fclose(fp); } main(argc, argv) int argc; char *argv[]; { char fname[100]; int i, start, end; if (argc==3) { start =atoi(argv[1]); end = atoi(argv[2]); for (i=start; i<=end; i++) { sprintf(fname,"g%d",i); plwp(fname); } } else if (argc==2){ plwp(argv[1]); } else{ printf("format: mono start_num end_num OR mono datafilename\n"); exit(1); } }