/* #Iteratice Convex Minorant algorithm to compute NPMLE for left truncation #AND interval censored data. modified from ~/prelim/data/wesdr/ftb4.c, and so [pi,qi]'s are reducted, as in Turnbull's paper (after Frydman's correction)! 1) Log-likelihood increment is taken as convergence criterion, instead of max increament of probability mass. 2) Double reals are used for improved precision. Version 1: Fri Nov 8 10:23:27 CST 1996 BY Wei Pan, Biostat Dept., UW-Madison from Version 1, which is parameterized in cumulative hazards, in which some premature early convergences happen. this may caused by inaccurate quadratic approximation from dG() which ignores the non-diagonal elements of the Hessian of the objective function (i.e. log-likelihood). v1.5: no need for dG(), hence the steepest decscent alg. Mon Nov 11 10:45:35 CST 1996 copied from icm1.5.c with: 1) stronger convergence criterion: max(p(k+1)-p(k)) #include #include #include #include #include #define MAX_N_Obs 10000 static int comp(f1,f2) double *f1, *f2; { if (*f1 < *f2) return(-1); else if (*f1 > *f2) return(1); else return(0); } /* L_infinite distance */ double Linf_dist(a,b,n) double *a,*b; int n; { int i; double s1, s=0; for(i=1; i<=n; i++) { s1 = (s1=a[i]-b[i])>0? s1 : -s1; if (s < s1) s=s1; } /* s += (a[i]-b[i])*(a[i]-b[i]); s = (float)sqrt((double)s); */ return(s); } /* copied from mono mle1.2.c, Nov 8, 96 make a Greatest Convex Minorant from interval hazards from output of N-C-P. March 14, 96 */ gcm(q,nh,n) double *q, *nh; int n; { int i,j,k,last; double min_slope, slope; nh[0]=q[0]=0; for(i=2; i<=n; i++){ nh[i] += nh[i-1]; } i=0; while(i1; i--){ nh[i] -= nh[i-1]; } } #define TB_EPSILON 1.0e-100 #define RIGHTCENSORING 9999 #define LARGENUM 1.0e+10 #define TB_MAX_ITER 5000 void icm(INfname, IsOneFile) char *INfname; int IsOneFile; { char OUTfname[100],OUTfname2[100]; int i,j,k,n,N, iter, iter2; int INDlt[MAX_N_Obs], INDlc[MAX_N_Obs], INDrc[MAX_N_Obs]; /* indices of the corrsponding LT,LC,RC in ordered y[] */ double Tolerence=0.00001, M ; double pTolerence=0.0001 ; double LT[MAX_N_Obs], LC[MAX_N_Obs], RC[MAX_N_Obs], y[MAX_N_Obs], wt[MAX_N_Obs], dW[MAX_N_Obs], dG[MAX_N_Obs], dpis[MAX_N_Obs]; double *tp1=&(LT[1]), *tp2=&(LC[1]), *tp3=&(RC[1]); double lr[2*MAX_N_Obs], d1[2*MAX_N_Obs], d2[2*MAX_N_Obs], pis[2*MAX_N_Obs],npis[3*MAX_N_Obs]; double L0, Li, li, nu, dn, stepsize; FILE *fp; struct rusage ru; /* resource utilization, such as time */ fp=fopen(INfname, "r"); if (fp==NULL) perror("data file can't be opened!"); N=0; while ((iter=fscanf(fp, "%lf%lf%lf",tp1++,tp2++,tp3++))>0){ N++; if (LC[N]==RC[N]) RC[N] += TB_EPSILON; /* notice that tp1~3 point to LT,LC,RC */ /* Ease the case for "double censoring" */ /* following added on June 17, 96 */ /* deleted on Sun Nov 10 18:49:49 CST 1996 if (LC[N]==LT[N]) LT[N] -= TB_EPSILON; */ if (N>=MAX_N_Obs) perror("too large data set!"); } if (iter<0 && iter!=EOF) perror("reading data file!"); fclose(fp); j=1; for(i=1; i<=N; i++){ lr[i]=LC[i]; lr[i+N]=LT[i]; lr[i+2*N]=RC[i]; } tp1=&(lr[1]); qsort(tp1, N,sizeof(double), comp); tp1=&(lr[1+N]); qsort(tp1, 2*N,sizeof(double), comp); n=0; i=1; j=N+1; y[0]=0; while (i<=N && j<=3*N ){ /* && lr[i]=lr[j]) j++; else if (i=RIGHTCENSORING) n--; */ for(i=1; i<=N; i++){ INDlt[i]=INDlc[i]=INDrc[i]=-1; for(j=1; j<=n; j++) { if (INDlt[i]==-1 && LT[i]=RIGHTCENSORING){ if (INDlc[i]==j) dW[j] -= 1; } else { if (INDlc[i]==j ) { dW[j] -= exp(-pis[j])/(exp(-pis[j])-exp(-pis[INDrc[i]])); dG[j] += exp(-pis[j]-pis[INDrc[i]])/((exp(-pis[j])-exp(-pis[INDrc[i]]))*(exp(-pis[j])-exp(-pis[INDrc[i]]))); } if (INDrc[i]==j ) { dW[j] += exp(-pis[j])/(exp(-pis[INDlc[i]])-exp(-pis[j])); dG[j] += exp(-pis[j]-pis[INDlc[i]])/((exp(-pis[j])-exp(-pis[INDlc[i]]))*(exp(-pis[j])-exp(-pis[INDlc[i]]))); } } } } for(j=1; j<=n; j++) { /* if (dG[j]>0) npis[j] = pis[j]+ dW[j]/dG[j]; else npis[j] = npis[j] = pis[j]+ dW[j] ; */ npis[j] = pis[j]+ dW[j] ; } gcm(wt, npis, n); for(j=1; j<=n; j++) dpis[j] = npis[j]-pis[j]; stepsize=1; while (stepsize>1e-10){ for(j=1; j<=n; j++) { npis[j] = pis[j]+ stepsize*dpis[j]; if (npis[j]<0) npis[j] = 0; } npis[0]=0; Li=0; for(i=1; i<=N; i++){ nu= exp(-npis[INDlt[i]]); dn = exp(-npis[INDlc[i]]); if (RC[i]L0) break; stepsize /= 2; } /* for(j=1; j<=n; j++) printf(" %f ", npis[j]); printf(" npis[n]= %lf \n", npis[n]); */ if (Li-L0L0){ for(j=1; j<=n; j++) pis[j]=npis[j]; L0=Li; } if (iter>=TB_MAX_ITER) printf("\n\nWARNING: No convergence after %d iterations!!\n\n",iter); printf("\n\n************RESULT:(after %d iterations)\n\n",iter); if (!IsOneFile){ strcpy(OUTfname, INfname); sprintf(OUTfname2,"GPres%lf",Tolerence); strcat(OUTfname,OUTfname2); if ((fp=fopen(OUTfname, "w"))==NULL) perror("writing output data!"); } y[0]=0; npis[0]=0; for(j=0; j<=n; j++){ if (IsOneFile) printf(" %lf : %lf\n",y[j],npis[j]); else fprintf(fp," %lf %lf\n",y[j],exp(-pis[j])); } if (!IsOneFile) fclose(fp); getrusage(RUSAGE_SELF, &ru); /* strcpy(OUTfname2,OUTfname); strcat(OUTfname,".speed"); if ((fp=fopen(OUTfname, "a"))==NULL) perror("writing output data!"); */ /* if ((fp=fopen("ICM2/speed2.5", "a"))==NULL) perror("writing output data!"); fprintf(fp, "%i %lf %lf \n",iter, L0, ru.ru_utime.tv_sec+ru.ru_utime.tv_usec/1000000.0+ ru.ru_stime.tv_sec+ru.ru_stime.tv_usec/1000000.0); fclose(fp); */ } main(argc, argv) int argc; char *argv[]; { char fname[100]; int i, start, end; if (argc==3) { start =atoi(argv[1]); end = atoi(argv[2]); for (i=start; i<=end; i++) { sprintf(fname,"g%d",i); icm(fname,0); } } else if (argc==2) icm(argv[1],0); else{ printf("format: ntb2 start_num end_num OR ntb2 datafile\n"); exit(1); } }