/* Using EMS algorithm to estimate the survival function from (left-truncated and) interval-censored data, to obtain Smoothed Nonparametric Estimate (SNE). Reference: Pan, W. and Chappell, R. (1998). ``Estimating Survival Curves with Left-truncated and Interval-censored Data via the EMS Algorithm". Communications in Statistics -- Theory and Methods, 27, 777-793. Some implementation details: (I) Add a smoothing (S) step following E- and M-steps in the EM algorithm. smoothing the probability mass in a neighborhood (i.e. pis[]) directly: 1) modifying smoothing at two end points; 2) if [pn,qn) is the half-line, don't smooth it; AUTHOR: Dr. Wei Pan weip@biostat.umn.edu Division of Biostatistics University of Minnesota Minneapolis, MN 55455 convergence criteria: 1) diff(new_estimate, old_estimate) #include #include #include #include #include #define MAX_N_Obs 3000 static int comp(f1,f2) double *f1, *f2; { if (*f1 < *f2) return(-1); else if (*f1 > *f2) return(1); else return(0); } /* L_infinite distance */ double Linf_dist(a,b,n) double *a,*b; int n; { int i; double s1, s=0; for(i=1; i<=n; i++) { s1 = (s1=a[i]-b[i])>0? s1 : -s1; if (s < s1) s=s1; } /* s += (a[i]-b[i])*(a[i]-b[i]); s = sqrt((double)s); */ return(s); } #define TB_EPSILON 1.0e-10 #define TB_MAX_ITER 3000 #define RIGHT_CENSORING_PT 9999 void turnbull(INfname, IsOneFile) char *INfname; int IsOneFile; { char OUTfname[100], OUTfname2[100]; int i,j,k,n,N, iter, ns; double Tolerence=0.0001, M ; double LT[MAX_N_Obs], LC[MAX_N_Obs], RC[MAX_N_Obs], lr[2*MAX_N_Obs], d1[2*MAX_N_Obs], d2[2*MAX_N_Obs], mpis[2*MAX_N_Obs], /* smoothing */ p[2*MAX_N_Obs],q[2*MAX_N_Obs],pis[2*MAX_N_Obs],npis[3*MAX_N_Obs]; double *tp1=&(LT[1]), *tp2=&(LC[1]), *tp3=&(RC[1]); FILE *fp; struct rusage ru; /* resource utilization, such as time */ /* delete it!!!! * Tolerence=0.00002; */ fp=fopen(INfname, "r"); if (fp==NULL) perror("data file can't be opened!"); N=0; while ((iter=fscanf(fp, "%lf%lf%lf",tp1++,tp2++,tp3++))>0){ N++; if (LC[N]==RC[N]) RC[N] += TB_EPSILON; /* notice that tp1~3 point to LT,LC,RC */ /* Ease the case for "double censoring" */ /* following added on June 17, 96 */ if (LC[N]==LT[N]) LT[N] -= TB_EPSILON; if (N>=MAX_N_Obs) perror("too large data set!"); } if (iter<0 && iter!=EOF) perror("reading data file!"); fclose(fp); j=1; for(i=1; i<=N; i++){ lr[i]=LC[i]; lr[i+N]=LT[i]; lr[i+2*N]=RC[i]; } tp1=&(lr[1]); qsort(tp1, N,sizeof(double), comp); tp1=&(lr[1+N]); qsort(tp1, 2*N,sizeof(double), comp); /* old one? or modified one? n=0; for(i=1; i<2*N; i++) { if (lr[i]==lr[i+1]) continue; for(j=1;j<=N;j++) { if (lr[i]>=LC[j] && lr[i+1]<=RC[j]) { n=n+1; p[n]=lr[i]; q[n]=lr[i+1]; break; } } if (n>=MAX_N_Obs*2) perror("too large data set (i.e. bins)!"); } */ n=0; i=1; j=N+1; while (i<=N && j<=3*N){ if (lr[i]>=lr[j]) j++; else if (i=RIGHT_CENSORING_PT) ns=n-1; else ns=n; iter=0; while(iter=LC[i] && q[k]<=RC[i]) d1[i] += pis[k]; if (p[k]>=LT[i]) d2[i] += pis[k]; } } for(j=1; j<=n; j++){ lr[j]=0; for(i=1; i<=N; i++){ if (p[j]>=LC[i] && q[j]<=RC[i]) lr[j] += pis[j]/d1[i]; if (p[j]=3){ npis[1] = (3*mpis[1]+mpis[2])/4; for(j=2; jns) npis[n]=mpis[n]; npis[0]=0; /* printf("Iteration=%d : \n", iter); for(j=1; j<=n; j++) printf(" %f ", npis[j]); */ if (Linf_dist(pis,npis,n)=TB_MAX_ITER) printf("\n\nWARNING: No convergence after %d iterations!!\n\n",iter); printf("\n\n************RESULT:(after %d iterations)\n\n",iter); if (!IsOneFile){ strcpy(OUTfname, INfname); sprintf(OUTfname2,"EMSres%lf",Tolerence); strcat(OUTfname,OUTfname2); if ((fp=fopen(OUTfname, "w"))==NULL) perror("writing output data!"); } for(j=1; j<=n; j++){ if (IsOneFile) printf("[%lf, %lf]: %lf\n",p[j],q[j],pis[j]); else fprintf(fp,"%lf %lf %lf\n",p[j],q[j],npis[j]); } if (!IsOneFile) fclose(fp); /* getrusage(RUSAGE_SELF, &ru); if ((fp=fopen("EMS3/speed3.1", "a"))==NULL) perror("writing output data!"); fprintf(fp, "%i %lf \n",iter, ru.ru_utime.tv_sec+ru.ru_utime.tv_usec/1000000.0+ ru.ru_stime.tv_sec+ru.ru_stime.tv_usec/1000000.0); fclose(fp); */ } /* main(argc, argv) int argc; char *argv[]; { if (argc==2) turnbull(argv[1]); else turnbull("gen1.cdat"); } */ main(argc, argv) int argc; char *argv[]; { char fname[100]; int i, start, end; if (argc==3) { start =atoi(argv[1]); end = atoi(argv[2]); for (i=start; i<=end; i++) { sprintf(fname,"g%d",i); turnbull(fname,0); } } else if (argc==2) turnbull(argv[1],0); else{ printf("format: ems start_num end_num OR ems datafile\n"); exit(1); } }