/* FUNCTION: Turnbull's (1976) EM algorithm to compute the NPMLE of survival for left truncated AND interval censored data, with Frydman's (1993) correction of the support of the NPMLE. IMPLEMENTATION: Convergence criterion: Log-likelihood increment, instead of max increament of probability mass. AUTHOR: Wei Pan, (done at Biostatistics and Statistics, UW-Madison). Division of Biostatistics University of Minnesota weip@biostat.umn.edu Input: Data file name, idatafname. Output: Data file of estimated survival, odatafname=idatafname.out. Usage: Input Dataset format: Ti Li Ri corresponding to left truncation time and two endpoints of censoring interval, e.g. that 0 1 2 1 2 99999 2 3 3 represents there are two obs: one is censored b/w 1 and 2 with left truncation time at 0; the second is right censored at 2 with left truncation time at 1; the third is exactly observed at 3. Output data format: A_i B_i p_i corresponding to a probability mass p_i in [A_i, B_i). **************** You may also want to change the convergence criterion and maximum number of iterations or maximum number of observations allowed given in the follwoing #define statements. */ #include #include #include #include #include #include #define NPMLEem_EPSILON 1.0e-5 #define NPMLEem_MAX_ITER 10000 #define MAX_N_Obs 10000 /* comparing two double numbers. used in qsort() */ static int comp(f1,f2) double *f1, *f2; { if (*f1 < *f2) return(-1); else if (*f1 > *f2) return(1); else return(0); } /* L_infinite distance b/w two vectors a and b with length n. */ double Linf_dist(a,b,n) double *a,*b; int n; { int i; double s1, s=0; for(i=1; i<=n; i++) { s1 = (s1=a[i]-b[i])>0? s1 : -s1; if (s < s1) s=s1; } /* s += (a[i]-b[i])*(a[i]-b[i]); s = (float)sqrt((double)s); */ return(s); } /* using Turnbull's EM to compute the NPMLE, with Frydman's correction. */ void turnbull(INfname, IsOneFile) char *INfname; /* input dataset file name */ int IsOneFile; { char OUTfname[100],OUTfname2[100]; int i,j,k,n,N, iter; double Tolerence=0.00001, M ; double LT[MAX_N_Obs], LC[MAX_N_Obs], RC[MAX_N_Obs], p[2*MAX_N_Obs],q[2*MAX_N_Obs]; double *tp1=&(LT[1]), *tp2=&(LC[1]), *tp3=&(RC[1]); double lr[2*MAX_N_Obs], d1[2*MAX_N_Obs], d2[2*MAX_N_Obs], pis[2*MAX_N_Obs],npis[3*MAX_N_Obs]; double L0, Li, li, nu, dn; FILE *fp; /* struct rusage ru; */ /* resource utilization, such as time */ fp=fopen(INfname, "r"); if (fp==NULL) perror("data file can't be opened!"); N=0; while ((iter=fscanf(fp, "%lf%lf%lf",tp1++,tp2++,tp3++))>0){ N++; if (LC[N]==RC[N]) RC[N] += NPMLEem_EPSILON; /* notice that tp1~3 point to LT,LC,RC */ /* Ease the case for "double censoring" */ /* following added on June 17, 96 */ if (LC[N]==LT[N]) LT[N] -= NPMLEem_EPSILON; if (N>=MAX_N_Obs) perror("too large data set!"); } if (iter<0 && iter!=EOF) perror("reading data file!"); fclose(fp); j=1; for(i=1; i<=N; i++){ lr[i]=LC[i]; lr[i+N]=LT[i]; lr[i+2*N]=RC[i]; } tp1=&(lr[1]); qsort(tp1, N,sizeof(double), comp); tp1=&(lr[1+N]); qsort(tp1, 2*N,sizeof(double), comp); /* old one? or modified one? n=0; for(i=1; i<2*N; i++) { if (lr[i]==lr[i+1]) continue; for(j=1;j<=N;j++) { if (lr[i]>=LC[j] && lr[i+1]<=RC[j]) { n=n+1; p[n]=lr[i]; q[n]=lr[i+1]; break; } } if (n>=MAX_N_Obs*2) perror("too large data set (i.e. bins)!"); } */ n=0; i=1; j=N+1; while (i<=N && j<=3*N){ if (lr[i]>=lr[j]) j++; else if (i=LC[i] && q[j]<=RC[i]) dn += pis[j]; if (p[j]>=LT[i]) nu += pis[j]; } li=dn/nu; if (li=LC[i] && q[k]<=RC[i]) d1[i] += pis[k]; if (p[k]>=LT[i]) d2[i] += pis[k]; } } for(j=1; j<=n; j++){ lr[j]=0; for(i=1; i<=N; i++){ if (p[j]>=LC[i] && q[j]<=RC[i]) lr[j] += pis[j]/d1[i]; if (p[j]=LC[i] && q[j]<=RC[i]) dn += npis[j]; if (p[j]>=LT[i]) nu += npis[j]; } li=dn/nu; if (li=NPMLEem_MAX_ITER) printf("\n\nWARNING: No convergence after %d iterations!!\n\n",iter); /* printf("loglikelihood=%lf\n",Li); */ printf("\n\n************RESULT:(after %d iterations)\n\n",iter); if (!IsOneFile){ strcpy(OUTfname, ""); strcat(OUTfname, INfname); sprintf(OUTfname2,"NPMLERes%lf",Tolerence); strcat(OUTfname,OUTfname2); if ((fp=fopen(OUTfname, "w"))==NULL) perror("writing output data!"); } for(j=1; j<=n; j++){ if (IsOneFile) printf("[%lf, %lf]: %lf\n",p[j],q[j],npis[j]); else fprintf(fp,"%lf %lf %lf\n",p[j],q[j],npis[j]); } if (!IsOneFile) fclose(fp); /* getrusage(RUSAGE_SELF, &ru); if ((fp=fopen("ftb/speed4.1", "a"))==NULL) perror("writing output data!"); fprintf(fp, "%i %lf %lf \n",iter, Li, ru.ru_utime.tv_sec+ru.ru_utime.tv_usec/1000000.0+ ru.ru_stime.tv_sec+ru.ru_stime.tv_usec/1000000.0); fclose(fp); */ } main(argc, argv) int argc; char *argv[]; { char fname[100]; int i, start, end; if (argc==3) { start =atoi(argv[1]); end = atoi(argv[2]); for (i=start; i<=end; i++) { sprintf(fname,"g%d",i); turnbull(fname,0); } } else if (argc==2) turnbull(argv[1],0); else{ printf("format: npmle start_num end_num OR npmle datafile\n"); exit(1); } }