/* Iterative Nelson's Estimator (INE) based on Nelson's estimator (for right-censored data) and Turnbull's self-consistency algorithm (to compute NPMLE) for truncated AND interval censored data. Data file size < 10000 entries. (But you may modify MAX_N_Obs to increase it.) by Wei Pan, Div. of Biostatistics, University of Minnesota (weip@biostat.umn.edu) see npmle.c for Input/Output data format. Reference: 1) Pan, W. and Chappell, R. (1998) ``A Nonparametric Estimator of Survival Functions for Arbitrarily Truncated and Censored Data". Lifetime Data Analysis, Vol. 4, 187-202. */ #include #include #include #include #define MAX_N_Obs 10000 static int comp(f1,f2) double *f1, *f2; { if (*f1 < *f2) return(-1); else if (*f1 > *f2) return(1); else return(0); } /* L_infinite distance */ double Linf_dist(a,b,n) double *a,*b; int n; { int i; double s1, s=0; for(i=1; i<=n; i++) { s1 = (s1=a[i]-b[i])>0? s1 : -s1; if (s < s1) s=s1; } /* s += (a[i]-b[i])*(a[i]-b[i]); s = (float)sqrt((double)s); */ return(s); } #define TB_EPSILON 0.000001 #define TB_MAX_ITER 3000 void nelson(INfname, IsOneFile) char *INfname; int IsOneFile; { char OUTfname[100]; int i,j,k,n,N, iter; double Tolerence=0.00001, M , S0, S1; double LT[MAX_N_Obs], LC[MAX_N_Obs], RC[MAX_N_Obs], lr[2*MAX_N_Obs], d1[2*MAX_N_Obs], d2[2*MAX_N_Obs], p[2*MAX_N_Obs],q[2*MAX_N_Obs],pis[2*MAX_N_Obs],npis[3*MAX_N_Obs]; double *tp1=&(LT[1]), *tp2=&(LC[1]), *tp3=&(RC[1]); FILE *fp; fp=fopen(INfname, "r"); if (fp==NULL) perror("data file can't be opened!"); N=0; while ((iter=fscanf(fp, "%lf%lf%lf",tp1++,tp2++,tp3++))>0){ N++; if (LC[N]==RC[N]) RC[N] += TB_EPSILON; /* notice that tp1~3 point to LT,LC,RC */ /* Ease the case for "double censoring" */ /* following added on June 17, 96 */ if (LC[N]==LT[N]) LT[N] -= TB_EPSILON; if (N>=MAX_N_Obs) perror("too large data set!"); } if (iter<0 && iter!=EOF) perror("reading data file!"); fclose(fp); j=1; for(i=1; i<=N; i++){ lr[i]=LC[i]; lr[i+N]=LT[i]; lr[i+2*N]=RC[i]; } tp1=&(lr[1]); qsort(tp1, N,sizeof(double), comp); tp1=&(lr[1+N]); qsort(tp1, 2*N,sizeof(double), comp); n=0; i=1; j=N+1; while (i<=N && j<=3*N){ if (lr[i]>=lr[j]) j++; else if (i=LC[i] && q[k]<=RC[i]) d1[i] += pis[k]; if (p[k]>=LT[i]) d2[i] += pis[k]; } } for(j=1; j<=n; j++){ lr[j]=0; for(i=1; i<=N; i++){ if (p[j]>=LC[i] && q[j]<=RC[i]) lr[j] += pis[j]/d1[i]; if (p[j]=TB_MAX_ITER) printf("\n\nWARNING: No convergence after %d iterations!!\n\n",iter); printf("\n\n************RESULT:(after %d iterations)\n\n",iter); if (!IsOneFile){ strcpy(OUTfname, INfname); strcat(OUTfname,".INEres"); if ((fp=fopen(OUTfname, "w"))==NULL) perror("writing output data!"); } for(j=1; j<=n; j++){ if (IsOneFile) printf("[%lf, %lf]: %lf\n",p[j],q[j],npis[j]); fprintf(fp,"%lf %lf %lf\n",p[j],q[j],npis[j]); } if (!IsOneFile) fclose(fp); } main(argc, argv) int argc; char *argv[]; { char fname[100]; int i, start, end; if (argc==3) { start =atoi(argv[1]); end = atoi(argv[2]); for (i=start; i<=end; i++) { sprintf(fname,"g%d",i); nelson(fname,0); } } else if (argc==2) nelson(argv[1],0); else{ printf("format: nsn start_num end_num OR nsn datafile\n"); exit(1); } }