/*
#Iteratice Convex Minorant algorithm to compute NPMLE for left truncation
#AND interval censored data.
 
 modified from ~/prelim/data/wesdr/ftb4.c, and so
 [pi,qi]'s are reducted, as in Turnbull's paper (after Frydman's correction)!

 1) Log-likelihood increment is taken as convergence criterion, instead
    of max increament of probability mass.
 2) Double reals are used for improved precision.

Version 1: Fri Nov  8 10:23:27 CST 1996
 BY Wei Pan, Biostat Dept., UW-Madison

from Version 1, which is parameterized in cumulative hazards, in which
some premature early convergences happen. this may caused by inaccurate
quadratic approximation from dG() which ignores the non-diagonal elements
of the Hessian of the objective function (i.e. log-likelihood).
v1.5: no need for dG(), hence the steepest decscent alg.
      Mon Nov 11 10:45:35 CST 1996

copied from icm1.5.c with:
1) stronger convergence criterion: max(p(k+1)-p(k))<Tolerance;
Fri Jan 24 17:22:21 CST 1997

References:
1) Pan, W. and Chappell, R. (1998) ``Computation of the NPMLE of Distribution 
   Functions for Interval Censored and Truncated Data with Applications
   to the Cox Model".
   To appear in  {\em Computational Statistics and Data Analysis}.
2) Pan, W. and Chappell, R. (1998) ``Estimating Survival Curves with 
   Left-truncated and Interval-censored Data Under Monotone Hazards".
   To appear in {\em Biometrics}.
*/

#include <sys/time.h>
#include <sys/resource.h>
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>

#define MAX_N_Obs  10000

static int comp(f1,f2)
double *f1, *f2;
{
 if (*f1 < *f2) return(-1);
    else if (*f1 > *f2) return(1);
            else return(0);
}

/* L_infinite distance */
double Linf_dist(a,b,n)
double *a,*b;
int n;
{
 int i;
 double s1, s=0;

 for(i=1; i<=n; i++) {
    s1 = (s1=a[i]-b[i])>0? s1 : -s1;
    if (s < s1) s=s1;
   }
/*
    s += (a[i]-b[i])*(a[i]-b[i]);
 s = (float)sqrt((double)s);
*/
 return(s);
}

/* copied from mono mle1.2.c, Nov 8, 96
   make a Greatest Convex Minorant from
   interval hazards from output of N-C-P.

   March 14, 96
*/

gcm(q,nh,n)
double *q, *nh;
int n;
{
 int i,j,k,last;
 double  min_slope, slope;

 nh[0]=q[0]=0;
 for(i=2; i<=n; i++){
   nh[i] += nh[i-1];
   }
 i=0;
 while(i<n-1){
    min_slope=(nh[i+1]-nh[i])/(q[i+1]-q[i]);
    last=i+1;
    for(j=i+2; j<=n; j++){
       slope=(nh[j]-nh[i])/(q[j]-q[i]);
       if (slope<min_slope){
          /*
          printf("i=%d j=%d min_slope=%f    slope=%f\n",i,j,min_slope,slope);
          */
          min_slope=slope;
          last=j;
          }
       }

    for(k=i+1; k<last; k++)
       nh[k]=nh[i]+ (double)min_slope*(q[k]-q[i]);

    i=last;
    }

 for(i=n; i>1; i--){
   nh[i] -= nh[i-1];
   }
}

#define TB_EPSILON     1.0e-100
#define RIGHTCENSORING     9999
#define LARGENUM       1.0e+10
#define TB_MAX_ITER    5000 

void icm(INfname, IsOneFile)
char *INfname;
int IsOneFile;
{
char OUTfname[100],OUTfname2[100];
int i,j,k,n,N, iter, iter2;
int INDlt[MAX_N_Obs], INDlc[MAX_N_Obs], INDrc[MAX_N_Obs];
    /* indices of the corrsponding LT,LC,RC in ordered y[] */
double Tolerence=0.00001, M ;
double pTolerence=0.0001 ;
double LT[MAX_N_Obs], LC[MAX_N_Obs], RC[MAX_N_Obs],
      y[MAX_N_Obs], wt[MAX_N_Obs],
      dW[MAX_N_Obs], dG[MAX_N_Obs], dpis[MAX_N_Obs];
double *tp1=&(LT[1]), *tp2=&(LC[1]), *tp3=&(RC[1]);
double lr[2*MAX_N_Obs], d1[2*MAX_N_Obs], d2[2*MAX_N_Obs], 
       pis[2*MAX_N_Obs],npis[3*MAX_N_Obs];
double L0, Li, li, nu, dn, stepsize;
FILE *fp;
struct rusage ru; /* resource utilization, such as time */

fp=fopen(INfname, "r");
if (fp==NULL) perror("data file can't be opened!");

N=0;
while ((iter=fscanf(fp, "%lf%lf%lf",tp1++,tp2++,tp3++))>0){
  N++;
  if (LC[N]==RC[N]) RC[N] += TB_EPSILON;
  /* notice that tp1~3 point to LT,LC,RC */
  /* Ease the case for "double censoring" */
  /* following added on June 17, 96 */
/* deleted on Sun Nov 10 18:49:49 CST 1996
  if (LC[N]==LT[N]) LT[N] -= TB_EPSILON;
*/
  if (N>=MAX_N_Obs) perror("too large data set!");
 }
if (iter<0 && iter!=EOF) perror("reading data file!");
 
fclose(fp);

j=1;
for(i=1; i<=N; i++){
   lr[i]=LC[i];
   lr[i+N]=LT[i];
   lr[i+2*N]=RC[i];
  }

tp1=&(lr[1]);
qsort(tp1, N,sizeof(double), comp);
tp1=&(lr[1+N]);
qsort(tp1, 2*N,sizeof(double), comp);

n=0; i=1; j=N+1;
y[0]=0;
while (i<=N && j<=3*N ){
/*
&& lr[i]<RIGHTCENSORING && lr[j]<RIGHTCENSORING){
*/
  if (lr[i]>=lr[j]) j++;
     else if (i<N && lr[i+1]<lr[j]) i++; /* ==? */
	     else {
		   n=n+1;
/*
                   p[n]=lr[i]; q[n]=lr[j];
*/
                   y[n]=lr[j];
                   i++; j++;
                   /* added on Sun Nov 10 18:49:49 CST 1996 */
                   while (lr[j]== y[n] && j<=3*N) j++;
                  }
  }

/* added Sat Nov  9 18:49:58 CST 1996 */
/*
if (y[n]>=RIGHTCENSORING) n--;
*/

for(i=1; i<=N; i++){
 INDlt[i]=INDlc[i]=INDrc[i]=-1;
 for(j=1; j<=n; j++) {
   if (INDlt[i]==-1 && LT[i]<y[j] ) INDlt[i]=j-1;
   if (INDlc[i]==-1 && LC[i]<y[j] ) INDlc[i]=j-1;
   if (INDrc[i]==-1 && RC[i]<y[j] ) {
       INDrc[i]=j-1;
       break;
       }
   }
   if (INDrc[i]==-1) INDrc[i]=n;
if(INDlt[i]==-1 || INDlc[i]==-1 || INDrc[i]==-1)
   perror("Indices wrong!!!!"); 
 } 
/* initial cumulative hazards */
npis[0]=pis[0]=0;
for(j=0; j<=n; j++) {
  pis[j]=j*3.0/(n);
  wt[j]=j;
  }

L0=0;
for(i=1; i<=N; i++){
  nu= exp(-pis[INDlt[i]]);
  dn = exp(-pis[INDlc[i]]);
  if (RC[i]<RIGHTCENSORING) 
          dn -= exp(-pis[INDrc[i]]);
  li=dn/nu;
  if (li<TB_EPSILON) {
     printf("li from obs %d = %lf\n", i, li);
     li=TB_EPSILON;
     }
  L0 += log(li);
  }

iter=0;
while(iter<TB_MAX_ITER) {
iter=iter+1;

for(j=1; j<=n; j++){
 dW[j]=dG[j]=0;
 for(i=1; i<=N; i++){
   if (INDlt[i]==j) dW[j] += 1;
   if (RC[i]>=RIGHTCENSORING){
      if (INDlc[i]==j) dW[j] -= 1;
      }
   else {
         if (INDlc[i]==j ) {
             dW[j] -= exp(-pis[j])/(exp(-pis[j])-exp(-pis[INDrc[i]]));
             dG[j] += exp(-pis[j]-pis[INDrc[i]])/((exp(-pis[j])-exp(-pis[INDrc[i]]))*(exp(-pis[j])-exp(-pis[INDrc[i]])));
             }
         if (INDrc[i]==j ) {
             dW[j] += exp(-pis[j])/(exp(-pis[INDlc[i]])-exp(-pis[j]));
             dG[j] += exp(-pis[j]-pis[INDlc[i]])/((exp(-pis[j])-exp(-pis[INDlc[i]]))*(exp(-pis[j])-exp(-pis[INDlc[i]])));
             }
        } 
   }
 }

for(j=1; j<=n; j++) {
/*
  if (dG[j]>0) npis[j] = pis[j]+ dW[j]/dG[j];
  else npis[j] = npis[j] = pis[j]+ dW[j] ;
*/
  npis[j] = pis[j]+ dW[j] ;
  }
gcm(wt, npis, n);
for(j=1; j<=n; j++) dpis[j] = npis[j]-pis[j];
	
stepsize=1;
while (stepsize>1e-10){
for(j=1; j<=n; j++) {
  npis[j] = pis[j]+ stepsize*dpis[j];
  if (npis[j]<0) npis[j] = 0;
  }
npis[0]=0;

Li=0;
for(i=1; i<=N; i++){
  nu= exp(-npis[INDlt[i]]);
  dn = exp(-npis[INDlc[i]]);
  if (RC[i]<RIGHTCENSORING) 
          dn -= exp(-npis[INDrc[i]]);
  li=dn/nu;
  if (li<TB_EPSILON) li=TB_EPSILON;
  Li += log(li);
  }
/*
printf("Iteration=%d : DeltalogL=%lf Li=%lf  L0=%lf  \n", iter, Li-L0, Li,L0);
*/
if (Li>L0) break;
stepsize /= 2;
}
/*
for(j=1; j<=n; j++) printf(" %f ", npis[j]);
printf(" npis[n]= %lf \n", npis[n]);
*/

if (Li-L0<Tolerence && Linf_dist(pis,npis,n)<pTolerence) break;
   else 
       for(j=1; j<=n; j++)
        pis[j]=npis[j];
L0=Li;
}
if (Li>L0){
    for(j=1; j<=n; j++)
        pis[j]=npis[j];
    L0=Li;
    }

if (iter>=TB_MAX_ITER)
   printf("\n\nWARNING: No convergence after %d iterations!!\n\n",iter);

printf("\n\n************RESULT:(after %d iterations)\n\n",iter);
if (!IsOneFile){
strcpy(OUTfname, INfname);
sprintf(OUTfname2,"GPres%lf",Tolerence);
strcat(OUTfname,OUTfname2);
if ((fp=fopen(OUTfname, "w"))==NULL) perror("writing output data!");
}
y[0]=0; npis[0]=0; 
for(j=0; j<=n; j++){
  if (IsOneFile) printf(" %lf : %lf\n",y[j],npis[j]);
  else
  fprintf(fp," %lf %lf\n",y[j],exp(-pis[j]));
 }

if (!IsOneFile) fclose(fp);

getrusage(RUSAGE_SELF,  &ru);
/*
strcpy(OUTfname2,OUTfname);
strcat(OUTfname,".speed");
if ((fp=fopen(OUTfname, "a"))==NULL) perror("writing output data!");
*/
/*
if ((fp=fopen("ICM2/speed2.5", "a"))==NULL) perror("writing output data!");
 fprintf(fp, "%i   %lf   %lf \n",iter, L0,
  ru.ru_utime.tv_sec+ru.ru_utime.tv_usec/1000000.0+ 
  ru.ru_stime.tv_sec+ru.ru_stime.tv_usec/1000000.0);
fclose(fp);
*/
}

main(argc, argv)
int argc;
char *argv[];
{
 char fname[100];
 int i, start, end;
 if (argc==3) {
   start =atoi(argv[1]);
   end = atoi(argv[2]);
   for (i=start; i<=end; i++) {
      sprintf(fname,"g%d",i);
      icm(fname,0);
      }
   }
   else if (argc==2) icm(argv[1],0);
        else{
         printf("format: ntb2 start_num end_num   OR ntb2 datafile\n");
         exit(1);
        }
}