/* this does Bayesian inference for viral production model0 Some useful Splus functions when you do the metropolis algorithm are first read in data with > tst _ matrix(scan("tsout1.dat"),byrow=T,ncol=numpar) > metrop.plt1 function(SIMS, numpar, noChn, numIter) { for(i in 1:numpar) { plot(SIMS[1:numIter, i], ylim = c(min(SIMS[, i]), max(SIMS[, i])), type = "n") for(j in 1:noChn) lines(SIMS[c((j - 1) * numIter + 1):c(j * numIter), i ], col = j) } } > metrop.plt2 function(x, y, noChn, numIter) { plot(x[1:numIter], y[1:numIter], ylim = c(min(y), max(y)), xlim = c( min(x), max(x))) for(i in 2:noChn) points(x[c((i - 1) * numIter + 1):c(i * numIter)], y[c((i - 1 ) * numIter + 1):c(i * numIter)], col = i) } here is an S function to compute theta > getTheta function(b0, b1, ts, t1.2) { b0 * ((exp(b1 * min(t1.2, ts)) - 1)/b1 + ind(t1.2, ts) * exp(b1 * ts) * (t1.2 - ts)) } */ #include #pragma hdrstop #include #include #include #include #include "C:\WINDOWS\DESKTOP\matrices.txt" #include "C:\WINDOWS\DESKTOP\randNo.txt" #include "C:\WINDOWS\DESKTOP\sorting.txt" #include "C:\WINDOWS\DESKTOP\math.txt" #include "C:\WINDOWS\DESKTOP\minimize.txt" #pragma argsused #define FREE_ARG char* #define SHFT(a,b,c,d) (a)=(b);(b)=(c);(c)=(d); double minFcn(double *x); void getHes(double *initValu,double **obsInfo,double *err,double h, int numpar); void metrop(long setSeed,int noChn,int numIter,int numburn,int numpar, double **tryV,double **initValu,double ***simres); void initlz(double **initValu,int noPar,int noChn); void sqrtR(double *stat,int noChn,int numiter,int numpar,double ***tens); double factln(double n); double bico(double n,double k); double ind(double x,double y); double round(double x); double vpIntgrnd2(double x); double vpIntgrnd1(double x); double loglik(double b01,double b02,double b11,double b12,double ts1, double ts2,double phi1,double phi2,double rstHL,double actHL); double expB(double x); void checkConvrg(double *stat,double ***simres,int numiter, int numburn,int noChn); /* nGlob is number of cells (assumed same for both types), rstHL and actHL are the 2 halflifes, infty is big number where -infty is log(0) when likelihood is identically zero and trying to compute log-lik */ int nGlob=100; double infty=1.0e20,rstHL/*=4.0/*14.0*/,actHL/*=1.5*/,tGlob=12.0,lam1, lam2,K1,K2,b01Glob,b02Glob,b11Glob,b12Glob,ts1Glob,ts2Glob,phi1Glob, phi2Glob,xGlob,*yAGlob,*yRGlob; int main() { /* this file holds the resting counts */ ifstream filer1("c:\\Spluswin\\home\\rstCnt.dat"); if(!filer1){ cerr << "Failure to open filer1\n"; getch(); exit(EXIT_FAILURE); } /* this file holds the activated counts */ ifstream filer2("c:\\Spluswin\\home\\actCnt.dat"); if(!filer2){ cerr << "Failure to open filer2\n"; getch(); exit(EXIT_FAILURE); } /* this holds cov for jumping in metrop */ ifstream filer3("c:\\Spluswin\\home\\covVP.dat"); if(!filer3){ cerr << "Failure to open filer3\n"; getch(); exit(EXIT_FAILURE); } \ /* holds optimum or covaraince matrix at mode */ ofstream filew1("c:\\Spluswin\\home\\tsout1.dat"); if(!filew1){ cerr << "Failure to open filew1\n"; getch(); exit(EXIT_FAILURE); } /* holds posterior samples */ ofstream filew2("c:\\Spluswin\\home\\tsout2.dat"); if(!filew2){ cerr << "Failure to open filew2\n"; getch(); exit(EXIT_FAILURE); } /* use SIMANL=1 for simulated annealing, MAXX=1 for using powells method for minimization, GTVR=1 for finding an approximation to the Hessian, and SMPL=1 for running the metropolis algorithm */ /* numIter controls how many iterations per chain in metrop, noChn controls how many chains, numpar controls how many parameters */ int SIMANL=0,MAXX=0,GTVR=0,SMPL=1,numIter=5000, numburn=2500,noChn=3,numpar=10,iter,i,j,ii; long setSeed=-9907; double minVal,temp1,temp2,retVal,*dev,*optVal,*drvErr,*initVal,*conStat, **drctns,**obsInfo,**tryV,**initValu,***simres; yAGlob=dvector(1,nGlob); yRGlob=dvector(1,nGlob); initVal=dvector(1,numpar); tryV=dmatrix(1,numpar,1,numpar); filer1.seekg(0); for(i=1;i<=nGlob;i++){ filer1 >> yRGlob[i]; } filer2.seekg(0); for(i=1;i<=nGlob;i++){ filer2 >> yAGlob[i]; } /* posterior mode so far: note all par.s non-neg */ /* this is mode when rstHL=4 and tGlob=12.0 */ initVal[1]=log(126.633); initVal[2]=log(583.068); initVal[3]=log(0.116145); initVal[4]=log(0.168907); initVal[5]=log(6.24273); initVal[6]=log(10.8296); initVal[7]=log(0.193108); initVal[8]=log(0.173824); initVal[9]=log(4.01164); initVal[10]=log(2.03985); if(SIMANL==1){ double temp,yb,*dev,*y,*pb,**p; p=dmatrix(1,numpar+1,1,numpar); y=dvector(1,numpar+1); pb=dvector(1,numpar); dev=dvector(1,numpar); for(i=1;i<=numpar;i++) p[1][i]=initVal[i]; y[1]=minFcn(initVal); cout << "the values of the objective at the corners of the simplex:" << endl; cout << y[1] << " "; for(i=2;i<=numpar+1;i++){ for(j=1;j<=numpar;j++){ p[i][j]=p[1][j]; if(i==(j+1)) p[i][j]+=1.0; dev[j]=p[i][j]; } y[i]=minFcn(dev); cout << y[i] << " "; } cout << endl; yb=1.0e30; temp=0.0; for(i=1;i<=300;i++){ cout << "starting iteration " << i << " "; iter=500; temp*=0.9; amebsa(p,y,numpar,pb,&yb,1.0e-7,minFcn,&iter,temp); if(iter>0) break; cout << "minVal=" << yb << " "; cout << "b01=" << exp(pb[1]) << " " << "b02=" << exp(pb[2]) << " " << "b11=" << exp(pb[3]) << " " << "b12=" << exp(pb[4]) << " " << "ts1=" << exp(pb[5]) << " " << "ts2=" << exp(pb[6]) << " " << "phi1=" << exp(pb[7]) << " " << "phi2=" << exp(pb[8]) << " " << "rstHL=" << exp(pb[9]) << " " << "actHL=" << exp(pb[10]) << endl; } cout << endl << "iter=" << iter << "min value=" << yb << endl; cout << "b01=" << exp(pb[1]) << " " << "b02=" << exp(pb[2]) << " " << "b11=" << exp(pb[3]) << " " << "b12=" << exp(pb[4]) << " " << "ts1=" << exp(pb[5]) << " " << "ts2=" << exp(pb[6]) << " " << "phi1=" << exp(pb[7]) << " " << "phi2=" << exp(pb[8]) << " " << "rstHL=" << exp(pb[9]) << " " << "actHL=" << exp(pb[10]) << endl; for(i=1;i<=numpar;i++) filew1 << exp(pb[i]) << " "; free_dmatrix(p,1,numpar+1,1,numpar); free_dvector(y,1,numpar+1); free_dvector(pb,1,numpar); free_dvector(dev,1,numpar); } if(MAXX==1){ drctns=dmatrix(1,numpar,1,numpar); for(i=1;i<=numpar;i++){ for(j=1;j<=numpar;j++){ drctns[i][j]=0.0; if(i==j) drctns[i][j]=1.0; } } powell(initVal,drctns,numpar,1.0e-7,&iter,&retVal,minFcn); cout << "Number of iterations in powell=" << iter << endl; cout << "minimum value=" << retVal << endl; cout << "optimal value" << endl; cout << "b01=" << exp(initVal[1]) << " " << "b02=" << exp(initVal[2]) << " " << "b11=" << exp(initVal[3]) << " " << "b12=" << exp(initVal[4]) << " " << "ts1=" << exp(initVal[5]) << " " << "ts2=" << exp(initVal[6]) << " " << "phi1=" << exp(initVal[7]) << " " << "phi2=" << exp(initVal[8]) << " " << "rstHL=" << exp(initVal[9]) << " " << "actHL=" << exp(initVal[10]) << endl; for(i=1;i<=numpar;i++) filew1 << exp(initVal[i]) << " "; free_dmatrix(drctns,1,numpar,1,numpar); } if(GTVR==1){ drvErr=dvector(1,numpar*numpar); obsInfo=dmatrix(1,numpar,1,numpar); getHes(initVal,obsInfo,drvErr,0.01,numpar); cout << endl << "got obsInfo-the errors in the hessian estimates are" << endl; for(i=1;i<=numpar*(numpar+1)/2;i++) cout << drvErr[i] << " "; cout << endl; /* obsInfo is really neg of obs Info-so now invert this-note that this does an extran chol decomp so a little redundant */ invert(obsInfo,tryV,numpar); cout << "scaled variance matrix of MLE's" << endl; for(i=1;i<=numpar;i++){ for(j=1;j<=numpar;j++){ cout << tryV[i][j] << " "; filew1 << tryV[i][j] << " "; } cout << endl; } free_dvector(drvErr,1,numpar*numpar); free_dmatrix(obsInfo,1,numpar,1,numpar); } if(SMPL==1){ conStat=dvector(1,1); simres=d3tensor(1,noChn,1,numIter-numburn,1,numpar); initValu=dmatrix(1,numpar,1,noChn); /* use this to read covariance in from external file */ filer3.seekg(0); for(i=1;i<=numpar;i++){ for(j=1;j<=numpar;j++){ filer3 >> tryV[i][j]; } } /* initial values moved to the end of the program */ initlz(initValu,numpar,noChn); /* scale cov matrix */ for(i=1;i<=numpar;i++){ for(j=1;j<=numpar;j++){ tryV[i][j]*=0.8; } } /* do the metropolis step */ metrop(setSeed,noChn,numIter,numburn,numpar,tryV,initValu,simres); if(noChn>1){ checkConvrg(conStat,simres,numIter,numburn,noChn); cout << endl << "sqrt R for ratio=" << conStat[1] << endl; } for(ii=1;ii<=noChn;ii++){ for(i=1;i<=numIter-numburn;i++){ for(j=1;j<=numpar;j++) filew2 << simres[ii][i][j] << " "; } } free_dvector(conStat,1,1); free_d3tensor(simres,1,noChn,1,numIter-numburn,1,numpar); free_dmatrix(initValu,1,numpar,1,noChn); } cout << "press any key to finish"; getch(); free_dvector(yRGlob,1,nGlob); free_dvector(yAGlob,1,nGlob); free_dvector(initVal,1,numpar); free_dmatrix(tryV,1,numpar,1,numpar); } double minFcn(double *x) { return -loglik(exp(x[1]),exp(x[2]),exp(x[3]),exp(x[4]), exp(x[5]),exp(x[6]),exp(x[7]),exp(x[8]),exp(x[9]),exp(x[10])); } void metrop(long setSeed,int noChn,int numIter,int numburn,int numpar, double **tryV,double **initValu,double ***simres) { /* does the metropolis algorithm-lots of parameters on log scale-simres holds sim. results in form chain by iteration by parameter, tryV is the varaince matrix for the normal jumping kernel, and initValu is matrix of initial values for the chain this finds convergence diagnostic stat and reports it-stops if converrgence taken place */ int ii,jj,i,j,k; double RSTOP=1.1,sum,*minVal,*accept,*temp1,*temp2,*Rstat, *pp,*mm,*canDev,**dev,**optDev; minVal=dvector(1,noChn); accept=dvector(1,noChn); temp1=dvector(1,noChn); temp2=dvector(1,noChn); Rstat=dvector(1,numpar); pp=dvector(1,numpar); mm=dvector(1,numpar); canDev=dvector(1,numpar); dev=dmatrix(1,numpar,1,noChn); optDev=dmatrix(1,numpar,1,noChn); choldc(tryV,numpar,pp); for(ii=1;ii<=noChn;ii++){ accept[ii]=0.0; for(i=1;i<=numpar;i++){ dev[i][ii]=initValu[i][ii]; optDev[i][ii]=initValu[i][ii]; canDev[i]=initValu[i][ii]; } temp1[ii]=minFcn(canDev); minVal[ii]=temp1[ii]; } for(i=1;i<=numIter;i++){ if(i%100==0) cout << "starting iteration number " << i << endl; for(ii=1;ii<=noChn;ii++){ for(j=1;j<=numpar;j++) mm[j]=gasdev(&setSeed); for(j=1;j<=numpar;j++){ sum=0.0; for(k=1;k numburn){ for(j=1;j<=numpar;j++){ simres[ii][i-numburn][j]=dev[j][ii]; } } } if(i%100==0 && i > numburn && noChn > 1){ sqrtR(Rstat,noChn,i-numburn,numpar,simres); cout << "convergence diagnostic statistics" << endl; jj=0.0; for(j=1;j<=numpar;j++){ cout << Rstat[j] << " "; } cout << endl; } } for(ii=1;ii<=noChn;ii++){ cout << "for chain " << ii; cout << " the acceptance rate=" << accept[ii]/numIter << " "; cout << " the minimum neg loglik=" << minVal[ii] << endl; cout << "the parameter value for this is" << endl; cout << "b01=" << exp(optDev[1][ii]) << " " << "b02=" << exp(optDev[2][ii]) << " " << "b11=" << exp(optDev[3][ii]) << " " << "b12=" << exp(optDev[4][ii]) << " " << "ts1=" << exp(optDev[5][ii]) << " " << "ts2=" << exp(optDev[6][ii]) << " " << "phi1=" << exp(optDev[7][ii]) << " " << "phi2=" << exp(optDev[8][ii]) << " " << "rstHL=" << exp(optDev[9][ii]) << " " << "actHL=" << exp(optDev[10][ii]) << endl; } free_dvector(minVal,1,noChn); free_dvector(accept,1,noChn); free_dvector(temp1,1,noChn); free_dvector(temp2,1,noChn); free_dvector(Rstat,1,numpar); free_dvector(mm,1,numpar); free_dvector(canDev,1,numpar); free_dvector(pp,1,numpar); free_dmatrix(dev,1,numpar,1,noChn); free_dmatrix(optDev,1,numpar,1,noChn); } void checkConvrg(double *stat,double ***simres,int numiter, int numburn,int noChn) { int ii,j; double th1,th2,***ratioRes; ratioRes=d3tensor(1,noChn,1,numiter-numburn,1,1); for(ii=1;ii<=noChn;ii++){ for(j=1;j<=numiter-numburn;j++){ th1=exp(simres[ii][j][1])*((exp(exp(simres[ii][j][3])*min(exp(simres[ii][j][9]), exp(simres[ii][j][5])))-1.0)/exp(simres[ii][j][3])+ind(exp(simres[ii][j][9]), exp(simres[ii][j][5]))*exp(exp(simres[ii][j][3])*exp(simres[ii][j][5]))* (exp(simres[ii][j][9])-exp(simres[ii][j][5]))); th2=exp(simres[ii][j][2])*((exp(exp(simres[ii][j][4])*min(exp(simres[ii][j][10]), exp(simres[ii][j][6])))-1.0)/exp(simres[ii][j][4])+ind(exp(simres[ii][j][10]), exp(simres[ii][j][6]))*exp(exp(simres[ii][j][4])*exp(simres[ii][j][6]))* (exp(simres[ii][j][10])-exp(simres[ii][j][6]))); ratioRes[ii][j][1]=log(th2/th1); } } sqrtR(stat,noChn,numiter-numburn,1,ratioRes); free_d3tensor(ratioRes,1,noChn,1,numiter-numburn,1,1); } double loglik(double b01,double b02,double b11,double b12, double ts1,double ts2,double phi1,double phi2,double rstHLloc, double actHLloc) { /* this evaluates likelihood for log-normal measurement error model */ int i,idzero=0; double ll=0.0,intgrl; b01Glob=b01; b02Glob=b02; b11Glob=b11; b12Glob=b12; ts1Glob=ts1; ts2Glob=ts2; phi1Glob=phi1; phi2Glob=phi2; rstHL=rstHLloc; actHL=actHLloc; lam1=rstHL/0.6931472; K1=1.0/(lam1*(1-exp(-tGlob/lam1))-tGlob*exp(-tGlob/lam1)); lam2=actHL/0.6931472; K2=1.0/(lam2*(1-exp(-tGlob/lam2))-tGlob*exp(-tGlob/lam2)); for(i=1;i<=nGlob;i++){ xGlob=yRGlob[i]; intgrl=qgaus(vpIntgrnd1,0.0,tGlob); if(intgrl <= 0.0){ idzero=1; break; } else ll+=log(intgrl); } if(idzero==0){ for(i=1;i<=nGlob;i++){ xGlob=yAGlob[i]; intgrl=qgaus(vpIntgrnd2,0.0,tGlob); if(intgrl <= 0.0){ idzero=1; break; } else ll+=log(intgrl); } } /* note use of informative priors here */ if(idzero==0) return ll-0.5*DSQR((log(actHL)-0.41)/0.1)- 0.5*DSQR((log(rstHL)-1.39)/0.1)-0.5*DSQR((log(ts1)-log(10.0))/3.0)- 0.5*DSQR((log(ts2)-log(10.0))/3.0); else return -infty; } double vpIntgrnd1(double x) { int i; double yy,pi,piNum,res; yy=b01Glob*((expB(b11Glob*min(x,ts1Glob))-1.0)/b11Glob+ expB(b11Glob*ts1Glob)*(x-ts1Glob)*ind(x,ts1Glob)); /* should round yy first */ yy=round(yy); if(yy309.0) x=309.0; return exp(x); } double vpIntgrnd2(double x) { int i; double yy,pi,piNum,res; yy=b02Glob*((expB(b12Glob*min(x,ts2Glob))-1.0)/b12Glob+ expB(b12Glob*ts2Glob)*(x-ts2Glob)*ind(x,ts2Glob)); /* should round yy first */ yy=round(yy); if(yy= 0.5) return floor(x)+1.0; else return floor(x); } double ind(double x,double y) { double res; if(x>y) res=1.0; else res=0.0; return res; } double bico(double n,double k) { double x; x=factln(n)-factln(k)-factln(n-k); if(x > 700.0) return floor(0.5+exp(700.0)); else return floor(0.5+exp(x)); } double factln(double n) { static double a[101]; if(n<0.0) nrerror("negative factorial in factln"); if(n <= 1.0) return 0.0; if(n <= 100.0) return a[int(n)] ? a[int(n)] : (a[int(n)]=loggam(n+1.0)); else return loggam(n+1.0); } void getHes(double *initValu,double **obsInfo,double *err,double h, int numpar) { /* here we find the negative Hessian of the log lik */ int NTABd=10,i,j,k,l,m; double ERRMAX=1.0e20,CON=1.4,CON2=1.4*1.4,BIG=1.0e30,SAFE=2.0,errt, fac,hh,ans,temp1,temp2,temp3,temp4,**a; a=dmatrix(1,NTABd,1,NTABd); m=1; for(i=1;i<=numpar;i++){ for(j=i;j<=numpar;j++){ hh=h; initValu[i] -= hh; initValu[j] -= hh; temp4= -minFcn(initValu); initValu[i] += 2.0*hh; temp2= -minFcn(initValu); initValu[j] += 2.0*hh; temp1= -minFcn(initValu); initValu[i] -= 2.0*hh; temp3= -minFcn(initValu); initValu[i] += hh; initValu[j] -= hh; a[1][1]=(temp1-temp2-temp3+temp4)/(4.0*hh*hh); err[m]=BIG; for(k=2;k<=NTABd;k++){ hh /= CON; initValu[i] -= hh; initValu[j] -= hh; temp4= -minFcn(initValu); initValu[i] += 2.0*hh; temp2= -minFcn(initValu); initValu[j] += 2.0*hh; temp1= -minFcn(initValu); initValu[i] -= 2.0*hh; temp3= -minFcn(initValu); initValu[i] += hh; initValu[j] -= hh; a[1][k]=(temp1-temp2-temp3+temp4)/(4.0*hh*hh); fac=CON2; for(l=2;l<=k;l++){ a[l][k]=(a[l-1][k]*fac-a[l-1][k-1])/(fac-1.0); fac=CON2*fac; errt=DMAX(fabs(a[l][k]-a[l-1][k]), fabs(a[l][k]-a[l-1][k-1])); if(errt <= err[m]){ err[m]=errt; ans=a[l][k]; } } if(fabs(a[k][k]-a[k-1][k-1]) >= SAFE*(err[m])) break; } if(err[m] > ERRMAX){ cout << "error on element " << m << " and error is " << err[m] << endl; nrerror("error too big in getHes"); } obsInfo[i][j]= -ans; obsInfo[j][i]= obsInfo[i][j]; m+=1; } } free_dmatrix(a,1,NTABd,1,NTABd); } void sqrtR(double *stat,int noChn,int numiter,int numpar,double ***tens) { /* this calculates Gelman-Rubin sqrtR for assessing convergence for MCMC using overdispersed starting points. */ double mean(double *,int); double var(double *,int); int i,j,k; double *s,*m,*temp,W,inter,BonN; temp=dvector(1,numiter); s=dvector(1,noChn); m=dvector(1,noChn); for(i=1;i<=numpar;i++){ for(j=1;j<=noChn;j++){ for(k=1;k<=numiter;k++) temp[k]=tens[j][k][i]; m[j]=mean(temp,numiter); s[j]=var(temp,numiter); } W=mean(s,noChn); BonN=var(m,noChn); inter=W*(numiter-1.0)/numiter+BonN; if(inter<0.0 || W<=0.0) stat[i]=100.0; else stat[i]=sqrt((inter)/W); } free_dvector(temp,1,numiter); free_dvector(s,1,noChn); free_dvector(m,1,noChn); } double mean(double *data, int n) { int j; double ave=0.0; for(j=1;j<=n;j++) ave += data[j]; ave /= n; return ave; } double var(double *data, int n) { double ave,dev,variance; double s=0.0; int j; ave=mean(data,n); for(j=1;j<=n;j++){ dev = data[j]-ave; s += dev*dev; } variance=s/(n-1.0); return variance; } void initlz(double **initValu,int noPar,int noChn) { /* starting values-these on unbounded scale */ int i,j; ifstream filer("c:\\Spluswin\\home\\initValu.dat"); if(!filer){ cerr << "Failure to open filer\n"; getch(); exit(EXIT_FAILURE); } for(i=1;i<=noChn;i++){ for(j=1;j<=noPar;j++){ filer >> initValu[j][i]; } } }