/* This uses the Metropolis algorithm to conduct inference for a negative binomially distributed set of measurements with fixed effects reflecting the taxonomic structure that has been observed in a metagenomics investigation. The model includes hierarchical smoothing of dispersions. To use you give the name of the executable followed by the filename of the file with the counts, then the filename with the taxonomic information, then the filename with the covariance matrix, then a 1 if you want all the MCMC output written to file, then the number of samples, the number of burnin samples and the thinning factor. */ #include #include #include #include #include #include using namespace std; //#pragma hdrstop #define FREE_ARG char* static double dsqrarg; #define DSQR(a) ((dsqrarg=(a)) == 0.0 ? 0.0 : dsqrarg*dsqrarg) /* function declarations */ double minFcn(double *x); void metrop(long setSeed,int lookInt,int noChn,int numIter,int numBurn, int thinfac,int numpar,double **tryV,double **initValu,double ***simres); void sqrtR(double *stat,int noChn,int numiter,int numpar,double ***tens); double mean(double *data,int n); double var(double *data,int n); double negbindev(double mu, double eta, long *setSeed); void getEstPval(int lookInt,int noChn,int numIter,int numBurn,int thinfac,double ***simres,int *gInd,int *fInd,int *oInd, int *cInd, double **tab1,double **tab2,double **tab3,double **tab4, double *grp1p,double *grp1c,double *grp1o,double *grp1f,double *grp1g); /* following taken from Numerical Recipes in C */ int *ivector(long nl,long nh); void free_ivector(int *v, long nl, long nh); double *dvector(long nl,long nh); void free_dvector(double *v,long nl,long nh); double **dmatrix(long nrl,long nrh,long ncl,long nch); void free_dmatrix(double **m, long nrl,long nrh,long ncl,long nch); double ***d3tensor(long nrl,long nrh,long ncl,long nch,long ndl, long ndh); void free_d3tensor(double ***t,long nrl,long nrh,long ncl,long nch, long ndl,long ndh); double ran1(long *idum); double gasdev(long *idum); void choldc(double **a,int n,double p[]); double loggam(double n); void nrerror(const char error_text[]); int nsub=1,ngrp=2,nphy=1,ntotf=1,ntoto=1,ntotc=1,ncre=1,nore=1,nfre=1,ngre=1,ngen=1,*disGlob; int noPar=ngrp*ngen+ngen+2; double **cntGlob; int main(int argc,char *argv[]) { /* this file holds the count data */ ifstream filer1(argv[1]); if(!filer1){ cerr << "Failure to open filer1\n"; exit(EXIT_FAILURE); } /* this file holds the taxonomic information */ ifstream filer2(argv[2]); if(!filer2){ cerr << "Failure to open filer2\n"; exit(EXIT_FAILURE); } /* this file holds the covariance matrix */ ifstream filer3(argv[3]); if(!filer3){ cerr << "Failure to open filer3\n"; exit(EXIT_FAILURE); } /* samples from the posterior */ ofstream filew1("postSmpl.txt"); if(!filew1){ cerr << "Failure to open filew1\n"; exit(EXIT_FAILURE); } /* samples of estimable effects from the posterior */ ofstream filew2("extEfftsPostSmpl.txt"); if(!filew2){ cerr << "Failure to open filew2\n"; exit(EXIT_FAILURE); } /* numIter is numiter per chain in metrop, numBurn is number of burn in iterations, noChn is number of chains to use in MCMC, thinfac inicates how much thinning of chain output-use 1 to have none, and lookInt indicates how frequently convergence diagnostic statistics should be computed */ int i,j,k,l,allSimOut=0,numIter=10,numBurn=5,noChn=1,lookInt=100,thinfac=1; allSimOut=atoi(argv[4]); numIter=atoi(argv[5]); numBurn=atoi(argv[6]); thinfac=atoi(argv[7]); if((numIter-numBurn)%thinfac!=0) nrerror("non-allowed value for the thinning factor"); /* these are the total number of taxa at each level */ int n1,n2,*phNum,*clNum,*orNum,*faNum,*geNum,*clInd,*orInd,*faInd,*geInd; long setSeed=-8623; double mn,vv,*grp1p,*grp1c,*grp1o,*grp1f,*grp1g,**pcTab,**coTab,**ofTab,**fgTab,**pcMat,**coMat, **ofMat,**fgMat,**cntDataUnsort,**tryV,**initValu,***simres; /* get the number of lines in file 1 */ int numLines=0; std::string line; while(std::getline(filer1,line)) ++numLines; filer1.clear(); filer1.seekg(0, std::ios::beg); nsub=numLines; /* get the number of lines in file 2 */ numLines=0; while(std::getline(filer2,line)) ++numLines; filer2.clear(); filer2.seekg(0, std::ios::beg); ngen=numLines; noPar=ngrp*ngen+ngen+2; disGlob=ivector(1,nsub); cntDataUnsort=dmatrix(1,nsub,1,ngen); cntGlob=dmatrix(1,nsub,1,ngen); phNum=ivector(1,ngen); clNum=ivector(1,ngen); orNum=ivector(1,ngen); faNum=ivector(1,ngen); geNum=ivector(1,ngen); initValu=dmatrix(1,noPar,1,noChn); tryV=dmatrix(1,noPar,1,noPar); simres=d3tensor(1,noChn,1,(numIter-numBurn)/thinfac,1,noPar); /* read in phenotypic and count data */ filer1.seekg(0); for(i=1;i<=nsub;i++){ for(j=1;j<=(ngen+1);j++){ if(j==1) filer1 >> disGlob[i]; else filer1 >> cntDataUnsort[i][j-1]; } } n1=0; n2=0; for(i=1;i<=nsub;i++){ if(disGlob[i]==1) n1+=1; if(disGlob[i]==2) n2+=1; } if(n1==0 | n2==0) nrerror("no data for at least one group: check phenotype formatting"); const int length=300; char buffer[length]; std::string geStrng[ngen],faStrng[ngen],orStrng[ngen],clStrng[ngen],phStrng[ngen]; /* read in taxonomic information */ filer2.seekg(0); for(i=0;i<=(ngen-1);i++){ filer2.getline(buffer,length,'\n'); char *pch; pch=strtok(buffer,"\t"); geStrng[i]=pch; pch=strtok(NULL,"\t"); faStrng[i]=pch; pch=strtok(NULL,"\t"); orStrng[i]=pch; pch=strtok(NULL,"\t"); clStrng[i]=pch; pch=strtok(NULL,"\t"); phStrng[i]=pch; } /* set up taxa sets and get total number of taxa at all levels */ std::set genSet; for(i=0;i<=ngen-1;i++) genSet.insert(geStrng[i]); cout << "Number of distinct genera=" << genSet.size() << endl; if(genSet.size()!=ngen) nrerror("number of distinct genera less than the number of genera"); std::set famSet; for(i=0;i<=ngen-1;i++) famSet.insert(faStrng[i]); cout << "Number of distinct families=" << famSet.size() << endl; ntotf=famSet.size(); std::set ordSet; for(i=0;i<=ngen-1;i++) ordSet.insert(orStrng[i]); cout << "Number of distinct orders=" << ordSet.size() << endl; ntoto=ordSet.size(); std::set claSet; for(i=0;i<=ngen-1;i++) claSet.insert(clStrng[i]); cout << "Number of distinct classes=" << claSet.size() << endl; ntotc=claSet.size(); std::set phySet; for(i=0;i<=ngen-1;i++) phySet.insert(phStrng[i]); cout << "Number of distinct phyla=" << phySet.size() << endl; nphy=phySet.size(); /* now create objects that require knowledge of these sizes */ pcTab=dmatrix(1,nphy,1,ntotc); coTab=dmatrix(1,ntotc,1,ntoto); ofTab=dmatrix(1,ntoto,1,ntotf); fgTab=dmatrix(1,ntotf,1,ngen); pcMat=dmatrix(1,nphy,1,ntotc); coMat=dmatrix(1,ntotc,1,ntoto); ofMat=dmatrix(1,ntoto,1,ntotf); fgMat=dmatrix(1,ntotf,1,ngen); clInd=ivector(1,ntotc); orInd=ivector(1,ntoto); faInd=ivector(1,ntotf); geInd=ivector(1,ngen); /* map taxa names to integers to simplify computing tables */ std::set::iterator it1; std::set::iterator it2; for(i=0;i<=(ngen-1);i++){ it1=phySet.find(phStrng[i]); k=1; for(it2=phySet.begin(); it2!=phySet.end();it2++){ if(*it2==*it1){ phNum[i+1]=k; break; } k+=1; } it1=claSet.find(clStrng[i]); k=1; for(it2=claSet.begin(); it2!=claSet.end();it2++){ if(*it2==*it1){ clNum[i+1]=k; break; } k+=1; } it1=ordSet.find(orStrng[i]); k=1; for(it2=ordSet.begin(); it2!=ordSet.end();it2++){ if(*it2==*it1){ orNum[i+1]=k; break; } k+=1; } it1=famSet.find(faStrng[i]); k=1; for(it2=famSet.begin(); it2!=famSet.end();it2++){ if(*it2==*it1){ faNum[i+1]=k; break; } k+=1; } it1=genSet.find(geStrng[i]); k=1; for(it2=genSet.begin(); it2!=genSet.end();it2++){ if(*it2==*it1){ geNum[i+1]=k; break; } k+=1; } } /* now sort the count data to be consistent with the genus order */ for(i=1;i<=nsub;i++){ for(j=1;j<=ngen;j++){ for(k=1;k<=ngen;k++){ if(geNum[k]==j) break; } cntGlob[i][j]=cntDataUnsort[i][k]; } } /* create tables for getting identified taxa */ for(i=1;i<=ntotf;i++){ for(j=1;j<=ngen;j++){ fgTab[i][j]=0.0; if(i<=ntoto & j<=ntotf) ofTab[i][j]=0.0; if(i<=ntotc & j<=ntoto) coTab[i][j]=0.0; if(i<=nphy & j<=ntotc) pcTab[i][j]=0.0; } } for(i=1;i<=ngen;i++){ if(pcTab[phNum[i]][clNum[i]]==0.0) pcTab[phNum[i]][clNum[i]]=1.0; if(coTab[clNum[i]][orNum[i]]==0.0) coTab[clNum[i]][orNum[i]]=1.0; if(ofTab[orNum[i]][faNum[i]]==0.0) ofTab[orNum[i]][faNum[i]]=1.0; if(fgTab[faNum[i]][geNum[i]]==0.0) fgTab[faNum[i]][geNum[i]]=1.0; } /* use these tables to determine the estimable parameters */ /* set up vectors of length total number of taxa that have 1 if that taxon is identifiable */ for(j=1;j<=ntotc;j++) clInd[j]=0; for(j=1;j<=ntoto;j++) orInd[j]=0; for(j=1;j<=ntotf;j++) faInd[j]=0; for(j=1;j<=ngen;j++) geInd[j]=0; ncre=0; for(i=1;i<=nphy;i++){ k=0; for(j=1;j<=ntotc;j++){ if(pcTab[i][j]>0.0) k+=1; } if(k>1){ l=1; for(j=1;j<=ntotc;j++){ if(pcTab[i][j]>0.0){ pcMat[i][l]=j; l+=1; clInd[j]=1; ncre+=1; } } } } nore=0; for(i=1;i<=ntotc;i++){ k=0; for(j=1;j<=ntoto;j++){ if(coTab[i][j]>0.0) k+=1; } if(k>1){ l=1; for(j=1;j<=ntoto;j++){ if(coTab[i][j]>0.0){ coMat[i][l]=j; l+=1; orInd[j]=1; nore+=1; } } } } nfre=0; for(i=1;i<=ntoto;i++){ k=0; for(j=1;j<=ntotf;j++){ if(ofTab[i][j]>0.0) k+=1; } if(k>1){ l=1; for(j=1;j<=ntotf;j++){ if(ofTab[i][j]>0.0){ ofMat[i][l]=j; l+=1; faInd[j]=1; nfre+=1; } } } } ngre=0; for(i=1;i<=ntotf;i++){ k=0; for(j=1;j<=ngen;j++){ if(fgTab[i][j]>0.0) k+=1; } if(k>1){ l=1; for(j=1;j<=ngen;j++){ if(fgTab[i][j]>0.0){ fgMat[i][l]=j; l+=1; geInd[j]=1; ngre+=1; } } } } cout << "Number of estimable genus level effects=" << ngre << endl; cout << "Number of estimable family level effects=" << nfre << endl; cout << "Number of estimable order level effects=" << nore << endl; cout << "Number of estimable class level effects=" << ncre << endl; /* now get initial values-these should all be on the log scale */ for(i=1;i<=ngen;i++){ mn=1.0; for(j=1;j<=n1;j++) mn+=cntGlob[j][i]; initValu[i][1]=log(mn/(n1+1.0)); mn=1.0; for(j=(n1+1);j<=(n1+n2);j++) mn+=cntGlob[j][i]; initValu[i+ngen][1]=log(mn/(n2+1.0)); mn=((n1+1.0)*exp(initValu[i][1])-(n2+1.0)*exp(initValu[i+ngen][1]))/(n1+n2+2.0); vv=0.0; for(j=1;j<=(n1+n2);j++) vv+=DSQR(cntGlob[j][i]-mn); vv/=(n1+n2); /* note that we expect overdispersion but don't want the program to fail if that is not the case, so just use 1 which is large for metagenomic data sets */ if(mn/(vv/mn-1.0)>0) initValu[i+2*ngen][1]=log(mn/(vv/mn-1.0)); else initValu[i+2*ngen][1]=log(1.0); } mn=1.0e-5; for(i=1;i<=ngen;i++) mn+=exp(initValu[i+2*ngen][1]); mn/=ngen; vv=0.0; for(i=1;i<=ngen;i++) vv+=DSQR(exp(initValu[i+2*ngen][1])-mn); vv/=ngen; initValu[3*ngen+1][1]=log(DSQR(mn)/vv); initValu[3*ngen+2][1]=log(mn/vv); /* read in a cov matrix and rescale */ filer3.seekg(0); for(i=1;i<=noPar;i++){ for(j=1;j<=noPar;j++){ filer3 >> tryV[i][j]; tryV[i][j]*=(2.4/sqrt(noPar)); } } /* run the Metropolis algorithm and write results to file if desired */ metrop(setSeed,lookInt,noChn,numIter,numBurn,thinfac,noPar,tryV,initValu, simres); /* write to file */ if(allSimOut==1){ for(i=1;i<=noChn;i++){ for(j=1;j<=(numIter-numBurn)/thinfac;j++){ for(k=1;k<=noPar;k++) filew1 << simres[i][j][k] << " "; filew1 << endl; } } } /* now get summaries and tests at level of estimable effects */ grp1p=dvector(1,nphy); grp1c=dvector(1,ncre); grp1o=dvector(1,nore); grp1f=dvector(1,nfre); grp1g=dvector(1,ngre); getEstPval(lookInt,noChn,numIter,numBurn,thinfac,simres,geInd,faInd,orInd,clInd,fgTab,ofTab,coTab,pcTab,grp1p,grp1c,grp1o,grp1f,grp1g); j=1; for(it2=phySet.begin(); it2!=phySet.end();it2++){ filew2 << *it2 << "\t" << grp1p[j] << endl; j+=1; } j=1; k=1; for(it2=claSet.begin(); it2!=claSet.end();it2++){ if(clInd[j]==1){ filew2 << *it2 << "\t" << grp1c[k] << endl; k+=1; } j+=1; } j=1; k=1; for(it2=ordSet.begin(); it2!=ordSet.end();it2++){ if(orInd[j]==1){ filew2 << *it2 << "\t" << grp1o[k] << endl; k+=1; } j+=1; } j=1; k=1; for(it2=famSet.begin(); it2!=famSet.end();it2++){ if(faInd[j]==1){ filew2 << *it2 << "\t" << grp1f[k] << endl; k+=1; } j+=1; } j=1; k=1; for(it2=genSet.begin(); it2!=genSet.end();it2++){ if(geInd[j]==1){ filew2 << *it2 << "\t" << grp1g[k] << endl; k+=1; } j+=1; } free_ivector(disGlob,1,nsub); free_dmatrix(cntDataUnsort,1,nsub,1,ngen); free_dmatrix(cntGlob,1,nsub,1,ngen); free_ivector(phNum,1,ngen); free_ivector(clNum,1,ngen); free_ivector(orNum,1,ngen); free_ivector(faNum,1,ngen); free_ivector(geNum,1,ngen); free_dmatrix(pcTab,1,nphy,1,ntotc); free_dmatrix(coTab,1,ntotc,1,ntoto); free_dmatrix(ofTab,1,ntoto,1,ntotf); free_dmatrix(fgTab,1,ntotf,1,ngen); free_dmatrix(pcMat,1,nphy,1,ntotc); free_dmatrix(coMat,1,ntotc,1,ntoto); free_dmatrix(ofMat,1,ntoto,1,ntotf); free_dmatrix(fgMat,1,ntotf,1,ngen); free_ivector(clInd,1,ntotc); free_ivector(orInd,1,ntoto); free_ivector(faInd,1,ntotf); free_ivector(geInd,1,ngen); free_dmatrix(initValu,1,noPar,1,noChn); free_dmatrix(tryV,1,noPar,1,noPar); free_d3tensor(simres,1,noChn,1,(numIter-numBurn)/thinfac,1,noPar); free_dvector(grp1p,1,nphy); free_dvector(grp1c,1,ncre); free_dvector(grp1o,1,nore); free_dvector(grp1f,1,nfre); free_dvector(grp1g,1,ngre); return 0; } void getEstPval(int lookInt,int noChn,int numIter,int numBurn,int thinfac,double ***simres,int *gInd,int *fInd,int *oInd, int *cInd, double **tab1,double **tab2,double **tab3,double **tab4, double *grp1p,double *grp1c,double *grp1o,double *grp1f,double *grp1g) { /* this gets the estimable effects from the simulation results */ int i,j,k,l,i1; double *den1,*den2,*den3,*den4,*resG1,*resG2,*vec1G1,*vec2G1,*vec3G1,*vec4G1,*vec1G2,*vec2G2,*vec3G2,*vec4G2; den1=dvector(1,ntotf); den2=dvector(1,ntoto); den3=dvector(1,ntotc); den4=dvector(1,nphy); resG1=dvector(1,ngen); resG2=dvector(1,ngen); vec1G1=dvector(1,ntotf); vec2G1=dvector(1,ntoto); vec3G1=dvector(1,ntotc); vec4G1=dvector(1,nphy); vec1G2=dvector(1,ntotf); vec2G2=dvector(1,ntoto); vec3G2=dvector(1,ntotc); vec4G2=dvector(1,nphy); /* initialize the vectors that hold results */ for(i=1;i<=nphy;i++) grp1p[i]=0.0; for(i=1;i<=ncre;i++) grp1c[i]=0.0; for(i=1;i<=nore;i++) grp1o[i]=0.0; for(i=1;i<=nfre;i++) grp1f[i]=0.0; for(i=1;i<=ngre;i++) grp1g[i]=0.0; /* set up the denominators for the comparisons-same for both groups */ for(i=1;i<=ntotf;i++){ den1[i]=0.0; for(j=1;j<=ngen;j++){ den1[i]+=tab1[i][j]; } } for(i=1;i<=ntoto;i++){ den2[i]=0.0; for(j=1;j<=ntotf;j++){ den2[i]+=tab2[i][j]; } } for(i=1;i<=ntotc;i++){ den3[i]=0.0; for(j=1;j<=ntoto;j++){ den3[i]+=tab3[i][j]; } } for(i=1;i<=nphy;i++){ den4[i]=0.0; for(j=1;j<=ntotc;j++){ den4[i]+=tab4[i][j]; } } /* now pick off sims and get estimates at all levels */ for(i=1;i<=noChn;i++){ for(j=1;j<=(numIter-numBurn)/thinfac;j++){ for(k=1;k<=ngen;k++) resG1[k]=exp(simres[i][j][k]); for(k=ngen+1;k<=2*ngen;k++) resG2[k-ngen]=exp(simres[i][j][k]); /* now get the means for group 1 */ for(k=1;k<=ntotf;k++){ vec1G1[k]=0.0; for(l=1;l<=ngen;l++){ vec1G1[k]+=tab1[k][l]*resG1[l]; } vec1G1[k]/=den1[k]; } for(k=1;k<=ntoto;k++){ vec2G1[k]=0.0; for(l=1;l<=ntotf;l++){ vec2G1[k]+=tab2[k][l]*vec1G1[l]; } vec2G1[k]/=den2[k]; } for(k=1;k<=ntotc;k++){ vec3G1[k]=0.0; for(l=1;l<=ntoto;l++){ vec3G1[k]+=tab3[k][l]*vec2G1[l]; } vec3G1[k]/=den3[k]; } for(k=1;k<=nphy;k++){ vec4G1[k]=0.0; for(l=1;l<=ntotc;l++){ vec4G1[k]+=tab4[k][l]*vec3G1[l]; } vec4G1[k]/=den4[k]; } /* now get these means for the other group */ for(k=1;k<=ntotf;k++){ vec1G2[k]=0.0; for(l=1;l<=ngen;l++){ vec1G2[k]+=tab1[k][l]*resG2[l]; } vec1G2[k]/=den1[k]; } for(k=1;k<=ntoto;k++){ vec2G2[k]=0.0; for(l=1;l<=ntotf;l++){ vec2G2[k]+=tab2[k][l]*vec1G2[l]; } vec2G2[k]/=den2[k]; } for(k=1;k<=ntotc;k++){ vec3G2[k]=0.0; for(l=1;l<=ntoto;l++){ vec3G2[k]+=tab3[k][l]*vec2G2[l]; } vec3G2[k]/=den3[k]; } for(k=1;k<=nphy;k++){ vec4G2[k]=0.0; for(l=1;l<=ntotc;l++){ vec4G2[k]+=tab4[k][l]*vec3G2[l]; } vec4G2[k]/=den4[k]; } /* now determine if estimable ones differ */ for(k=1;k<=nphy;k++){ if(vec4G1[k]>vec4G2[k]) grp1p[k]+=1.0; } i1=1; for(k=1;k<=ntotc;k++){ if(cInd[k]==1){ if(vec3G1[k]>vec3G2[k]) grp1c[i1]+=1.0; i1+=1; } } i1=1; for(k=1;k<=ntoto;k++){ if(oInd[k]==1){ if(vec2G1[k]>vec2G2[k]) grp1o[i1]+=1.0; i1+=1; } } i1=1; for(k=1;k<=ntotf;k++){ if(fInd[k]==1){ if(vec1G1[k]>vec1G2[k]) grp1f[i1]+=1.0; i1+=1; } } i1=1; for(k=1;k<=ngen;k++){ if(gInd[k]==1){ if(resG1[k]>resG2[k]) grp1g[i1]+=1.0; i1+=1; } } } } for(k=1;k<=nphy;k++) grp1p[k]/=((noChn*(numIter-numBurn))/thinfac); for(k=1;k<=ncre;k++) grp1c[k]/=((noChn*(numIter-numBurn))/thinfac); for(k=1;k<=nore;k++) grp1o[k]/=((noChn*(numIter-numBurn))/thinfac); for(k=1;k<=nfre;k++) grp1f[k]/=((noChn*(numIter-numBurn))/thinfac); for(k=1;k<=ngre;k++) grp1g[k]/=((noChn*(numIter-numBurn))/thinfac); free_dvector(den1,1,ntotf); free_dvector(den2,1,ntoto); free_dvector(den3,1,ntotc); free_dvector(den4,1,nphy); free_dvector(resG1,1,ngen); free_dvector(resG2,1,ngen); free_dvector(vec1G1,1,ntotf); free_dvector(vec2G1,1,ntoto); free_dvector(vec3G1,1,ntotc); free_dvector(vec4G1,1,nphy); free_dvector(vec1G2,1,ntotf); free_dvector(vec2G2,1,ntoto); free_dvector(vec3G2,1,ntotc); free_dvector(vec4G2,1,nphy); } double minFcn(double *x) { /* this evaluates an expression proportional to the negative log posterior */ int i,j,i1=1; double a0=3.1,b0=100.0,res,alpha,lambda,theta,tmp,*eta,**fe; eta=dvector(1,ngen); fe=dmatrix(1,ngen,1,ngrp); for(i=1;i<=ngrp;i++){ for(j=1;j<=ngen;j++){ fe[j][i]=exp(x[i1]); i1+=1; } } for(i=1;i<=ngen;i++){ eta[i]=exp(x[i1]); i1+=1; } alpha=exp(x[i1]); lambda=exp(x[i1+1]); res=0.0; for(i=1;i<=nsub;i++){ for(j=1;j<=ngen;j++){ theta=fe[j][disGlob[i]]; res+=eta[j]*log(eta[j]/(eta[j]+theta))+cntGlob[i][j]*log(theta/(eta[j]+theta)) +loggam(eta[j]+cntGlob[i][j])-loggam(eta[j]); } } /* gamma prior for eta's */ for(i=1;i<=ngen;i++) res+=alpha*log(lambda)-loggam(alpha)+alpha*log(eta[i])-lambda*eta[i]; /* conditionally conjugate prior for mu|eta */ for(i=1;i<=ngen;i++){ for(j=1;j<=ngrp;j++){ theta=fe[i][j]; res+= (a0-2.0)*log(eta[i]/theta)-(a0+b0-2.0)*log(1.0+eta[i]/theta); } } /* vague priors for alpha and lambda */ res+= -0.5*DSQR((log(alpha)+2.3)/1.6); res+= -0.5*DSQR((log(lambda)-1.9)/0.4); free_dvector(eta,1,ngen); free_dmatrix(fe,1,ngen,1,ngrp); return -res; } void metrop(long setSeed,int lookInt,int noChn,int numIter,int numBurn, int thinfac,int numpar,double **tryV,double **initValu,double ***simres) { /* does the Metropolis algorithm-lots of parameters on log scale-simres holds sim results in form chain by iteration by parameter, tryV is the varaince matrix for the normal jumping kernel, and initValu is matrix of initial values for the chain this can find convergence diagnostic stat and report them-can stop if convergence has taken place */ int ii,jj,i,j,k; double RSTOP=1.01,sum,*vec,*minVal,*accept,*temp1,*temp2,*Rstat, *pp,*mm,*canDev,**dev,**optDev; minVal=dvector(1,noChn); vec=dvector(1,numpar); accept=dvector(1,noChn); temp1=dvector(1,noChn); temp2=dvector(1,noChn); Rstat=dvector(1,numpar); pp=dvector(1,numpar); mm=dvector(1,numpar); canDev=dvector(1,numpar); dev=dmatrix(1,numpar,1,noChn); optDev=dmatrix(1,numpar,1,noChn); choldc(tryV,numpar,pp); for(ii=1;ii<=noChn;ii++){ accept[ii]=0.0; for(i=1;i<=numpar;i++){ dev[i][ii]=initValu[i][ii]; optDev[i][ii]=initValu[i][ii]; canDev[i]=initValu[i][ii]; } temp1[ii]=minFcn(canDev); minVal[ii]=temp1[ii]; } for(i=1;i<=numIter;i++){ // cout << "starting iteration number " << i << endl; for(ii=1;ii<=noChn;ii++){ // cout << "starting chain number " << ii << endl; for(j=1;j<=numpar;j++) mm[j]=gasdev(&setSeed); for(j=1;j<=numpar;j++){ sum=0.0; for(k=1;k numBurn){ if((i-numBurn)%thinfac==0){ for(j=1;j<=numpar;j++){ simres[ii][(i-numBurn)/thinfac][j]=dev[j][ii]; } } } // cout << "the minimum neg loglik for chain " << ii << "=" // << minVal[ii] << " acceptance rate=" << accept[ii]/i << endl; } if(i%lookInt==0 && i > numBurn && noChn > 1){ sqrtR(Rstat,noChn,(i-numBurn)/thinfac,numpar,simres); cout << "convergence diagnostic statistics after " << i << " iterations" << endl; jj=0; /* able or disable early return by commenting in/out */ for(j=1;j<=numpar;j++){ // if(Rstat[j] < RSTOP) jj+=1; cout << Rstat[j] << " "; } cout << endl; } // if(jj==numpar) break; } for(ii=1;ii<=noChn;ii++){ cout << "for chain " << ii; cout << " the acceptance rate=" << accept[ii]/numIter << " "; cout << "and the maximum of the log posterior (unscaled)=" << minVal[ii] << endl; cout << "the parameter value for this is" << endl; for(j=1;j<=numpar;j++) vec[j]=optDev[j][ii]; for(j=1;j<=numpar;j++) optDev[j][ii]=vec[j]; for(j=1;j<=numpar;j++) cout << exp(optDev[j][ii]) << " "; cout << endl; } free_dvector(minVal,1,noChn); free_dvector(vec,1,numpar); free_dvector(accept,1,noChn); free_dvector(temp1,1,noChn); free_dvector(temp2,1,noChn); free_dvector(Rstat,1,numpar); free_dvector(mm,1,numpar); free_dvector(canDev,1,numpar); free_dvector(pp,1,numpar); free_dmatrix(dev,1,numpar,1,noChn); free_dmatrix(optDev,1,numpar,1,noChn); } /* next 3 functions set up for multiple chain convergence diagnostics-not functional for current version but future versions may make this feature available */ void sqrtR(double *stat,int noChn,int numiter,int numpar,double ***tens) { /* this calculates Gelman-Rubin sqrtR for assessing convergence for MCMC using overdispersed starting points-set trans=0 to calculate on original scale otherwise it calculates it on unbounded scale - if you want this you must modify below where the comments are */ double mean(double *,int); double var(double *,int); int i,j,k,k1,trans=0; double *s,*m,*temp,W,inter,BonN; temp=dvector(1,numiter); s=dvector(1,noChn); m=dvector(1,noChn); for(i=1;i<=numpar;i++){ for(j=1;j<=noChn;j++){ for(k=1;k<=numiter;k++){ /* can calculate on the original rather than the unbounded scale by changing the next few lines */ if(trans==0) temp[k]=exp(tens[j][k][i]); else temp[k]=tens[j][k][i]; } m[j]=mean(temp,numiter); s[j]=var(temp,numiter); } W=mean(s,noChn); BonN=var(m,noChn); inter=W*(numiter-1.0)/numiter+BonN; if(inter<0.0 || W<=0.0) stat[i]=100.0; else stat[i]=sqrt((inter)/W); } free_dvector(temp,1,numiter); free_dvector(s,1,noChn); free_dvector(m,1,noChn); } double mean(double *data, int n) { int j; double ave=0.0; for(j=1;j<=n;j++) ave+=data[j]; ave/=n; return ave; } double var(double *data, int n) { double ave,dev,variance; double s=0.0; int j; ave=mean(data,n); for(j=1;j<=n;j++){ dev=data[j]-ave; s+=dev*dev; } variance=s/(n-1.0); return variance; } /* next set of routines are from Numerical Recipes in C-thanks to Press, Teukolsky, Vetterling and Flannery! */ int *ivector(long nl,long nh) /* allocate a vector with index range v[nl...nh] */ { int *v; v=(int *)malloc((size_t) ((nh-nl+2)*sizeof(int))); if (!v) nrerror("allocation failure in dvector()"); return v-nl+1; } void free_ivector(int *v, long nl, long nh) /* free memory from an ivector */ { free((FREE_ARG) (v+nl-1)); } double *dvector(long nl,long nh) /* allocate a vector with index range v[nl...nh] */ { double *v; v=(double *)malloc((size_t) ((nh-nl+2)*sizeof(double))); if (!v) nrerror("allocation failure in dvector()"); return v-nl+1; } void free_dvector(double *v, long nl, long nh) /* free memory from an dvector */ { free((FREE_ARG) (v+nl-1)); } double **dmatrix(long nrl,long nrh,long ncl,long nch) /* allocate a double matrix with subscript range m[nrl...nrh][ncl...nch] */ { long i, nrow=nrh-nrl+1,ncol=nch-ncl+1; double **m; /* allocate pointers to rows */ m=(double **) malloc((size_t)((nrow+1)*sizeof(double*))); if(!m) nrerror("allocation failure 1 in dmatrix()"); m+=1; m-=nrl; /* allocate rows and set pointers to them */ m[nrl]=(double *) malloc((size_t)((nrow*ncol+1)*sizeof(double))); if(!m[nrl]) nrerror("allocation failure 2 in dmatrix()"); m[nrl]+=1; m[nrl]-=ncl; for(i=nrl+1;i<=nrh;i++) m[i]=m[i-1]+ncol; /* return pointer to array of pointers to rows */ return m; } void free_dmatrix(double **m, long nrl,long nrh,long ncl,long nch) /* free a double matrix allocated by dmatrix() */ { free((FREE_ARG) (m[nrl]+ncl-1)); free((FREE_ARG) (m+nrl-1)); } double ***d3tensor(long nrl,long nrh,long ncl,long nch,long ndl, long ndh) /* allocate a double 3-tensor */ { long i,j,nrow=nrh-nrl+1,ncol=nch-ncl+1,ndep=ndh-ndl+1; double ***t; /* allocate pointers ot rows */ t=(double ***) malloc((size_t)((nrow+1)*sizeof(double**))); if(!t) nrerror("allocation failure 1 in d3tensor()"); t+=1; t-=nrl; /* allocate pointers to rows and set pointers to them */ t[nrl]=(double **) malloc((size_t)((nrow*ncol+1)*sizeof(double*))); if(!t[nrl]) nrerror("allocation failure 2 in d3tensor()"); t[nrl]+=1; t[nrl]-=ncl; /* allocate rows and set pointers to them */ t[nrl][ncl]=(double *) malloc((size_t)((nrow*ncol*ndep+1)*sizeof(double))); if(!t[nrl][ncl]) nrerror("allocation failure 3 in d3tensor()"); t[nrl][ncl]+=1; t[nrl][ncl]-=ndl; for(j=ncl+1;j<=nch;j++) t[nrl][j]=t[nrl][j-1]+ndep; for(i=nrl+1;i<=nrh;i++){ t[i]=t[i-1]+ncol; t[i][ncl]=t[i-1][ncl]+ncol*ndep; for(j=ncl+1;j<=nch;j++) t[i][j]=t[i][j-1]+ndep; } /* return pointer to array of pointers to rows */ return t; } void free_d3tensor(double ***t,long nrl,long nrh,long ncl,long nch, long ndl,long ndh) /* free a double tensor declared by d3tensor */ { free((FREE_ARG) (t[nrl][ncl]+ndl-1)); free((FREE_ARG) (t[nrl]+ncl-1)); free((FREE_ARG) (t+nrl-1)); } /* this is Unif(0,1) generator */ #define IAran1 16807 #define IMran1 2147483647 #define AMran1 (1.0/IMran1) #define IQran1 127773 #define IRran1 2836 #define NDIVran1 (1+(IMran1-1)/32) #define EPSran1 1.2e-7 #define RNMXran1 (1.0-EPSran1) double ran1(long *idum) { const int NTABran1=32; int j; long k; static long iy=0; static long iv[NTABran1]; double temp; if(*idum <= 0 || !iy){ if(-(*idum)<1) *idum=1; else *idum = -(*idum); for(j=NTABran1+7;j>=0;j--){ k=(*idum)/IQran1; *idum=IAran1*(*idum-k*IQran1)-IRran1*k; if(*idum<0) *idum += IMran1; if(jRNMXran1) return RNMXran1; else return temp; } double gasdev(long *idum) { /* standard normal deviates */ static int iset=0; static double gset; double fac,rsq,v1,v2; if(iset==0){ do{ v1=2.0*ran1(idum)-1.0; v2=2.0*ran1(idum)-1.0; rsq=v1*v1+v2*v2; } while (rsq>=1.0 || rsq==0.0); fac=sqrt(-2.0*log(rsq)/rsq); gset=v1*fac; iset=1; return v2*fac; } else{ iset=0; return gset; } } void choldc(double **a, int n, double p[]) { /* this finds the cholesky decomp. returns result in lower half of a and the diagonal in p */ int i,j,k; double sum; for(i=1;i<=n;i++){ for(j=i;j<=n;j++){ for(sum=a[i][j],k=i-1;k>=1;k--) sum-=a[i][k]*a[j][k]; if(i==j){ if(sum<=0.0){ nrerror("choldc failed"); } p[i]=sqrt(sum); } else a[j][i]=sum/p[i]; } } } double loggam(double xx) { /* returns the log of the gamma function */ double x,y,tmp,ser; static double cof[6]={76.18009172947146, -86.50532032941677, 24.01409824083091,-1.231739572450155, 0.001208650973866179, -5.395239384953e-006}; int j; y=x=xx; tmp=x+5.5; tmp-=(x+0.5)*log(tmp); ser=1.000000000190015; for(j=0;j<=5;j++) ser+=cof[j]/++y; return -tmp+log(2.506628274631*ser/x); } void nrerror(const char error_text[]) /* Numerical Recipes standard error handler */ { fprintf(stderr,"Run-time error...\n"); fprintf(stderr,"%s\n",error_text); fprintf(stderr,"...now exiting to system...\n"); exit(EXIT_FAILURE); }