################################################### # Sample Splus program used to generate figures in # Pan W, Lin J and Le C (2002) ``How Many Replicates of Arrays Are Required to Detect Gene Expression Changes in Microarray Experiments? A Mixture Model Approach". Genome Biology, /2002/3/5/research/0022. # # NOTE: it is NOT a general program; you have to edit it before # using it for yoru data. You may also need to download the EMMIX # or us eother program to estimate Normal mixture models. # # Wei Pan, weip@biostat.umn.edu # Division of Biostatistics, U of Minnesota ################################################### #read-in data: dat<-read.table("../allShort.dat5") #X1<-(dat$V1-mean(dat$V1))/sqrt(var(dat$V1)) #X2<-(dat$V2-mean(dat$V2))/sqrt(var(dat$V2)) #X3<-(dat$V4-mean(dat$V4))/sqrt(var(dat$V4)) #X4<-(dat$V5-mean(dat$V5))/sqrt(var(dat$V5)) X1<-(dat$V1-median(dat$V1)) X2<-(dat$V2-median(dat$V2)) X3<-(dat$V4-median(dat$V4)) X4<-(dat$V5-median(dat$V5)) se1<-se2<-rep(0, length(dat$V1)) for(i in 1:length(dat$V1)){ se1[i]<-sqrt(var(c(X1[i], X2[i]))) se2[i]<-sqrt(var(c(X3[i], X4[i]))) } a01<-quantile(se1, 0.9) a02<-quantile(se2, 0.9) #construct test stat Z and null stat z: set.seed(7) n<-length(X1) z<-Z<-rep(0, n) for(i in 1:n){ Z[i]<-mean(c(X1[i],X2[i]))/(se1[i]+a01)- mean(c(X3[i],X4[i]))/(se2[i]+a02) a<-sample(c(1,-1),2, replace=F) b<-sample(c(1,-1),2, replace=F) z[i]<-sum(a*c(X1[i],X2[i]))/2/(se1[i]+a01) + sum(b*c(X3[i],X4[i]))/2/(se2[i]+a02) } length((1:n)[abs(Z)>1]) [1] 333 hist(z, nclass=20, prob=T) hist(Z, nclass=20, prob=T) sink("E2B1.dat") for(i in 1:n) cat(z[i],"\n") sink() X11(); par(mfrow=c(2,2)) plot(X1, X2) abline(0,1) plot(X3, X4) abline(0,1) plot((X1+X2)/2, (X3+X4)/2); abline(0,1) X12<-(X1+X2)/2; X34<-(X3+X4)/2 fit1<-loess(se1~X12, enp.target=3) fit2<-loess(se2~X34, enp.target=3) #Fig 1: postscript("fig/fig1.ps", horizon=F) par(mfrow=c(2,2)) plot(fit1, ylim=c(0, 0.25), xlab="Xbar", ylab="sigma1"); points(X12, se1) plot(fit2, ylim=c(0, 0.25), xlab="Ybar", ylab="sigma2"); points(X34, se2) dev.off() fitse1<-predict(fit1) fitse2<-predict(fit2) zfit<-Zfit<-rep(0, n) for(i in 1:n){ Zfit[i]<-mean(c(X1[i],X2[i]))/(fitse1[i])- mean(c(X3[i],X4[i]))/(fitse2[i]) a<-sample(c(1,-1),2, replace=F) b<-sample(c(1,-1),2, replace=F) zfit[i]<-sum(a*c(X1[i],X2[i]))/2/(fitse1[i]) + sum(b*c(X3[i],X4[i]))/2/(fitse2[i]) } hist(zfit, nclass=20, prob=T) hist(Zfit, nclass=20, prob=T) length((1:n)[abs(Zfit)>4]) [1] 88 sink("E2fitB1.dat") for(i in 1:n) cat(zfit[i],"\n") sink() #fitted mixture model for z's: x<-seq(-4, 4, by=0.01) pis1<-(1) mus1<-(-0.014716) vs1<-(1.64698) pis<-c(0.760, 0.240) mus<-c(-0.041540, 0.070039) vs<-c(1.31166, 2.69702) #Fig 2: postscript("fig/fig2.ps", horizon=F) par(mfrow=c(2,2)) hist(zfit, nclass=20, prob=T, xlim=c(-4,4), xlab="z2") lines(x, pis[1]*dnorm(x, mus[1], sqrt(vs[1])) + pis[2]*dnorm(x, mus[2], sqrt(vs[2])) , lty=1) lines(x, dnorm(x, mus1, sqrt(vs1)), lty=2) title(main="(a)") z4<-z6<-z8<-rep(0, n) g0<-length(pis) for(i in 1:n){ k1<-sample(1:g0, 1, prob=pis) k2<-sample(1:g0, 1, prob=pis) k3<-sample(1:g0, 1, prob=pis) k4<-sample(1:g0, 1, prob=pis) x1<-rnorm(1, mus[k1], sqrt(vs[k1])) x2<-rnorm(1, mus[k2], sqrt(vs[k2])) x3<-rnorm(1, mus[k3], sqrt(vs[k3])) x4<-rnorm(1, mus[k4], sqrt(vs[k4])) z4[i]<-(x1+x2)/2 z6[i]<-(x1+x2+x3)/3 z8[i]<-(x1+x2+x3+x4)/4 } sink("E4fitB1.dat") for(i in 1:n) cat(z4[i],"\n") sink() sink("E6fitB1.dat") for(i in 1:n) cat(z6[i],"\n") sink() sink("E8fitB1.dat") for(i in 1:n) cat(z8[i],"\n") sink() ##For z4: hist(z4, nclass=20, prob=T, xlim=c(-4,4), xlab="z4") #based on the fitted mixture model: lines(x, dnorm(x, -0.049356, sqrt(0.822640))) #based on the implied theoretical model: yx<-rep(0, length(x)) for(j in 1:g0) for(k in 1:g0) yx<-yx + pis[j]*pis[k]*dnorm(x, (mus[j]+mus[k])/2, sqrt((vs[j]+vs[k])/4)) lines(x, yx, lty=2) title(main="(b)") ##For z6: hist(z6, nclass=20, prob=T, xlim=c(-4,4), xlab="z6") #based on the fitted mixture model: lines(x, dnorm(x, -0.064413, sqrt(0.538290))) #based on the implied theoretical model: yx<-rep(0, length(x)) for(j in 1:g0) for(k in 1:g0) for(l in 1:g0) yx<-yx + pis[j]*pis[k]*pis[l]*dnorm(x, (mus[j]+mus[k]+mus[l])/3, sqrt((vs[j]+vs[k]+vs[l])/9)) lines(x, yx, lty=2) title(main="(c)") ##For z8: hist(z8, nclass=20, prob=T, xlim=c(-4,4), xlab="z8") #based on the fitted mixture model: lines(x, dnorm(x, -0.043806, sqrt(0.420603))) #based on the implied theoretical model: yx<-rep(0, length(x)) for(j in 1:g0) for(k in 1:g0) for(l in 1:g0) for(m in 1:g0) yx<-yx + pis[j]*pis[k]*pis[l]*pis[m]*dnorm(x, (mus[j]+mus[k]+mus[l]+mus[m])/4, sqrt((vs[j]+vs[k]+vs[l]+vs[m])/16)) lines(x, yx, lty=2) title(main="(d)") dev.off() #end of Fig 2 #seek cut-off points A and -A such that the two tail prob=alpha for # a given Normal mixture seekcut<-function(alpha=0.00090, pis = c(0.760, 0.240), mus = c(-0.041540, 0.070039), vs = c(1.31166, 2.69702), low, up, epsilon=1.0e-6) { g<-length(pis) A1<-low A2<-up pr<-0 while (abs(pr-alpha)>epsilon){ A<-(A1+A2)/2 pr<-0 for(k in 1:g) pr<-pr + pis[k]*pnorm(-A, mus[k], sqrt(vs[k])) + pis[k]*(1 - pnorm(A, mus[k], sqrt(vs[k]))) if (pralpha) A1<-A } A } tailpr<-function(A, pis = c(0.760, 0.240), mus = c(-0.041540, 0.070039), vs = c(1.31166, 2.69702) ) { A<-abs(A) g<-length(pis) pr<-0 for(k in 1:g) pr<-pr + pis[k]*pnorm(-A, mus[k], sqrt(vs[k])) + pis[k]*(1 - pnorm(A, mus[k], sqrt(vs[k]))) pr } tailpr(5) [1] 0.0005741134 tailpr(2) [1] 0.115218 > seekcut(low=2, up=5) [1] 4.777344 > tailpr(4.777344) [1] 0.0009008562 > length((1:n)[abs(Zfit)>4.777344 ]) [1] 52 tailpr(5, pis=1, mus=-0.049356, vs=0.822640) [1] 3.698552e-08 tailpr(2, pis=1, mus=-0.049356, vs=0.822640) 1] 0.02767739 seekcut(low=2, up=5, pis=1, mus=-0.049356, vs=0.822640) [1] 3.015869 #z6: tailpr(5, pis=1, mus=-0.064413, vs=0.538290) 1] 1.120196e-11 tailpr(2, pis=1, mus=-0.064413, vs=0.538290) [1] 0.006615828 seekcut(low=2, up=5, pis=1, mus=-0.064413, vs=0.538290) [1] 2.445312 #z8: tailpr(5, pis=1, mus=-0.043806, vs=0.420603) [1] 1.446099e-14 tailpr(2, pis=1, mus=-0.043806, vs=0.420603) [1] 0.002091814 seekcut(low=2, up=5, pis=1, mus=-0.043806, vs=0.420603) [1] 2.158203 seekcut(low=2, up=5, pis=1, mus=-0.043806, vs=0.420603, epsilon=1e-10) [1] 2.158062 powers<-function(A,d, pis = c(0.760, 0.240), mus = c(-0.041540, 0.070039), vs = c(1.31166, 2.69702) ) { A<-abs(A) g<-length(pis) pr<-0 for(k in 1:g) pr<-pr + pis[k]*pnorm(-A, mus[k]+d, sqrt(vs[k])) + pis[k]*(1 - pnorm(A, mus[k]+d, sqrt(vs[k]))) pr } #Fig 3: #Power curve: postscript("fig/fig3.ps", horizon=F) par(mfrow=c(1,1)) d<-seq(-4,4, by=0.01) plot(0, 0, xlim=c(-4,4), ylim=c(0,1), xlab="d", ylab="Power", type="n") lines(d, powers(4.777344, d), lty=1) lines(d, powers(3.015869, d, pis=1, mus=-0.049356, vs=0.822640), lty=2) lines(d, powers(2.445312, d, pis=1, mus=-0.064413, vs=0.538290), lty=3) lines(d, powers(2.158203, d, pis=1, mus=-0.043806, vs=0.420603), lty=4) legend(-1.8, 0.95, legend=c("2 replicates","4 replicates", "6 replicates","8 replicates"), lty=1:4) dev.off() ####added Feb 5, 2002 seekcut(alpha=0.05/1000, low=2, up=10) [1] 6.09375 > tailpr(6.09375) [1] 5.036983e-05 seekcut(alpha=0.05/5000, low=2, up=10) [1] 6.75 > tailpr(6.75) [1] 9.644943e-06 seekcut(alpha=0.05/10000, low=2, up=10) [1] 7 > tailpr(7) #z4: seekcut(alpha=0.05/1000,low=2, up=5, pis=1, mus=-0.049356, vs=0.822640) [1] 3.6875 seekcut(alpha=0.05/5000,low=2, up=5, pis=1, mus=-0.049356, vs=0.822640) [1] 4.015625 seekcut(alpha=0.05/10000,low=2, up=5, pis=1, mus=-0.049356, vs=0.822640) [1] 4.15625 #z6: seekcut(0.05/1000,low=2, up=5, pis=1, mus=-0.064413, vs=0.538290) [1] 2.984375 seekcut(0.05/5000,low=2, up=5, pis=1, mus=-0.064413, vs=0.538290) [1] 3.265625 seekcut(0.05/10000,low=2, up=5, pis=1, mus=-0.064413, vs=0.538290) [1] 3.359375 #z8: seekcut(0.05/1000,low=2, up=5, pis=1, mus=-0.043806, vs=0.420603) [1] 2.638672 seekcut(0.05/5000,low=2, up=5, pis=1, mus=-0.043806, vs=0.420603) [1] 2.867188 seekcut(0.05/10000,low=2, up=5, pis=1, mus=-0.043806, vs=0.420603) [1] 2.984375 #Fig4: #Power curve: postscript("fig/fig4.ps", horizon=F) par(mfrow=c(1,1)) d<-seq(-4,4, by=0.01) plot(0, 0, xlim=c(-4,4), ylim=c(0,1), xlab="d", ylab="Power", type="n") lines(d, powers(6.09375, d), lty=1) lines(d, powers(3.6875, d, pis=1, mus=-0.049356, vs=0.822640), lty=2) lines(d, powers( 2.984375, d, pis=1, mus=-0.064413, vs=0.538290), lty=3) lines(d, powers(2.638672, d, pis=1, mus=-0.043806, vs=0.420603), lty=4) legend(-1.8, 0.95, legend=c("2 replicates","4 replicates", "6 replicates","8 replicates"), lty=1:4) dev.off() #Fig5: #Power curve: postscript("fig/fig5.ps", horizon=F) par(mfrow=c(1,1)) d<-seq(-4,4, by=0.01) plot(0, 0, xlim=c(-4,4), ylim=c(0,1), xlab="d", ylab="Power", type="n") lines(d, powers(6.75, d), lty=1) lines(d, powers(4.015625, d, pis=1, mus=-0.049356, vs=0.822640), lty=2) lines(d, powers(3.265625, d, pis=1, mus=-0.064413, vs=0.538290), lty=3) lines(d, powers(2.867188, d, pis=1, mus=-0.043806, vs=0.420603), lty=4) legend(-1.8, 0.95, legend=c("2 replicates","4 replicates", "6 replicates","8 replicates"), lty=1:4) dev.off() #Fig6: #Power curve: postscript("fig/fig6.ps", horizon=F) par(mfrow=c(1,1)) d<-seq(-4,4, by=0.01) plot(0, 0, xlim=c(-4,4), ylim=c(0,1), xlab="d", ylab="Power", type="n") lines(d, powers(7, d), lty=1) lines(d, powers(4.15625, d, pis=1, mus=-0.049356, vs=0.822640), lty=2) lines(d, powers(3.359375, d, pis=1, mus=-0.064413, vs=0.538290), lty=3) lines(d, powers(2.984375, d, pis=1, mus=-0.043806, vs=0.420603), lty=4) legend(-1.8, 0.95, legend=c("2 replicates","4 replicates", "6 replicates","8 replicates"), lty=1:4) dev.off()