###################################################
# Sample Splus program used to generate figures in
# Pan W, Lin J and Le C (2002) ``How Many Replicates of Arrays 
  Are Required to Detect Gene Expression Changes in Microarray
  Experiments? A Mixture Model Approach". 
  Genome Biology, /2002/3/5/research/0022.
#
# NOTE: it is NOT a general program; you have to edit it before
# using it for yoru data. You may also need to download the EMMIX
# or us eother program to estimate Normal mixture models.
#
# Wei Pan, weip@biostat.umn.edu
# Division of Biostatistics, U of Minnesota
###################################################

#read-in data:
dat<-read.table("../allShort.dat5")
#X1<-(dat$V1-mean(dat$V1))/sqrt(var(dat$V1))
#X2<-(dat$V2-mean(dat$V2))/sqrt(var(dat$V2))
#X3<-(dat$V4-mean(dat$V4))/sqrt(var(dat$V4))
#X4<-(dat$V5-mean(dat$V5))/sqrt(var(dat$V5))
X1<-(dat$V1-median(dat$V1))
X2<-(dat$V2-median(dat$V2))
X3<-(dat$V4-median(dat$V4))
X4<-(dat$V5-median(dat$V5))
se1<-se2<-rep(0, length(dat$V1))
for(i in 1:length(dat$V1)){
 se1[i]<-sqrt(var(c(X1[i], X2[i])))
 se2[i]<-sqrt(var(c(X3[i], X4[i])))
 }
a01<-quantile(se1, 0.9)
a02<-quantile(se2, 0.9)

#construct test stat Z and null stat z:
set.seed(7)
n<-length(X1)
z<-Z<-rep(0, n)
for(i in 1:n){
  Z[i]<-mean(c(X1[i],X2[i]))/(se1[i]+a01)-
        mean(c(X3[i],X4[i]))/(se2[i]+a02)
  a<-sample(c(1,-1),2, replace=F)
  b<-sample(c(1,-1),2, replace=F)
  z[i]<-sum(a*c(X1[i],X2[i]))/2/(se1[i]+a01) +
        sum(b*c(X3[i],X4[i]))/2/(se2[i]+a02)
  }
length((1:n)[abs(Z)>1])
[1] 333
hist(z, nclass=20, prob=T)
hist(Z, nclass=20, prob=T)

sink("E2B1.dat")
for(i in 1:n) cat(z[i],"\n")
sink()


X11(); par(mfrow=c(2,2))
 plot(X1, X2)     
 abline(0,1)
  plot(X3, X4)
 abline(0,1)
 plot((X1+X2)/2, (X3+X4)/2); abline(0,1)
X12<-(X1+X2)/2; X34<-(X3+X4)/2
 fit1<-loess(se1~X12, enp.target=3)
 fit2<-loess(se2~X34, enp.target=3)
#Fig 1:
postscript("fig/fig1.ps", horizon=F)
par(mfrow=c(2,2))
 plot(fit1, ylim=c(0, 0.25), xlab="Xbar", ylab="sigma1"); points(X12, se1)
 plot(fit2, ylim=c(0, 0.25), xlab="Ybar", ylab="sigma2"); points(X34, se2)
dev.off()

fitse1<-predict(fit1) 
fitse2<-predict(fit2) 

zfit<-Zfit<-rep(0, n)
for(i in 1:n){
  Zfit[i]<-mean(c(X1[i],X2[i]))/(fitse1[i])-
        mean(c(X3[i],X4[i]))/(fitse2[i])
  a<-sample(c(1,-1),2, replace=F)
  b<-sample(c(1,-1),2, replace=F)
  zfit[i]<-sum(a*c(X1[i],X2[i]))/2/(fitse1[i]) +
        sum(b*c(X3[i],X4[i]))/2/(fitse2[i])
  }


hist(zfit, nclass=20, prob=T)
hist(Zfit, nclass=20, prob=T)
length((1:n)[abs(Zfit)>4])
[1] 88

sink("E2fitB1.dat")
for(i in 1:n) cat(zfit[i],"\n")
sink()

#fitted mixture model for z's:
x<-seq(-4, 4, by=0.01)
pis1<-(1)
mus1<-(-0.014716)
vs1<-(1.64698)
pis<-c(0.760, 0.240)
mus<-c(-0.041540, 0.070039)
vs<-c(1.31166, 2.69702)

#Fig 2:
postscript("fig/fig2.ps", horizon=F)
par(mfrow=c(2,2))
hist(zfit, nclass=20, prob=T, xlim=c(-4,4), xlab="z2")
lines(x, pis[1]*dnorm(x, mus[1], sqrt(vs[1])) +
         pis[2]*dnorm(x, mus[2], sqrt(vs[2])) , lty=1)
lines(x, dnorm(x, mus1, sqrt(vs1)), lty=2)
title(main="(a)")

z4<-z6<-z8<-rep(0, n)
g0<-length(pis)
for(i in 1:n){
  k1<-sample(1:g0, 1, prob=pis)
  k2<-sample(1:g0, 1, prob=pis)
  k3<-sample(1:g0, 1, prob=pis)
  k4<-sample(1:g0, 1, prob=pis)
  x1<-rnorm(1, mus[k1], sqrt(vs[k1]))
  x2<-rnorm(1, mus[k2], sqrt(vs[k2]))
  x3<-rnorm(1, mus[k3], sqrt(vs[k3]))
  x4<-rnorm(1, mus[k4], sqrt(vs[k4]))
  z4[i]<-(x1+x2)/2
  z6[i]<-(x1+x2+x3)/3
  z8[i]<-(x1+x2+x3+x4)/4
  }
sink("E4fitB1.dat")
for(i in 1:n) cat(z4[i],"\n")
sink()
sink("E6fitB1.dat")
for(i in 1:n) cat(z6[i],"\n")
sink()
sink("E8fitB1.dat")
for(i in 1:n) cat(z8[i],"\n")
sink()

##For z4:
hist(z4, nclass=20, prob=T, xlim=c(-4,4), xlab="z4")
#based on the fitted mixture model:
lines(x, dnorm(x, -0.049356, sqrt(0.822640)))
#based on the implied theoretical model:
yx<-rep(0, length(x))
for(j in 1:g0)
  for(k in 1:g0)
    yx<-yx + pis[j]*pis[k]*dnorm(x, (mus[j]+mus[k])/2, sqrt((vs[j]+vs[k])/4))
lines(x, yx, lty=2)
title(main="(b)")

##For z6:
hist(z6, nclass=20, prob=T, xlim=c(-4,4), xlab="z6")
#based on the fitted mixture model:
lines(x, dnorm(x, -0.064413, sqrt(0.538290)))
#based on the implied theoretical model:
yx<-rep(0, length(x))
for(j in 1:g0)
  for(k in 1:g0)
    for(l in 1:g0)
    yx<-yx + pis[j]*pis[k]*pis[l]*dnorm(x, 
             (mus[j]+mus[k]+mus[l])/3, sqrt((vs[j]+vs[k]+vs[l])/9))
lines(x, yx, lty=2)
title(main="(c)")

##For z8:
hist(z8, nclass=20, prob=T, xlim=c(-4,4), xlab="z8")
#based on the fitted mixture model:
lines(x, dnorm(x, -0.043806, sqrt(0.420603)))
#based on the implied theoretical model:
yx<-rep(0, length(x))
for(j in 1:g0)
  for(k in 1:g0)
    for(l in 1:g0)
    for(m in 1:g0)
    yx<-yx + pis[j]*pis[k]*pis[l]*pis[m]*dnorm(x, 
             (mus[j]+mus[k]+mus[l]+mus[m])/4, 
             sqrt((vs[j]+vs[k]+vs[l]+vs[m])/16))
lines(x, yx, lty=2)
title(main="(d)")

dev.off() #end of Fig 2

#seek cut-off points A and -A such that the two tail prob=alpha for
# a given Normal mixture
seekcut<-function(alpha=0.00090, 
         pis = c(0.760, 0.240),
         mus = c(-0.041540, 0.070039),
         vs = c(1.31166, 2.69702),
           low, up, epsilon=1.0e-6)
{
 g<-length(pis)
 A1<-low
 A2<-up
 pr<-0
 while (abs(pr-alpha)>epsilon){
   A<-(A1+A2)/2
   pr<-0
   for(k in 1:g)
     pr<-pr + pis[k]*pnorm(-A, mus[k], sqrt(vs[k])) +
            pis[k]*(1 - pnorm(A, mus[k], sqrt(vs[k])))
   if (pr<alpha) A2<-A
   if (pr>alpha) A1<-A
   }
 A
}

tailpr<-function(A, 
         pis = c(0.760, 0.240),
         mus = c(-0.041540, 0.070039),
         vs = c(1.31166, 2.69702) )
{
 A<-abs(A)
 g<-length(pis)
   pr<-0
   for(k in 1:g)
     pr<-pr + pis[k]*pnorm(-A, mus[k], sqrt(vs[k])) +
            pis[k]*(1 - pnorm(A, mus[k], sqrt(vs[k])))
 pr
}
 
 tailpr(5)
[1] 0.0005741134
 tailpr(2)
[1] 0.115218
> seekcut(low=2, up=5)
[1] 4.777344
> tailpr(4.777344)
[1] 0.0009008562
> length((1:n)[abs(Zfit)>4.777344 ])
[1] 52

tailpr(5, pis=1, mus=-0.049356, vs=0.822640)
[1] 3.698552e-08
tailpr(2, pis=1, mus=-0.049356, vs=0.822640)
1] 0.02767739
seekcut(low=2, up=5, pis=1, mus=-0.049356, vs=0.822640)
[1] 3.015869
#z6:
tailpr(5, pis=1, mus=-0.064413, vs=0.538290)
1] 1.120196e-11
tailpr(2, pis=1, mus=-0.064413, vs=0.538290)
[1] 0.006615828
seekcut(low=2, up=5, pis=1, mus=-0.064413, vs=0.538290)
[1] 2.445312
#z8:
tailpr(5, pis=1, mus=-0.043806, vs=0.420603)
[1] 1.446099e-14
tailpr(2, pis=1, mus=-0.043806, vs=0.420603)
[1] 0.002091814
seekcut(low=2, up=5, pis=1, mus=-0.043806, vs=0.420603)
[1] 2.158203
seekcut(low=2, up=5, pis=1, mus=-0.043806, vs=0.420603, epsilon=1e-10)
[1] 2.158062

powers<-function(A,d,
         pis = c(0.760, 0.240),
         mus = c(-0.041540, 0.070039),
         vs = c(1.31166, 2.69702) )
{
 A<-abs(A)
 g<-length(pis)
   pr<-0
   for(k in 1:g)
     pr<-pr + pis[k]*pnorm(-A, mus[k]+d, sqrt(vs[k])) +
            pis[k]*(1 - pnorm(A, mus[k]+d, sqrt(vs[k])))
 pr
}

#Fig 3:
#Power curve:
postscript("fig/fig3.ps", horizon=F)
par(mfrow=c(1,1))
d<-seq(-4,4, by=0.01)
plot(0, 0, xlim=c(-4,4), ylim=c(0,1), xlab="d", ylab="Power", type="n") 
lines(d, powers(4.777344, d), lty=1)
lines(d, powers(3.015869, d, pis=1, mus=-0.049356, vs=0.822640), lty=2)
lines(d, powers(2.445312, d, pis=1, mus=-0.064413, vs=0.538290), lty=3)
lines(d, powers(2.158203, d, pis=1, mus=-0.043806, vs=0.420603), lty=4)
legend(-1.8, 0.95, legend=c("2 replicates","4 replicates",
        "6 replicates","8 replicates"), lty=1:4)
dev.off()

####added Feb 5, 2002
seekcut(alpha=0.05/1000, low=2, up=10)
[1] 6.09375
> tailpr(6.09375)
[1] 5.036983e-05
seekcut(alpha=0.05/5000,  low=2, up=10)
[1] 6.75
> tailpr(6.75)
[1] 9.644943e-06
seekcut(alpha=0.05/10000, low=2, up=10)
[1] 7
> tailpr(7)
#z4:
seekcut(alpha=0.05/1000,low=2, up=5, pis=1, mus=-0.049356, vs=0.822640)
[1] 3.6875
seekcut(alpha=0.05/5000,low=2, up=5, pis=1, mus=-0.049356, vs=0.822640)
[1] 4.015625
seekcut(alpha=0.05/10000,low=2, up=5, pis=1, mus=-0.049356, vs=0.822640)
[1] 4.15625
#z6:
seekcut(0.05/1000,low=2, up=5, pis=1, mus=-0.064413, vs=0.538290)
[1] 2.984375
seekcut(0.05/5000,low=2, up=5, pis=1, mus=-0.064413, vs=0.538290)
[1] 3.265625
seekcut(0.05/10000,low=2, up=5, pis=1, mus=-0.064413, vs=0.538290)
[1] 3.359375
#z8:
seekcut(0.05/1000,low=2, up=5, pis=1, mus=-0.043806, vs=0.420603)
[1] 2.638672
seekcut(0.05/5000,low=2, up=5, pis=1, mus=-0.043806, vs=0.420603)
[1] 2.867188
seekcut(0.05/10000,low=2, up=5, pis=1, mus=-0.043806, vs=0.420603)
[1] 2.984375

#Fig4:
#Power curve:
postscript("fig/fig4.ps", horizon=F)
par(mfrow=c(1,1))
d<-seq(-4,4, by=0.01)
plot(0, 0, xlim=c(-4,4), ylim=c(0,1), xlab="d", ylab="Power", type="n")
lines(d, powers(6.09375, d), lty=1)
lines(d, powers(3.6875, d, pis=1, mus=-0.049356, vs=0.822640), lty=2)
lines(d, powers( 2.984375, d, pis=1, mus=-0.064413, vs=0.538290), lty=3)
lines(d, powers(2.638672, d, pis=1, mus=-0.043806, vs=0.420603), lty=4)
legend(-1.8, 0.95, legend=c("2 replicates","4 replicates",
        "6 replicates","8 replicates"), lty=1:4)
dev.off()

#Fig5:
#Power curve:
postscript("fig/fig5.ps", horizon=F)
par(mfrow=c(1,1))
d<-seq(-4,4, by=0.01)
plot(0, 0, xlim=c(-4,4), ylim=c(0,1), xlab="d", ylab="Power", type="n")
lines(d, powers(6.75, d), lty=1)
lines(d, powers(4.015625, d, pis=1, mus=-0.049356, vs=0.822640), lty=2)
lines(d, powers(3.265625, d, pis=1, mus=-0.064413, vs=0.538290), lty=3)
lines(d, powers(2.867188, d, pis=1, mus=-0.043806, vs=0.420603), lty=4)
legend(-1.8, 0.95, legend=c("2 replicates","4 replicates",
        "6 replicates","8 replicates"), lty=1:4)
dev.off()

#Fig6:
#Power curve:
postscript("fig/fig6.ps", horizon=F)
par(mfrow=c(1,1))
d<-seq(-4,4, by=0.01)
plot(0, 0, xlim=c(-4,4), ylim=c(0,1), xlab="d", ylab="Power", type="n")
lines(d, powers(7, d), lty=1)
lines(d, powers(4.15625, d, pis=1, mus=-0.049356, vs=0.822640), lty=2)
lines(d, powers(3.359375, d, pis=1, mus=-0.064413, vs=0.538290), lty=3)
lines(d, powers(2.984375, d, pis=1, mus=-0.043806, vs=0.420603), lty=4)
legend(-1.8, 0.95, legend=c("2 replicates","4 replicates",
        "6 replicates","8 replicates"), lty=1:4)
dev.off()