######################MC simulation-based adaptive UminP test for 
###################### association, possibly including interactions
######################   in the possible presence of nuisance parameters.
###########References: 
#            Pan W, Basu S, Shen X (2011). Adaptive Tests for Detecting Gene-Gene and Gene-Environment Interactions.

######################Wei Pan, Division of Biostatistics, U of Minnesota
######################Email: weip@biostat imn.edu
#### V1, May 25, 2011


library("mvtnorm")

#Input:
#      Y: a vector of 0-1 response, nx1;
#      X: design matrix for covariates (e.g. gene-gene interactions) of
#           interest (i.e. to be tested), nxk;
#      Z: design matrix for nuisance covariates, nxk2;
#      B: # of simulations to calculate p-values
#
#Goal:
#      fit model: logit(Pr(Y=1)) = a0+ a1*X+a2*Z ,
#      test H0: a1=0.
#
#Output: p-values for MCaUminP based on the natural order (i.e. order given by
#        the columns of X1 and X2, and on the marginal/univariate test stat
#        U^2_j/v_j; that is, the OUTPUT is a vector of two p-values for
#        aUminP-Loc and aUminP-Ord, respectively.


############################################
############################################
############################################

#####Adaptive UminP test based on MC simulations, not numerical 
###### multi-dimensional integrations (as in aUminP in ANeymanV1.r)
#####  since the latter seems to be too slow.

MCaUminPV1<-function(Y, X, Z=NULL, B=200){

n<-length(Y)
k<-ncol(X)
k2<-ncol(Z)

#######construction of the score vector and its cov matrix:
if (is.null(Z)){
## NO nuisance parameters:
   Xg <- X
   Xbar<-apply(Xg, 2, mean)
   Xgb<-Xg
   for(i in 1:nrow(Xg))
      Xgb[i,]<-Xg[i,]-Xbar
   ##score vector:
   U<-t(Xg) %*% (Y-mean(Y))
   ##cov of the score stats:
   CovS<- mean(Y)*(1-mean(Y))*(t(Xgb) %*% Xgb)

} else {
## with nuisance parameters:
tdat1<-data.frame(trait=Y, Z)
fit1<-glm(trait~.,family="binomial",data=tdat1)
pis<-fitted.values(fit1)
Us<-matrix(0, nrow=n, ncol=k)
for(i in 1:k){
    tdat2<-data.frame(X1=X[,i], Z)
    fit2<-glm(X1~.,data=tdat2)
    X1mus<-fitted.values(fit2)
    Us[, i]<-(Y - pis)*(X[,i] - X1mus)
    }
U<-apply(Us, 2, sum)
CovS<-matrix(0, nrow=k, ncol=k)
for(i in 1:n)
  CovS<-CovS + Us[i,] %*% t(Us[i,])
}

K<-length(U)

   ##standardized score vector (s.t. it is N(0,I) under null)
      CovS.edecomp<-eigen(CovS)
      CovS.rank<-sum(abs(CovS.edecomp$values)> 1e-8)
      inveigen<-ifelse(abs(CovS.edecomp$values) >1e-10, 1/CovS.edecomp$values, 1e+10)
      #P<-solve(CovS.edecomp$vectors)
      P<-t(CovS.edecomp$vectors)
      gInv.CovS<-t(P) %*% diag(inveigen) %*% P

      gInv.CovSsqrt<-t(P) %*% diag(sqrt(inveigen))  %*% P

      svd.CovS<-svd(CovS)
      CovSsqrt<-svd.CovS$u %*% diag(sqrt(svd.CovS$d))

   #order U's components from large to small:
      Ord<-order(U^2, decreasing=T)
      Ordw<-order(U^2/diag(CovS), decreasing=T)
      Uord<-U[Ord]
      Uordw<-U[Ordw]
      CovSord<-CovS[Ord, Ord]
      CovSordw<-CovS[Ordw, Ordw]

   UminPs<-UminPords<-UminPps<-UminPpords<-rep(0, length(U))
   for (k in 1:length(U)){
     UminPs[k]<-UminPstat(U[1:k], CovS[1:k, 1:k])
     UminPords[k]<-UminPstat(Uordw[1:k], CovSordw[1:k, 1:k])
     }
#   UminP1<-max(UminPs)
#   UminPord1<-max(UminPords)
 
   #######simulations:
   n<-length(Y)
   UminP0s<-UminP0ords<-UminP0ps<-UminP0pords<-matrix(1, nrow=B, ncol=K)
   for(b in 1:B){
      U00<-rnorm(K, 0, 1)
      U0<-CovSsqrt %*% U00

     #order U's components from large to small in Z-stats:
      Ord<-order(U0^2, decreasing=T)
      Ordw<-order(U0^2/diag(CovS), decreasing=T)
      U0ord<-U0[Ord]
      U0ordw<-U0[Ordw]
      CovS0ord<-CovS[Ord, Ord]
      CovS0ordw<-CovS[Ordw, Ordw]

     for (k in 1:K){
       UminP0s[b,k]<-UminPstat(U0[1:k], CovS[1:k,1:k])
       UminP0ords[b,k]<-UminPstat(U0ordw[1:k], CovS0ordw[1:k,1:k])
       }
     }

###p-value for each k:
for(i in 1:K){
  UminPps[i]<-sum(UminPs[i] <= UminP0s[,i])/B
  UminPpords[i]<-sum(UminPords[i] <= UminP0ords[,i])/B
  }
####Null p-valeus (used to calulate minP):
for(b in 1:B)
for(i in 1:K){
  UminP0ps[b, i]<-sum(UminP0s[b, i] <= UminP0s[-b,i])/(B-1)
  UminP0pords[b, i]<-sum(UminP0ords[b, i] <= UminP0ords[-b,i])/(B-1)
  }
UminP0psMin<-apply(UminP0ps, 1, min)
UminP0pordsMin<-apply(UminP0pords, 1, min)

return(c( 
         lessThan(UminP0psMin, min(UminPps))/B,
         lessThan(UminP0pordsMin, min(UminPpords))/B
        ))
}  

lessThan<-function(x, x0){
  if (x0==0) sum(x <= x0)
  else sum(x<x0)
  }

##########Return UminP Test statistics########################
UminPstat<-function(U, CovS){

     if (is.null(dim(CovS))) {# only one-dim:
        Tu<- sum(U^2 /CovS)
        if (is.na(Tu) || is.infinite(Tu) || is.nan(Tu)) Tu<-0
#        pTu<-as.numeric(1-pchisq(Tu, 1))
        }
     else{
          Tu<-as.vector(abs(U)/sqrt(diag(CovS)))
#    k<-length(U)
#    V <- matrix(0,nrow=k, ncol=k)
#    for(i in 1:k){
#      for(j in 1:k){
#          if (abs(CovS[i,j])>1e-20)
#             V[i,j] <- CovS[i,j]/sqrt(CovS[i,i]*CovS[j,j])
#          else   Vs[i,j] <- 1e-20
#        }
#      }
#    pTu <- as.numeric(PowerUniv(Tu,V))
        }

Tu<-max(Tu)
        if (is.na(Tu) || is.infinite(Tu) || is.nan(Tu)) Tu<-0
Tu
}