###################### 5 tests for association, possibly including interactions
######################   in the possible presence of nuisance parameters:
######################        Score, SSU, SSUw, Sum, UminP tests.
###########References:  Pan W, Basu S, Shen X (2011). Adaptive Tests for Detecting Gene-Gene and Gene-Environment Interactions.

######################Wei Pan, Division of Biostatistics, U of Minnesota
######################Email: weip@biostat.umn.edu
#### V1, Feb 16, 2011


library("mvtnorm")

#Input:
#      Y: a vector of 0-1 response, nx1;
#      X: design matrix for covariates (e.g. gene-gene interactions) of
#           interest (i.e. to be tested), nxk;
#      Z: design matrix for nuisance covariates, nxk2;
#Goal:
#      fit model: logit(Pr(Y=1)) = a0+ a1*X+a2*Z ,
#      test H0: a1=0.
#Output: p-values for Score, SSU, SSUw, Sum, UminP

#Examples: suppose that one has two groups of variables in design matrices
#          X1 and X2 respectively, and Y is a binary response vector,
#          1) if one would like to test the marginal effects of X1, then
#             Tests5V1(Y, X=X1);
#          2) if one would like to test the effects of X1 after ajusting for X2,
#             Tests5V1(Y, X=X1, Z=X2);
#          3) if one would like to test the effects of X1 in the presence of 
#             possible interactions with X2, then
#             Tests5V1(Y, X=cbind(X1, X12), Z=X2), 
#             where X12 is the design matrix for interactions;
#          4) if one would like to test on the interaction between X1 and X2
#             after adjusting for the main effects of X1 and X2, then
#             Tests5V1(Y, X=X12, Z=cbind(X1,X2)), 
#             where X12 is the design matrix for interactions;

############################################
############################################
############################################

Tests5V1<-function(Y, X, Z=NULL ){

n<-length(Y)
k<-ncol(X)
k2<-ncol(Z)

#######construction of the score vector and its cov matrix:
if (is.null(Z)){
## NO nuisance parameters:
   Xg <- X
   Xbar<-apply(Xg, 2, mean)
   Xgb<-Xg
   for(i in 1:nrow(Xg))
      Xgb[i,]<-Xg[i,]-Xbar
   ##score vector:
   U<-t(Xg) %*% (Y-mean(Y))
   ##cov of the score stats:
   CovS<- mean(Y)*(1-mean(Y))*(t(Xgb) %*% Xgb)

} else {
## with nuisance parameters:
tdat1<-data.frame(trait=Y, Z)
fit1<-glm(trait~.,family="binomial",data=tdat1)
pis<-fitted.values(fit1)
Us<-matrix(0, nrow=n, ncol=k)
for(i in 1:k){
    tdat2<-data.frame(X1=X[,i], Z)
    fit2<-glm(X1~.,data=tdat2)
    X1mus<-fitted.values(fit2)
    Us[, i]<-(Y - pis)*(X[,i] - X1mus)
    }
U<-apply(Us, 2, sum)
CovS<-matrix(0, nrow=k, ncol=k)
for(i in 1:n)
  CovS<-CovS + Us[i,] %*% t(Us[i,])
}

pSco<-Score(U, CovS)
pSSU<-SumSqU(U, CovS)
pSSUw<-SumSqUw(U, CovS)
pSum<-Sum(U, CovS)
pUminP<-UminP(U, CovS)

return(c(pSco, pSSU, pSSUw, pSum, pUminP))

}  

SumSqU<-function(U, CovS){
    if (is.null(dim(CovS))) {# only one-dim:
        Tscore<- sum(U^2 /CovS)
        if (is.na(Tscore) || is.infinite(Tscore) || is.nan(Tscore)) Tscore<-0
        pTg1<-as.numeric(1-pchisq(Tscore, 1))
        }
     else {
     #it's possible U=0 and Cov(U)=0:
     if (all(abs(U)<1e-20)) pTg1<-1 else{
     Tg1<- t(U) %*% U
     ##distr of Tg1 is sum of cr Chisq_1:
       cr<-eigen(CovS, only.values=TRUE)$values
     ##approximate the distri by alpha Chisq_d + beta:
       alpha1<-sum(cr*cr*cr)/sum(cr*cr)
     beta1<-sum(cr) - (sum(cr*cr)^2)/(sum(cr*cr*cr))
     d1<-(sum(cr*cr)^3)/(sum(cr*cr*cr)^2)
     alpha1<-as.real(alpha1)
     beta1<-as.real(beta1)
     d1<-as.real(d1)
     pTg1<-as.numeric(1-pchisq((Tg1-beta1)/alpha1, d1))
     }
     }
return(pTg1)
}

SumSqUw<-function(U, CovS){
     ###STRANGELY: diag(A) fails if A is 1X1:
# diag(CovS[40,40])
#<0 x 0 matrix>
#> CovS0<-CovS[40,40]
#> CovS0
#[1] 0.24975
#> diag(CovS0)
#<0 x 0 matrix>
#> diag(1)
#     [,1]
#[1,]    1

     if (is.null(dim(CovS))) {# only one-dim:
        Tg2<- sum(U^2 /CovS)
        if (is.na(Tg2) || is.infinite(Tg2) || is.nan(Tg2)) Tg2<-0
        pTg2<-as.numeric(1-pchisq(Tg2, 1))
        }
     else {
     #it's possible U=0 and Cov(U)=0:
     if (all(abs(U)<1e-20)) pTg2<-1 else{
     diagCovS<-diag(CovS)
     diagCovS<-ifelse(diagCovS>1e-10, diagCovS, 1e-10)
     #Tg2<- t(U) %*%  diag(1/diagCovS) %*% U
     Tg2<- sum(U^2 /diagCovS)
     ##distr of Tg1 is sum of cr Chisq_1:
       cr<-eigen(CovS %*% diag(1/diagCovS), only.values=TRUE)$values
     ##approximate the distri by alpha Chisq_d + beta:
       alpha2<-sum(cr*cr*cr)/sum(cr*cr)
       beta2<-sum(cr) - (sum(cr*cr)^2)/(sum(cr*cr*cr))
       d2<-(sum(cr*cr)^3)/(sum(cr*cr*cr)^2)
       alpha2<-as.real(alpha2)
       beta2<-as.real(beta2)
       d2<-as.real(d2)
     pTg2<-as.numeric(1-pchisq((Tg2-beta2)/alpha2, d2))
     }
     }
   pTg2
}

 ##########score test:
Score<-function(U, CovS){
    if (is.null(dim(CovS))) {# only one-dim:
        Tscore<- sum(U^2 /CovS)
        if (is.na(Tscore) || is.infinite(Tscore) || is.nan(Tscore)) Tscore<-0
        pTscore<-as.numeric(1-pchisq(Tscore, 1))
        }
    else {
    ##gInv of CovS:
      CovS.edecomp<-eigen(CovS)
      CovS.rank<-sum(abs(CovS.edecomp$values)> 1e-8)
      inveigen<-ifelse(abs(CovS.edecomp$values) >1e-8, 1/CovS.edecomp$values, 0)
      P<-solve(CovS.edecomp$vectors)
      gInv.CovS<-t(P) %*% diag(inveigen) %*% P
    Tscore<- t(U) %*% gInv.CovS  %*% U
    pTscore<-as.numeric( 1-pchisq(Tscore, CovS.rank) )
    }
pTscore
}

##########SumTest########################
Sum<-function(U, CovS){
     #it's possible U=0 and Cov(U)=0:
     if (all(abs(sum(U))<1e-20)) pTsum<-1 else{
      a<-rep(1, length(U))
      Tsum<- sum(U)/(sqrt(as.numeric(t(a) %*% CovS %*% (a))))
      pTsum <- as.numeric( 1-pchisq(Tsum^2, 1) )
      }
pTsum
}

##########UminP Test########################
UminP<-function(U, CovS){

     if (is.null(dim(CovS))) {# only one-dim:
        Tu<- sum(U^2 /CovS)
        if (is.na(Tu) || is.infinite(Tu) || is.nan(Tu)) Tu<-0
        pTu<-as.numeric(1-pchisq(Tu, 1))
        }
     else{
          ####it's POSSIBLR Ui=0 and CovS[i,i]=0!
          Tu<-as.vector(abs(U)/(sqrt(diag(CovS)) + 1e-20) )
    k<-length(U)
    V <- matrix(0,nrow=k, ncol=k)
    for(i in 1:k){
      for(j in 1:k){
          if (abs(CovS[i,j])>1e-20)
             V[i,j] <- CovS[i,j]/sqrt(CovS[i,i]*CovS[j,j])
          else   V[i,j] <- 1e-20
        }
      }
    pTu <- as.numeric(PowerUniv(Tu,V))
        }

pTu
}

PowerUniv <- function(U,V){
      n <- dim(V)[1]

      x <- as.numeric(max(abs(U)))
      TER <- as.numeric(1-pmvnorm(lower=c(rep(-x,n)),upper=c(rep(x,n)),mean=c(rep(0,n)),sigma=V))

      return(TER)
}