###########Adaptive Neyman's tests for association, with possibe interactions
###########   in the possible presence of nuisance parameters:
###########  aScore, aSSU, aSSUw, aSum, (aUminP--too slow for sim).

###########References: Fan (1996, JASA 91:674-688)
#            Pan W, Shen X (2011). Adaptive Tests for Association Analysis of Rare Variants. Genet Epi.
#            Pan W, Basu S, Shen X (2011). Adaptive Tests for Detecting Gene-Gene and Gene-Environment Interactions.
###########Author: Wei Pan, Division of Biostatistics, U of Minnesota
###########        Email: weip@biostat imn.edu
#### V1, May 25, 2011


library("mvtnorm")

#Input:
#      Y: a vector of 0-1 response, nx1;
#      X: design matrix for covariates (e.g. gene-gene interactions) of
#           interest (i.e. to be tested), nxk;
#      Z=NULL: design matrix for nuisance covariates, nxk2;
#      useUminP=FALSE: whether to use the (time-consuming) 
#                      aUminP-Loc and aUminP-Ord tests;
#      B=200: # of simulations to calculate p-values for the adaptive tests.
#
#Goal:
#      fit model: logit(Pr(Y=1)) = a0+ a1*X+a2*Z ,
#      test H0: a1=0.
#
#Output: p-values for aScore, aSSU, aSSUw, aSum, (aUminP--too slow)

#Examples: suppose that one has two groups of variables in design matrices
#          X1 and X2 respectively, and Y is a binary response vector,
#          1) if one would like to test the marginal effects of X1, then
#             Tests5V1(Y, X=X1);
#          2) if one would like to test the effects of X1 after ajusting for X2,
#             Tests5V1(Y, X=X1, Z=X2);
#          3) if one would like to test the effects of X1 in the presence of 
#             possible interactions with X2, then
#             Tests5V1(Y, X=cbind(X1, X12), Z=X2), 
#             where X12 is the design matrix for interactions;
#          4) if one would like to test on the interaction between X1 and X2
#             after adjusting for the main effects of X1 and X2, then
#             Tests5V1(Y, X=X12, Z=cbind(X1,X2)), 
#             where X12 is the design matrix for interactions;

############################################
############################################
############################################

#####Adaptive Neyman's tests implemented as the Score, SSU, SSUw, Sum (& Sum2, Sum2minP), UminP tests:
##### Order the components of the score vector by the input SNP locations, or
##### order by |standardized U|, U^2, U^2/v, U/sqrt(v), U^2/v; the two ordering
##### schemes result in two adaptive tests called aTest-Loc and aTest-Ord,
##### respectively.
##### The OUTPUT is a vector of p-values for
#####  aScore-Loc, aScore-Ord, aSSU-Loc, aSSU-Ord, aSSUw-Loc, aSSUw-Ord,
#####  aSum-Loc, aSum-Ord, and poissbly UminP-Loc, UminP-Ord;
##### Since the UminP, and accordingly aUminP, use numerical multiple 
##### integrations, aUminP is slow; an MC simulation-based method to calculate
##### a p-value for UminP or aUminP is much faster than the integration-based
##### implementtaion here; the former is avaibale as a separate function;
##### default useUminP=FALSE (o/w setting it as TRUE) truns off (or on) running aUminP.

ANeymanV1<-function(Y, X, Z=NULL, useUminP=FALSE, B=200){

n<-length(Y)
k<-ncol(X)
k2<-ncol(Z)

#######construction of the score vector and its cov matrix:
if (is.null(Z)){
## NO nuisance parameters:
   Xg <- X
   Xbar<-apply(Xg, 2, mean)
   Xgb<-Xg
   for(i in 1:nrow(Xg))
      Xgb[i,]<-Xg[i,]-Xbar
   ##score vector:
   U<-t(Xg) %*% (Y-mean(Y))
   ##cov of the score stats:
   CovS<- mean(Y)*(1-mean(Y))*(t(Xgb) %*% Xgb)

} else {
## with nuisance parameters:
tdat1<-data.frame(trait=Y, Z)
fit1<-glm(trait~.,family="binomial",data=tdat1)
pis<-fitted.values(fit1)
Us<-matrix(0, nrow=n, ncol=k)
for(i in 1:k){
    tdat2<-data.frame(X1=X[,i], Z)
    fit2<-glm(X1~.,data=tdat2)
    X1mus<-fitted.values(fit2)
    Us[, i]<-(Y - pis)*(X[,i] - X1mus)
    }
U<-apply(Us, 2, sum)
CovS<-matrix(0, nrow=k, ncol=k)
for(i in 1:n)
  CovS<-CovS + Us[i,] %*% t(Us[i,])
}

K<-length(U)

   ##standardized score vector (s.t. it is N(0,I) under null)
      CovS.edecomp<-eigen(CovS)
      CovS.rank<-sum(abs(CovS.edecomp$values)> 1e-8)
      inveigen<-ifelse(abs(CovS.edecomp$values) >1e-10, 1/CovS.edecomp$values, 1e+10)
      #P<-solve(CovS.edecomp$vectors)
      P<-t(CovS.edecomp$vectors)
      gInv.CovS<-t(P) %*% diag(inveigen) %*% P

      gInv.CovSsqrt<-t(P) %*% diag(sqrt(inveigen))  %*% P

      svd.CovS<-svd(CovS)
      CovSsqrt<-svd.CovS$u %*% diag(sqrt(svd.CovS$d))

      Ust<- gInv.CovSsqrt %*% U
      Ust2<- Ust^2
   #order Ust's components from large to smalle in absolute values:
      Uord<-Ust[order(abs(Ust), decreasing=T)]
      Uord2<-Uord^2

   #order U's components from large to small in Z-stats:
      Ord<-order(U^2, decreasing=T)
      Ordw<-order(U^2/diag(CovS), decreasing=T)
      Ords<-order(U/sqrt(diag(CovS)), decreasing=T)
      Uord<-U[Ord]
      Uordw<-U[Ordw]
      Uords<-U[Ords]
      CovSord<-CovS[Ord, Ord]
      CovSordw<-CovS[Ordw, Ordw]
      CovSords<-CovS[Ords, Ords]

   SCOs<-SCOords<-SSUs<-SSUords<-SSUws<-SSUwords<-Sums<-Sumords<-UminPs<-UminPords<-rep(0, length(U))
   for (k in 1:length(U)){
     SCOs[k]<-sum(Ust2[1:k]-1)/sqrt(2*k)
     SCOords[k]<-sum(Uord2[1:k]-1)/sqrt(2*k)
     SSUs[k]<-SumSqU(U[1:k], CovS[1:k,1:k])
     SSUords[k]<-SumSqU(Uord[1:k], CovSord[1:k,1:k])
     SSUws[k]<-SumSqUw(U[1:k], CovS[1:k,1:k])
     SSUwords[k]<-SumSqUw(Uordw[1:k], CovSordw[1:k,1:k])
     Sums[k]<-Sum(U[1:k], CovS[1:k,1:k])
     Sumords[k]<-Sum(Uords[1:k], CovSords[1:k,1:k])
     if (useUminP){
     UminPs[k]<-UminP(U[1:k], CovS[1:k, 1:k])
     UminPords[k]<-UminP(Uordw[1:k], CovSordw[1:k, 1:k])
     }
     }
   SCO1<-max(SCOs)
   SCOord1<-max(SCOords)
   SSU1<-min(SSUs)
   SSUord1<-min(SSUords)
   SSUw1<-min(SSUws)
   SSUword1<-min(SSUwords)
   Sum1<-min(Sums)
   Sumord1<-min(Sumords)
   if (useUminP){
   UminP1<-min(UminPs)
   UminPord1<-min(UminPords)
   }
 
   #######simulations:
   n<-length(Y)
   SCO0s<-SCO0ords<-SSU0s<-SSU0ords<-SSUw0s<-SSUw0ords<-Sum0s<-Sum0ords<-UminP0s<-UminP0ords<-rep(0, B)
   for(b in 1:B){
      U00<-rnorm(K, 0, 1)
      U0<-CovSsqrt %*% U00

      U0st2<- U00^2
   #order Ust's components from large to smalle in absolute values:
      U0ord<-U00[order(abs(U00), decreasing=T)]
      U0ord2<-U0ord^2

     #order U's components from large to small in Z-stats:
      Ord<-order(U0^2, decreasing=T)
      Ordw<-order(U0^2/diag(CovS), decreasing=T)
      Ords<-order(U0/sqrt(diag(CovS)), decreasing=T)
      U0ord<-U0[Ord]
      U0ordw<-U0[Ordw]
      U0ords<-U0[Ords]
      CovS0ord<-CovS[Ord, Ord]
      CovS0ordw<-CovS[Ordw, Ordw]
      CovS0ords<-CovS[Ords, Ords]

     for (k in 1:length(U0)){
       SCOs[k]<-sum(U0st2[1:k]-1)/sqrt(2*k)
       SCOords[k]<-sum(U0ord2[1:k]-1)/sqrt(2*k)
       SSUs[k]<-SumSqU(U0[1:k], CovS[1:k,1:k])
       SSUords[k]<-SumSqU(U0ord[1:k], CovS0ord[1:k,1:k])
       SSUws[k]<-SumSqUw(U0[1:k], CovS[1:k,1:k])
       SSUwords[k]<-SumSqUw(U0ordw[1:k], CovS0ordw[1:k,1:k])
       Sums[k]<-Sum(U0[1:k], CovS[1:k,1:k])
       Sumords[k]<-Sum(U0ords[1:k], CovS0ords[1:k,1:k])
       if (useUminP){
       UminPs[k]<-UminP(U0[1:k], CovS[1:k,1:k])
       UminPords[k]<-UminP(U0ordw[1:k], CovS0ordw[1:k,1:k])
       }
       }
   #SSU0s<-SSU0ords<-SSUw0s<-SSUw0ords<-Sum0s<-Sum0ords<-rep(0, length(U))
     SCO0s[b]<-max(SCOs)
     SCO0ords[b]<-max(SCOords)
     SSU0s[b]<-min(SSUs)
     SSU0ords[b]<-min(SSUords)
     SSUw0s[b]<-min(SSUws)
     SSUw0ords[b]<-min(SSUwords)
     Sum0s[b]<-min(Sums)
     Sum0ords[b]<-min(Sumords)
     if (useUminP){
     UminP0s[b]<-min(UminPs)
     UminP0ords[b]<-min(UminPords)
     }
     }

if (useUminP) return(c( 
         biggerThan(SCO0s, SCO1)/B,
         biggerThan(SCO0ords, SCOord1)/B,
         lessThan(SSU0s, SSU1)/B,
         lessThan(SSU0ords, SSUord1)/B,
         lessThan(SSUw0s, SSUw1)/B,
         lessThan(SSUw0ords, SSUword1)/B,
         lessThan(Sum0s, Sum1)/B,
         lessThan(Sum0ords, Sumord1)/B,
         lessThan(UminP0s, UminP1)/B,
         lessThan(UminP0ords, UminPord1)/B
        )) else return(c( 
         biggerThan(SCO0s, SCO1)/B,
         biggerThan(SCO0ords, SCOord1)/B,
         lessThan(SSU0s, SSU1)/B,
         lessThan(SSU0ords, SSUord1)/B,
         lessThan(SSUw0s, SSUw1)/B,
         lessThan(SSUw0ords, SSUword1)/B,
         lessThan(Sum0s, Sum1)/B,
         lessThan(Sum0ords, Sumord1)/B
         ))
}  

lessThan<-function(x, x0){
  if (x0==0) sum(x <= x0)
  else sum(x<x0)
  }

biggerThan<-function(x, x0){
  sum(x>x0)
  }

SumSqU<-function(U, CovS){
    if (is.null(dim(CovS))) {# only one-dim:
        Tscore<- sum(U^2 /CovS)
        if (is.na(Tscore) || is.infinite(Tscore) || is.nan(Tscore)) Tscore<-0
        pTg1<-as.numeric(1-pchisq(Tscore, 1))
        }
     else {
     #it's possible U=0 and Cov(U)=0:
     if (all(abs(U)<1e-20)) pTg1<-1 else{
     Tg1<- t(U) %*% U
     ##distr of Tg1 is sum of cr Chisq_1:
       cr<-eigen(CovS, only.values=TRUE)$values
     ##approximate the distri by alpha Chisq_d + beta:
       alpha1<-sum(cr*cr*cr)/sum(cr*cr)
     beta1<-sum(cr) - (sum(cr*cr)^2)/(sum(cr*cr*cr))
     d1<-(sum(cr*cr)^3)/(sum(cr*cr*cr)^2)
     alpha1<-as.real(alpha1)
     beta1<-as.real(beta1)
     d1<-as.real(d1)
     pTg1<-as.numeric(1-pchisq((Tg1-beta1)/alpha1, d1))
     }
     }
return(pTg1)
}

SumSqUw<-function(U, CovS){
     ###STRANGELY: diag(A) fails if A is 1X1:
# diag(CovS[40,40])
#<0 x 0 matrix>
#> CovS0<-CovS[40,40]
#> CovS0
#[1] 0.24975
#> diag(CovS0)
#<0 x 0 matrix>
#> diag(1)
#     [,1]
#[1,]    1

     if (is.null(dim(CovS))) {# only one-dim:
        Tg2<- sum(U^2 /CovS)
        if (is.na(Tg2) || is.infinite(Tg2) || is.nan(Tg2)) Tg2<-0
        pTg2<-as.numeric(1-pchisq(Tg2, 1))
        }
     else {
     #it's possible U=0 and Cov(U)=0:
     #if (all(abs(U)<1e-20)  ) pTg2<-1 else{
     if (all(abs(U)<1e-20) || all(abs(CovS)<1e-20) ) pTg2<-1 else{
     diagCovS<-diag(CovS)
     diagCovS<-ifelse(diagCovS>1e-10, diagCovS, 1e-10)
     #Tg2<- t(U) %*%  diag(1/diagCovS) %*% U
     Tg2<- sum(U^2 /diagCovS)
     ##distr of Tg1 is sum of cr Chisq_1:
       cr<-eigen(CovS %*% diag(1/diagCovS), only.values=TRUE)$values
     ##approximate the distri by alpha Chisq_d + beta:
       alpha2<-sum(cr*cr*cr)/sum(cr*cr)
       beta2<-sum(cr) - (sum(cr*cr)^2)/(sum(cr*cr*cr))
       d2<-(sum(cr*cr)^3)/(sum(cr*cr*cr)^2)
       alpha2<-as.real(alpha2)
       beta2<-as.real(beta2)
       d2<-as.real(d2)
     pTg2<-as.numeric(1-pchisq((Tg2-beta2)/alpha2, d2))
     }
     }
   pTg2
}

 ##########score test:
Score<-function(U, CovS){
    if (is.null(dim(CovS))) {# only one-dim:
        Tscore<- sum(U^2 /CovS)
        if (is.na(Tscore) || is.infinite(Tscore) || is.nan(Tscore)) Tscore<-0
        pTscore<-as.numeric(1-pchisq(Tscore, 1))
        }
    else {
    ##gInv of CovS:
      CovS.edecomp<-eigen(CovS)
      CovS.rank<-sum(abs(CovS.edecomp$values)> 1e-8)
      inveigen<-ifelse(abs(CovS.edecomp$values) >1e-8, 1/CovS.edecomp$values, 0)
      P<-solve(CovS.edecomp$vectors)
      gInv.CovS<-t(P) %*% diag(inveigen) %*% P
    Tscore<- t(U) %*% gInv.CovS  %*% U
    pTscore<-as.numeric( 1-pchisq(Tscore, CovS.rank) )
    }
pTscore
}

##########SumTest########################
Sum<-function(U, CovS){
     #it's possible U=0 and Cov(U)=0:
     if (all(abs(sum(U))<1e-20)) pTsum<-1 else{
      a<-rep(1, length(U))
      Tsum<- sum(U)/(sqrt(as.numeric(t(a) %*% CovS %*% (a))))
      pTsum <- as.numeric( 1-pchisq(Tsum^2, 1) )
      }
pTsum
}

##########UminP Test########################
UminP<-function(U, CovS){

     if (is.null(dim(CovS))) {# only one-dim:
        Tu<- sum(U^2 /CovS)
        if (is.na(Tu) || is.infinite(Tu) || is.nan(Tu)) Tu<-0
        pTu<-as.numeric(1-pchisq(Tu, 1))
        }
     else{
          Tu<-as.vector(abs(U)/sqrt(diag(CovS)))
    k<-length(U)
    V <- matrix(0,nrow=k, ncol=k)
    for(i in 1:k){
      for(j in 1:k){
          if (abs(CovS[i,j])>1e-20)
             V[i,j] <- CovS[i,j]/sqrt(CovS[i,i]*CovS[j,j])
          else   Vs[i,j] <- 1e-20
        }
      }
    pTu <- as.numeric(PowerUniv(Tu,V))
        }

pTu
}

PowerUniv <- function(U,V){
      n <- dim(V)[1]

      x <- as.numeric(max(abs(U)))
      TER <- as.numeric(1-pmvnorm(lower=c(rep(-x,n)),upper=c(rep(x,n)),
                                  mean=c(rep(0,n)),sigma=V))

      return(TER)
}