###########Adaptive Neyman's tests for association, with possibe interactions ########### in the possible presence of nuisance parameters: ########### aScore, aSSU, aSSUw, aSum, (aUminP--too slow for sim). ###########References: Fan (1996, JASA 91:674-688) # Pan W, Shen X (2011). Adaptive Tests for Association Analysis of Rare Variants. Genet Epi. # Pan W, Basu S, Shen X (2011). Adaptive Tests for Detecting Gene-Gene and Gene-Environment Interactions. ###########Author: Wei Pan, Division of Biostatistics, U of Minnesota ########### Email: weip@biostat imn.edu #### V1, May 25, 2011 library("mvtnorm") #Input: # Y: a vector of 0-1 response, nx1; # X: design matrix for covariates (e.g. gene-gene interactions) of # interest (i.e. to be tested), nxk; # Z=NULL: design matrix for nuisance covariates, nxk2; # useUminP=FALSE: whether to use the (time-consuming) # aUminP-Loc and aUminP-Ord tests; # B=200: # of simulations to calculate p-values for the adaptive tests. # #Goal: # fit model: logit(Pr(Y=1)) = a0+ a1*X+a2*Z , # test H0: a1=0. # #Output: p-values for aScore, aSSU, aSSUw, aSum, (aUminP--too slow) #Examples: suppose that one has two groups of variables in design matrices # X1 and X2 respectively, and Y is a binary response vector, # 1) if one would like to test the marginal effects of X1, then # Tests5V1(Y, X=X1); # 2) if one would like to test the effects of X1 after ajusting for X2, # Tests5V1(Y, X=X1, Z=X2); # 3) if one would like to test the effects of X1 in the presence of # possible interactions with X2, then # Tests5V1(Y, X=cbind(X1, X12), Z=X2), # where X12 is the design matrix for interactions; # 4) if one would like to test on the interaction between X1 and X2 # after adjusting for the main effects of X1 and X2, then # Tests5V1(Y, X=X12, Z=cbind(X1,X2)), # where X12 is the design matrix for interactions; ############################################ ############################################ ############################################ #####Adaptive Neyman's tests implemented as the Score, SSU, SSUw, Sum (& Sum2, Sum2minP), UminP tests: ##### Order the components of the score vector by the input SNP locations, or ##### order by |standardized U|, U^2, U^2/v, U/sqrt(v), U^2/v; the two ordering ##### schemes result in two adaptive tests called aTest-Loc and aTest-Ord, ##### respectively. ##### The OUTPUT is a vector of p-values for ##### aScore-Loc, aScore-Ord, aSSU-Loc, aSSU-Ord, aSSUw-Loc, aSSUw-Ord, ##### aSum-Loc, aSum-Ord, and poissbly UminP-Loc, UminP-Ord; ##### Since the UminP, and accordingly aUminP, use numerical multiple ##### integrations, aUminP is slow; an MC simulation-based method to calculate ##### a p-value for UminP or aUminP is much faster than the integration-based ##### implementtaion here; the former is avaibale as a separate function; ##### default useUminP=FALSE (o/w setting it as TRUE) truns off (or on) running aUminP. ANeymanV1<-function(Y, X, Z=NULL, useUminP=FALSE, B=200){ n<-length(Y) k<-ncol(X) k2<-ncol(Z) #######construction of the score vector and its cov matrix: if (is.null(Z)){ ## NO nuisance parameters: Xg <- X Xbar<-apply(Xg, 2, mean) Xgb<-Xg for(i in 1:nrow(Xg)) Xgb[i,]<-Xg[i,]-Xbar ##score vector: U<-t(Xg) %*% (Y-mean(Y)) ##cov of the score stats: CovS<- mean(Y)*(1-mean(Y))*(t(Xgb) %*% Xgb) } else { ## with nuisance parameters: tdat1<-data.frame(trait=Y, Z) fit1<-glm(trait~.,family="binomial",data=tdat1) pis<-fitted.values(fit1) Us<-matrix(0, nrow=n, ncol=k) for(i in 1:k){ tdat2<-data.frame(X1=X[,i], Z) fit2<-glm(X1~.,data=tdat2) X1mus<-fitted.values(fit2) Us[, i]<-(Y - pis)*(X[,i] - X1mus) } U<-apply(Us, 2, sum) CovS<-matrix(0, nrow=k, ncol=k) for(i in 1:n) CovS<-CovS + Us[i,] %*% t(Us[i,]) } K<-length(U) ##standardized score vector (s.t. it is N(0,I) under null) CovS.edecomp<-eigen(CovS) CovS.rank<-sum(abs(CovS.edecomp$values)> 1e-8) inveigen<-ifelse(abs(CovS.edecomp$values) >1e-10, 1/CovS.edecomp$values, 1e+10) #P<-solve(CovS.edecomp$vectors) P<-t(CovS.edecomp$vectors) gInv.CovS<-t(P) %*% diag(inveigen) %*% P gInv.CovSsqrt<-t(P) %*% diag(sqrt(inveigen)) %*% P svd.CovS<-svd(CovS) CovSsqrt<-svd.CovS$u %*% diag(sqrt(svd.CovS$d)) Ust<- gInv.CovSsqrt %*% U Ust2<- Ust^2 #order Ust's components from large to smalle in absolute values: Uord<-Ust[order(abs(Ust), decreasing=T)] Uord2<-Uord^2 #order U's components from large to small in Z-stats: Ord<-order(U^2, decreasing=T) Ordw<-order(U^2/diag(CovS), decreasing=T) Ords<-order(U/sqrt(diag(CovS)), decreasing=T) Uord<-U[Ord] Uordw<-U[Ordw] Uords<-U[Ords] CovSord<-CovS[Ord, Ord] CovSordw<-CovS[Ordw, Ordw] CovSords<-CovS[Ords, Ords] SCOs<-SCOords<-SSUs<-SSUords<-SSUws<-SSUwords<-Sums<-Sumords<-UminPs<-UminPords<-rep(0, length(U)) for (k in 1:length(U)){ SCOs[k]<-sum(Ust2[1:k]-1)/sqrt(2*k) SCOords[k]<-sum(Uord2[1:k]-1)/sqrt(2*k) SSUs[k]<-SumSqU(U[1:k], CovS[1:k,1:k]) SSUords[k]<-SumSqU(Uord[1:k], CovSord[1:k,1:k]) SSUws[k]<-SumSqUw(U[1:k], CovS[1:k,1:k]) SSUwords[k]<-SumSqUw(Uordw[1:k], CovSordw[1:k,1:k]) Sums[k]<-Sum(U[1:k], CovS[1:k,1:k]) Sumords[k]<-Sum(Uords[1:k], CovSords[1:k,1:k]) if (useUminP){ UminPs[k]<-UminP(U[1:k], CovS[1:k, 1:k]) UminPords[k]<-UminP(Uordw[1:k], CovSordw[1:k, 1:k]) } } SCO1<-max(SCOs) SCOord1<-max(SCOords) SSU1<-min(SSUs) SSUord1<-min(SSUords) SSUw1<-min(SSUws) SSUword1<-min(SSUwords) Sum1<-min(Sums) Sumord1<-min(Sumords) if (useUminP){ UminP1<-min(UminPs) UminPord1<-min(UminPords) } #######simulations: n<-length(Y) SCO0s<-SCO0ords<-SSU0s<-SSU0ords<-SSUw0s<-SSUw0ords<-Sum0s<-Sum0ords<-UminP0s<-UminP0ords<-rep(0, B) for(b in 1:B){ U00<-rnorm(K, 0, 1) U0<-CovSsqrt %*% U00 U0st2<- U00^2 #order Ust's components from large to smalle in absolute values: U0ord<-U00[order(abs(U00), decreasing=T)] U0ord2<-U0ord^2 #order U's components from large to small in Z-stats: Ord<-order(U0^2, decreasing=T) Ordw<-order(U0^2/diag(CovS), decreasing=T) Ords<-order(U0/sqrt(diag(CovS)), decreasing=T) U0ord<-U0[Ord] U0ordw<-U0[Ordw] U0ords<-U0[Ords] CovS0ord<-CovS[Ord, Ord] CovS0ordw<-CovS[Ordw, Ordw] CovS0ords<-CovS[Ords, Ords] for (k in 1:length(U0)){ SCOs[k]<-sum(U0st2[1:k]-1)/sqrt(2*k) SCOords[k]<-sum(U0ord2[1:k]-1)/sqrt(2*k) SSUs[k]<-SumSqU(U0[1:k], CovS[1:k,1:k]) SSUords[k]<-SumSqU(U0ord[1:k], CovS0ord[1:k,1:k]) SSUws[k]<-SumSqUw(U0[1:k], CovS[1:k,1:k]) SSUwords[k]<-SumSqUw(U0ordw[1:k], CovS0ordw[1:k,1:k]) Sums[k]<-Sum(U0[1:k], CovS[1:k,1:k]) Sumords[k]<-Sum(U0ords[1:k], CovS0ords[1:k,1:k]) if (useUminP){ UminPs[k]<-UminP(U0[1:k], CovS[1:k,1:k]) UminPords[k]<-UminP(U0ordw[1:k], CovS0ordw[1:k,1:k]) } } #SSU0s<-SSU0ords<-SSUw0s<-SSUw0ords<-Sum0s<-Sum0ords<-rep(0, length(U)) SCO0s[b]<-max(SCOs) SCO0ords[b]<-max(SCOords) SSU0s[b]<-min(SSUs) SSU0ords[b]<-min(SSUords) SSUw0s[b]<-min(SSUws) SSUw0ords[b]<-min(SSUwords) Sum0s[b]<-min(Sums) Sum0ords[b]<-min(Sumords) if (useUminP){ UminP0s[b]<-min(UminPs) UminP0ords[b]<-min(UminPords) } } if (useUminP) return(c( biggerThan(SCO0s, SCO1)/B, biggerThan(SCO0ords, SCOord1)/B, lessThan(SSU0s, SSU1)/B, lessThan(SSU0ords, SSUord1)/B, lessThan(SSUw0s, SSUw1)/B, lessThan(SSUw0ords, SSUword1)/B, lessThan(Sum0s, Sum1)/B, lessThan(Sum0ords, Sumord1)/B, lessThan(UminP0s, UminP1)/B, lessThan(UminP0ords, UminPord1)/B )) else return(c( biggerThan(SCO0s, SCO1)/B, biggerThan(SCO0ords, SCOord1)/B, lessThan(SSU0s, SSU1)/B, lessThan(SSU0ords, SSUord1)/B, lessThan(SSUw0s, SSUw1)/B, lessThan(SSUw0ords, SSUword1)/B, lessThan(Sum0s, Sum1)/B, lessThan(Sum0ords, Sumord1)/B )) } lessThan<-function(x, x0){ if (x0==0) sum(x <= x0) else sum(xx0) } SumSqU<-function(U, CovS){ if (is.null(dim(CovS))) {# only one-dim: Tscore<- sum(U^2 /CovS) if (is.na(Tscore) || is.infinite(Tscore) || is.nan(Tscore)) Tscore<-0 pTg1<-as.numeric(1-pchisq(Tscore, 1)) } else { #it's possible U=0 and Cov(U)=0: if (all(abs(U)<1e-20)) pTg1<-1 else{ Tg1<- t(U) %*% U ##distr of Tg1 is sum of cr Chisq_1: cr<-eigen(CovS, only.values=TRUE)$values ##approximate the distri by alpha Chisq_d + beta: alpha1<-sum(cr*cr*cr)/sum(cr*cr) beta1<-sum(cr) - (sum(cr*cr)^2)/(sum(cr*cr*cr)) d1<-(sum(cr*cr)^3)/(sum(cr*cr*cr)^2) alpha1<-as.real(alpha1) beta1<-as.real(beta1) d1<-as.real(d1) pTg1<-as.numeric(1-pchisq((Tg1-beta1)/alpha1, d1)) } } return(pTg1) } SumSqUw<-function(U, CovS){ ###STRANGELY: diag(A) fails if A is 1X1: # diag(CovS[40,40]) #<0 x 0 matrix> #> CovS0<-CovS[40,40] #> CovS0 #[1] 0.24975 #> diag(CovS0) #<0 x 0 matrix> #> diag(1) # [,1] #[1,] 1 if (is.null(dim(CovS))) {# only one-dim: Tg2<- sum(U^2 /CovS) if (is.na(Tg2) || is.infinite(Tg2) || is.nan(Tg2)) Tg2<-0 pTg2<-as.numeric(1-pchisq(Tg2, 1)) } else { #it's possible U=0 and Cov(U)=0: #if (all(abs(U)<1e-20) ) pTg2<-1 else{ if (all(abs(U)<1e-20) || all(abs(CovS)<1e-20) ) pTg2<-1 else{ diagCovS<-diag(CovS) diagCovS<-ifelse(diagCovS>1e-10, diagCovS, 1e-10) #Tg2<- t(U) %*% diag(1/diagCovS) %*% U Tg2<- sum(U^2 /diagCovS) ##distr of Tg1 is sum of cr Chisq_1: cr<-eigen(CovS %*% diag(1/diagCovS), only.values=TRUE)$values ##approximate the distri by alpha Chisq_d + beta: alpha2<-sum(cr*cr*cr)/sum(cr*cr) beta2<-sum(cr) - (sum(cr*cr)^2)/(sum(cr*cr*cr)) d2<-(sum(cr*cr)^3)/(sum(cr*cr*cr)^2) alpha2<-as.real(alpha2) beta2<-as.real(beta2) d2<-as.real(d2) pTg2<-as.numeric(1-pchisq((Tg2-beta2)/alpha2, d2)) } } pTg2 } ##########score test: Score<-function(U, CovS){ if (is.null(dim(CovS))) {# only one-dim: Tscore<- sum(U^2 /CovS) if (is.na(Tscore) || is.infinite(Tscore) || is.nan(Tscore)) Tscore<-0 pTscore<-as.numeric(1-pchisq(Tscore, 1)) } else { ##gInv of CovS: CovS.edecomp<-eigen(CovS) CovS.rank<-sum(abs(CovS.edecomp$values)> 1e-8) inveigen<-ifelse(abs(CovS.edecomp$values) >1e-8, 1/CovS.edecomp$values, 0) P<-solve(CovS.edecomp$vectors) gInv.CovS<-t(P) %*% diag(inveigen) %*% P Tscore<- t(U) %*% gInv.CovS %*% U pTscore<-as.numeric( 1-pchisq(Tscore, CovS.rank) ) } pTscore } ##########SumTest######################## Sum<-function(U, CovS){ #it's possible U=0 and Cov(U)=0: if (all(abs(sum(U))<1e-20)) pTsum<-1 else{ a<-rep(1, length(U)) Tsum<- sum(U)/(sqrt(as.numeric(t(a) %*% CovS %*% (a)))) pTsum <- as.numeric( 1-pchisq(Tsum^2, 1) ) } pTsum } ##########UminP Test######################## UminP<-function(U, CovS){ if (is.null(dim(CovS))) {# only one-dim: Tu<- sum(U^2 /CovS) if (is.na(Tu) || is.infinite(Tu) || is.nan(Tu)) Tu<-0 pTu<-as.numeric(1-pchisq(Tu, 1)) } else{ Tu<-as.vector(abs(U)/sqrt(diag(CovS))) k<-length(U) V <- matrix(0,nrow=k, ncol=k) for(i in 1:k){ for(j in 1:k){ if (abs(CovS[i,j])>1e-20) V[i,j] <- CovS[i,j]/sqrt(CovS[i,i]*CovS[j,j]) else Vs[i,j] <- 1e-20 } } pTu <- as.numeric(PowerUniv(Tu,V)) } pTu } PowerUniv <- function(U,V){ n <- dim(V)[1] x <- as.numeric(max(abs(U))) TER <- as.numeric(1-pmvnorm(lower=c(rep(-x,n)),upper=c(rep(x,n)), mean=c(rep(0,n)),sigma=V)) return(TER) }