######################MC simulation-based adaptive UminP test for ###################### association, possibly including interactions ###################### in the possible presence of nuisance parameters. ###########References: # Pan W, Basu S, Shen X (2011). Adaptive Tests for Detecting Gene-Gene and Gene-Environment Interactions. ######################Wei Pan, Division of Biostatistics, U of Minnesota ######################Email: weip@biostat imn.edu #### V1, May 25, 2011 library("mvtnorm") #Input: # Y: a vector of 0-1 response, nx1; # X: design matrix for covariates (e.g. gene-gene interactions) of # interest (i.e. to be tested), nxk; # Z: design matrix for nuisance covariates, nxk2; # B: # of simulations to calculate p-values # #Goal: # fit model: logit(Pr(Y=1)) = a0+ a1*X+a2*Z , # test H0: a1=0. # #Output: p-values for MCaUminP based on the natural order (i.e. order given by # the columns of X1 and X2, and on the marginal/univariate test stat # U^2_j/v_j; that is, the OUTPUT is a vector of two p-values for # aUminP-Loc and aUminP-Ord, respectively. ############################################ ############################################ ############################################ #####Adaptive UminP test based on MC simulations, not numerical ###### multi-dimensional integrations (as in aUminP in ANeymanV1.r) ##### since the latter seems to be too slow. MCaUminPV1<-function(Y, X, Z=NULL, B=200){ n<-length(Y) k<-ncol(X) k2<-ncol(Z) #######construction of the score vector and its cov matrix: if (is.null(Z)){ ## NO nuisance parameters: Xg <- X Xbar<-apply(Xg, 2, mean) Xgb<-Xg for(i in 1:nrow(Xg)) Xgb[i,]<-Xg[i,]-Xbar ##score vector: U<-t(Xg) %*% (Y-mean(Y)) ##cov of the score stats: CovS<- mean(Y)*(1-mean(Y))*(t(Xgb) %*% Xgb) } else { ## with nuisance parameters: tdat1<-data.frame(trait=Y, Z) fit1<-glm(trait~.,family="binomial",data=tdat1) pis<-fitted.values(fit1) Us<-matrix(0, nrow=n, ncol=k) for(i in 1:k){ tdat2<-data.frame(X1=X[,i], Z) fit2<-glm(X1~.,data=tdat2) X1mus<-fitted.values(fit2) Us[, i]<-(Y - pis)*(X[,i] - X1mus) } U<-apply(Us, 2, sum) CovS<-matrix(0, nrow=k, ncol=k) for(i in 1:n) CovS<-CovS + Us[i,] %*% t(Us[i,]) } K<-length(U) ##standardized score vector (s.t. it is N(0,I) under null) CovS.edecomp<-eigen(CovS) CovS.rank<-sum(abs(CovS.edecomp$values)> 1e-8) inveigen<-ifelse(abs(CovS.edecomp$values) >1e-10, 1/CovS.edecomp$values, 1e+10) #P<-solve(CovS.edecomp$vectors) P<-t(CovS.edecomp$vectors) gInv.CovS<-t(P) %*% diag(inveigen) %*% P gInv.CovSsqrt<-t(P) %*% diag(sqrt(inveigen)) %*% P svd.CovS<-svd(CovS) CovSsqrt<-svd.CovS$u %*% diag(sqrt(svd.CovS$d)) #order U's components from large to small: Ord<-order(U^2, decreasing=T) Ordw<-order(U^2/diag(CovS), decreasing=T) Uord<-U[Ord] Uordw<-U[Ordw] CovSord<-CovS[Ord, Ord] CovSordw<-CovS[Ordw, Ordw] UminPs<-UminPords<-UminPps<-UminPpords<-rep(0, length(U)) for (k in 1:length(U)){ UminPs[k]<-UminPstat(U[1:k], CovS[1:k, 1:k]) UminPords[k]<-UminPstat(Uordw[1:k], CovSordw[1:k, 1:k]) } # UminP1<-max(UminPs) # UminPord1<-max(UminPords) #######simulations: n<-length(Y) UminP0s<-UminP0ords<-UminP0ps<-UminP0pords<-matrix(1, nrow=B, ncol=K) for(b in 1:B){ U00<-rnorm(K, 0, 1) U0<-CovSsqrt %*% U00 #order U's components from large to small in Z-stats: Ord<-order(U0^2, decreasing=T) Ordw<-order(U0^2/diag(CovS), decreasing=T) U0ord<-U0[Ord] U0ordw<-U0[Ordw] CovS0ord<-CovS[Ord, Ord] CovS0ordw<-CovS[Ordw, Ordw] for (k in 1:K){ UminP0s[b,k]<-UminPstat(U0[1:k], CovS[1:k,1:k]) UminP0ords[b,k]<-UminPstat(U0ordw[1:k], CovS0ordw[1:k,1:k]) } } ###p-value for each k: for(i in 1:K){ UminPps[i]<-sum(UminPs[i] <= UminP0s[,i])/B UminPpords[i]<-sum(UminPords[i] <= UminP0ords[,i])/B } ####Null p-valeus (used to calulate minP): for(b in 1:B) for(i in 1:K){ UminP0ps[b, i]<-sum(UminP0s[b, i] <= UminP0s[-b,i])/(B-1) UminP0pords[b, i]<-sum(UminP0ords[b, i] <= UminP0ords[-b,i])/(B-1) } UminP0psMin<-apply(UminP0ps, 1, min) UminP0pordsMin<-apply(UminP0pords, 1, min) return(c( lessThan(UminP0psMin, min(UminPps))/B, lessThan(UminP0pordsMin, min(UminPpords))/B )) } lessThan<-function(x, x0){ if (x0==0) sum(x <= x0) else sum(x1e-20) # V[i,j] <- CovS[i,j]/sqrt(CovS[i,i]*CovS[j,j]) # else Vs[i,j] <- 1e-20 # } # } # pTu <- as.numeric(PowerUniv(Tu,V)) } Tu<-max(Tu) if (is.na(Tu) || is.infinite(Tu) || is.nan(Tu)) Tu<-0 Tu }