###################### Tests based on multivariate probit models with R mprobit package ##Ref: Han F, Pan W (2011). A Composite Likelihood Approach to Latent Multivariate Gaussian Modeling of SNP Data with Application to Genetic Association Testing. To appear in Biometrics. ######################modified from the program of Fang HAN (Email: fhan@jhsph.edu) on 3/08/11 library("mvtnorm") library("mprobit") alpha=0.05 # Introduction to input parameters: # Y: disease lables; =0 for controls, =1 for cases; # X: genotype data; each row for a subject, and each column for an SNP; ############################ # Output is the p-values of the eight tests in the order of: # 1-6: F-LRT, F-M3-Wald, F-M4-Wald, F-M4-SSU, F-M4-SSUw, F-M4-UminP ############################################ #######Some back-up programs ############################################ PowerUniv <- function(U,V){ n <- dim(V)[1] x <- as.numeric(max(abs(U))) TER <- as.numeric(1-pmvnorm(lower=c(rep(-x,n)),upper=c(rep(x,n)),mean=c(rep(0,n)),sigma=V)) return(TER) } pvTest <- function(U,V){ CovS <- V #SumSqU: Tg1<- t(U) %*% U ##distr of Tg1 is sum of cr Chisq_1: cr<-eigen(CovS, only.values=TRUE)$values ##approximate the distri by alpha Chisq_d + beta: alpha1<-sum(cr*cr*cr)/sum(cr*cr) beta1<-sum(cr) - (sum(cr*cr)^2)/(sum(cr*cr*cr)) d1<-(sum(cr*cr)^3)/(sum(cr*cr*cr)^2) alpha1<-as.real(alpha1) beta1<-as.real(beta1) d1<-as.real(d1) pTg1<-as.numeric(1-pchisq((Tg1-beta1)/alpha1, d1)) #SumSqUw: Tg2<- t(U) %*% diag(1/diag(CovS)) %*% U ##distr of Tg1 is sum of cr Chisq_1: cr<-eigen(CovS %*% diag(1/diag(CovS)), only.values=TRUE)$values ##approximate the distri by alpha Chisq_d + beta: alpha2<-sum(cr*cr*cr)/sum(cr*cr) beta2<-sum(cr) - (sum(cr*cr)^2)/(sum(cr*cr*cr)) d2<-(sum(cr*cr)^3)/(sum(cr*cr*cr)^2) alpha2<-as.real(alpha2) beta2<-as.real(beta2) d2<-as.real(d2) pTg2<-as.numeric(1-pchisq((Tg2-beta2)/alpha2, d2)) ##########score test: ##gInv of CovS: CovS.edecomp<-eigen(CovS) CovS.rank<-sum(abs(CovS.edecomp$values)> 1e-8) inveigen<-ifelse(abs(CovS.edecomp$values) >1e-8, 1/CovS.edecomp$values, 0) P<-solve(CovS.edecomp$vectors) gInv.CovS<-t(P) %*% diag(inveigen) %*% P Tscore<- t(U) %*% gInv.CovS %*% U pTscore<-as.numeric( 1-pchisq(Tscore, CovS.rank) ) #### V1 %*% t(V1)=CovS: CovS.ev<-ifelse(abs(CovS.edecomp$values) >1e-8, CovS.edecomp$values, 0) V1<-CovS.edecomp$vectors %*% diag(sqrt(abs(CovS.ev))) ##univariate/marginal tests: nSNP<-length(U) Tus<-as.vector(abs(U)/sqrt(diag(CovS))) Vs <- matrix(c(rep(0,nSNP^2)),nrow=nSNP) for(i in 1:nSNP){ for(j in 1:nSNP){ Vs[i,j] <- CovS[i,j]/sqrt(CovS[i,i]*CovS[j,j]) } } pTus <- as.numeric(PowerUniv(Tus,Vs)) return(cbind(pTscore,pTg1,pTg2,pTus)) } probitL <- function(X){ k = dim(X)[2] n = dim(X)[1] y <- as.vector(t(X))+1 id <- NULL for(i in 1:n) id <- c(id,rep(i,k)) x <- NULL M <- matrix(0,k,k) for(i in 2:k) M[i,i]=1 for(i in 1:n) x <- rbind(x,M) stat <- ordprobit.unstr(x,y,id) LF<-stat$negloglik return(LF) } probitL1 <- function(Y,X){ k = dim(X)[2] n = dim(X)[1] y <- as.vector(t(X))+1 id <- NULL YY<-NULL for(i in 1:n) YY <- c(YY,rep(Y[i],k)) for(i in 1:n) id <- c(id,rep(i,k)) x <- NULL M <- matrix(0,k,k) for(i in 2:k) M[i,i]=1 for(i in 1:n) x <- rbind(x,M) x <- cbind(x,YY) stat <- ordprobit.unstr(x,y,id) group <- stat$beta[k+1] gvar <- stat$cov[k+3,k+3] gstat<-group^2/gvar pv <- as.numeric(1-pchisq(gstat,1)) return(pv) } probitL2 <- function(Y,X){ k = dim(X)[2] n = dim(X)[1] y <- as.vector(t(X))+1 id <- NULL YY<-NULL for(i in 1:n) YY <- c(YY,rep(Y[i],k)) for(i in 1:n) id <- c(id,rep(i,k)) x <- NULL M <- matrix(0,k,k) for(i in 2:k) M[i,i]=1 for(i in 1:n) x <- rbind(x,M) YYY<-YY for(i in 2:k) YYY<-cbind(YYY,YY*x[,i]) x <- cbind(x,YYY) stat <- ordprobit.unstr(x,y,id) r <- (k+3):(2*k+2) group <- stat$mle[r] gvar <- stat$cov[r,r] pv <- pvTest(group,gvar) return(pv) } ############################################ ############# The main program ########### ############################################ LVMpv_probit <- function(Y,X){ p=dim(X)[2] f = p*(p-1)/2+1+p X1 <- X[Y==0,] X2 <- X[Y==1,] LF1 <- probitL(X1) LF2 <- probitL(X2) LF <- probitL(X) LRT = 2*(LF-LF1-LF2) FLRT <- as.numeric(1-pchisq(LRT,f)) FM3 <- probitL1(Y,X) FM4 <- probitL2(Y,X) return(c(FLRT,FM3,FM4)) }