###SumSqU and SumSqUw tests for main-effects and interactions for gene 1 ### AND INCLUDING the main effects of gene 2 (differing from Wang et al (2009)). ### WP, 12/17/08 #########CovS0 is sometime singular; replace its inverse by G-inverse. ######### WP 1/5/09 ### Copyright: Wei Pan, 12/17/08 ### weip@biostat.umn.edu ### http://www.biostat.umn.edu/~weip/ #input: sdat: a data-frame with components named # 1) trait: a vector of 0's and 1's for the disease indicator; # 2) Ro: genotypes for gene 1 # 3) Rt: genotypes for gene 2 # B: # of simulations used to estimate the p-value for UminP; default=1000 # output: pvalues for # 1) main-effects + (2-way) interactions model: SumSqU, SumSqUw, score, (simulation-based) UminP, (Bonferonni-adjusted) UminP # 2) main-effects model: SumSqU, SumSqUw, score, (simulation-based) UminP, (Bonferonni-adjusted) UminP SumSqUsInter<-function(sdat, B=1000){ n<-dim(sdat)[1] R1<-sdat[,substring(names(sdat),1,2)=="Ro"] mm1<-dim(R1)[2] R2<-sdat[,substring(names(sdat),1,2)=="Rt"] mm2<-dim(R2)[2] # main-effects plus interactions with gene 1: X<-R1 for(i in 1:mm1) for(j in 1:mm2) X<-cbind(X, R1[,i]*R2[,j]) # standardize X: X0<-cbind(X, R2) Xc<-apply(X, 2, function(x)(x-mean(x)) ) X0c<-apply(X0, 2, function(x)(x-mean(x)) ) Y<-sdat$trait #fit with gene 2's genotype: tdat<-data.frame(trait=sdat$trait,R2) tfit<-glm(trait~.,family="binomial",data=tdat) mu<-fitted.values(tfit) # score stat: U<-t(X) %*% (Y-mu) ######Cov of the WHOLE score stat vector: CovS0<-t(X0c) %*% diag(mu*(1-mu)) %*% X0c ##cov of the score stats: #CovS<- solve(solve(CovS0)[1:(mm1+mm1*mm2), 1:(mm1+mm1*mm2)]) ##gInv of CovS: CovS0.edecomp<-eigen(CovS0) CovS0.rank<-sum(abs(CovS0.edecomp$values)> 1e-8) inveigen0<-ifelse(abs(CovS0.edecomp$values) >1e-8, 1/CovS0.edecomp$values, 0) P0<-solve(CovS0.edecomp$vectors) gInv.CovS0<-t(P0) %*% diag(inveigen0) %*% P0 g.CovS<-gInv.CovS0[1:(mm1+mm1*mm2), 1:(mm1+mm1*mm2)] ##gInv of CovS: gCovS.edecomp<-eigen(g.CovS) ginveigen<-ifelse(abs(gCovS.edecomp$values) >1e-8, 1/gCovS.edecomp$values, 0) gP<-solve(gCovS.edecomp$vectors) CovS<-t(gP) %*% diag(ginveigen) %*% gP #unweighted one: Tg1<- t(U) %*% U ###cov of the score stats: No, not the below, which ignored the nuisance para's! #CovS<- t(Xc) %*% diag(mu*(1-mu)) %*% Xc ##distr of Tg1 is sum of cr Chisq_1: cr<-eigen(CovS, only.values=TRUE)$values ##approximate the distri by alpha Chisq_d + beta: alpha1<-sum(cr*cr*cr)/sum(cr*cr) beta1<-sum(cr) - (sum(cr*cr)^2)/(sum(cr*cr*cr)) d1<-(sum(cr*cr)^3)/(sum(cr*cr*cr)^2) alpha1<-as.real(alpha1) beta1<-as.real(beta1) d1<-as.real(d1) pSumSqU<-(1-pchisq((Tg1-beta1)/alpha1, d1)) #paraTg1<-c(alpha, beta, d) #weighted one: Tg2<- t(U) %*% diag(1/diag(CovS)) %*% U ##distr of Tg1 is sum of cr Chisq_1: cr<-eigen(CovS %*% diag(1/diag(CovS)), only.values=TRUE)$values ##approximate the distri by alpha Chisq_d + beta: alpha2<-sum(cr*cr*cr)/sum(cr*cr) beta2<-sum(cr) - (sum(cr*cr)^2)/(sum(cr*cr*cr)) d2<-(sum(cr*cr)^3)/(sum(cr*cr*cr)^2) alpha2<-as.real(alpha2) beta2<-as.real(beta2) d2<-as.real(d2) pSumSqUw<-(1-pchisq((Tg2-beta2)/alpha2, d2)) #paraTg2<-c(alpha, beta, d) ##standard score test: ##gInv of CovS: CovS.edecomp<-eigen(CovS) CovS.rank<-sum(abs(CovS.edecomp$values)> 1e-8) inveigen<-ifelse(abs(CovS.edecomp$values) >1e-8, 1/CovS.edecomp$values, 0) P<-solve(CovS.edecomp$vectors) gInv.CovS<-t(P) %*% diag(inveigen) %*% P # CovS %*% gInv.CovS %*% CovS == CovS Tscore<- t(U) %*% gInv.CovS %*% U pTscore<-as.numeric( 1-pchisq(Tscore, CovS.rank) ) ## score test for ONLY main-effects: U1<-U[1:(mm1)] CovS1<-CovS[1:(mm1), 1:(mm1)] ##gInv of CovS1: CovS1.edecomp<-eigen(CovS1) CovS1.rank<-sum(abs(CovS1.edecomp$values)> 1e-8) inveigen<-ifelse(abs(CovS1.edecomp$values) >1e-8, 1/CovS1.edecomp$values, 0) P<-solve(CovS1.edecomp$vectors) gInv.CovS1<-t(P) %*% diag(inveigen) %*% P # CovS1 %*% gInv.CovS1 %*% CovS1 == CovS1 Tscore1<- t(U1) %*% gInv.CovS1 %*% U1 pTscore1<-as.numeric( 1-pchisq(Tscore1, CovS1.rank) ) ## SumSqUs for ONLY main-effects: #unweighted one: Tg1<- t(U1) %*% U1 ##distr of Tg1 is sum of cr Chisq_1: cr<-eigen(CovS1, only.values=TRUE)$values ##approximate the distri by alpha Chisq_d + beta: alpha1<-sum(cr*cr*cr)/sum(cr*cr) beta1<-sum(cr) - (sum(cr*cr)^2)/(sum(cr*cr*cr)) d1<-(sum(cr*cr)^3)/(sum(cr*cr*cr)^2) alpha1<-as.real(alpha1) beta1<-as.real(beta1) d1<-as.real(d1) pSumSqU1<-(1-pchisq((Tg1-beta1)/alpha1, d1)) #paraTg1<-c(alpha, beta, d) #weighted one: Tg2<- t(U1) %*% diag(1/diag(CovS1)) %*% U1 ##distr of Tg1 is sum of cr Chisq_1: cr<-eigen(CovS1 %*% diag(1/diag(CovS1)), only.values=TRUE)$values ##approximate the distri by alpha Chisq_d + beta: alpha2<-sum(cr*cr*cr)/sum(cr*cr) beta2<-sum(cr) - (sum(cr*cr)^2)/(sum(cr*cr*cr)) d2<-(sum(cr*cr)^3)/(sum(cr*cr*cr)^2) alpha2<-as.real(alpha2) beta2<-as.real(beta2) d2<-as.real(d2) pSumSqUw1<-(1-pchisq((Tg2-beta2)/alpha2, d2)) #paraTg2<-c(alpha, beta, d) #### V1 %*% t(V1)=CovS: #CovS.edecomp<-eigen(CovS) CovS.ev<-ifelse(abs(CovS.edecomp$values) >1e-8, CovS.edecomp$values, 0) V1<-CovS.edecomp$vectors %*% diag(sqrt(CovS.ev)) ##univariate/marginal tests: Main-effect or All: Tus<-as.vector(U^2/diag(CovS)) pTus<- 1-pchisq(Tus, 1) pUmin<-min(pTus) pUminM<-min(pTus[1:(mm1)]) ############################MC simulations to estimate the p-values: NMCsim<-B pUmins<-pUminMs<-rep(1, NMCsim) for(MCsim in 1:NMCsim){ #########SumSqUs: Us<-rnorm(nrow(U)) Us<-V1 %*% Us Tus<-as.vector(Us^2/diag(CovS)) pTuss<- 1-pchisq(Tus, 1) pUmins[MCsim]<-min(pTuss) pUminMs[MCsim]<-min(pTuss[1:(mm1)]) } return(c(pSumSqU, pSumSqUw, pTscore, pUmin*length(U), sum(pUmins