### V1&V2: using permutations to calculate p-values ### for sequentially selected final "grouping" model like the aSum test; ### V2 differs from V1 and the aSum test in its SNP selection: ### assign each SNP to a positive or negative or NULL group, ### as indicated by SNPstatus=1 or -1 or 0, while V1 only assign to ### positive or negative group with SNPstatus=1 or -1. #### A key: use the Score test to compare the models to SPEED up since #### aymptotically the score test and LRT are the same! ### Reference: Basu and Pan (2011). Comparison of Statistical Tests for Disease Association with Rare Variants. Genetic Epi. Downloadable from http://www.sph.umn.edu/biostatistics/research.asp ###### Copyright: Wei Pan, Division of Biostatistics, University of Minnesota ###### weip@biostat.umn.edu 10/22/10 ####INPUT: Y: binary outcome; =0 or 1 #### X: SNPs; nObs by nSNP #### B: # of permuttaions ####OUTPUT: a list of test stats, p-values and SNPstatus for two versions of #### the test: V1 does NOT do SNP selection while V2 does; both #### share a common reg coef for the two non-null groups #### and allows a separate intercept term. seqSumScoP<-function(Y, X, B=500){ n<-nrow(X); nSNP<-ncol(X) dat1<-as.data.frame(cbind(Y, X)) ones<-rep(1, n) signs1<-signs2<-rep(1, nSNP) ##############V1: # eq sum: eqSum1<-eqSum2<-rep(0, n) for(j in 1:nSNP) eqSum1<-eqSum1 + dat1[,j+1] ####rather than using max loglikelihood (or deviance), use the score stat: #res1<-glm.fit(cbind(ones, eqSum1), dat1[,1], family=binomial()) res1<-UscoTest(Y, eqSum1) ###seq check whether to move each SNP to another group: for (k in 1:nSNP){ eqSum1new<-eqSum1 - dat1[,k+1] eqSum2new<-eqSum2 + dat1[,k+1] #res2<-glm.fit(cbind(ones, eqSum1new - eqSum2new), dat1[,1], family=binomial()) res2<-UscoTest(Y, eqSum1new - eqSum2new) #if (res2$dev < res1$dev) { if (res2 > res1) { signs1[k]<-0-1 eqSum1<-eqSum1new eqSum2<-eqSum2new res1<-res2 } } T1<-res1 ####permute Y to get p-values: T1s<-rep(0, B) for(r in 1:B){ Yb<-sample(Y, n) eqSum1<-eqSum2<-rep(0, n) for(j in 1:nSNP) eqSum1<-eqSum1 + dat1[,j+1] #res1<-glm.fit(cbind(ones, eqSum1), Yb, family=binomial()) res1<-UscoTest(Yb, eqSum1) ###seq check whether to move each SNP to another group: for (k in 1:nSNP){ eqSum1new<-eqSum1 - dat1[,k+1] eqSum2new<-eqSum2 + dat1[,k+1] #res2<-glm.fit(cbind(ones, eqSum1new - eqSum2new), Yb, family=binomial()) res2<-UscoTest(Yb, eqSum1new - eqSum2new) if (res2 > res1) { eqSum1<-eqSum1new eqSum2<-eqSum2new res1<-res2 } } T1s[r]<-res1 } ######V2: eqSum1<-eqSum2<-rep(0, n) for(j in 1:nSNP) eqSum1<-eqSum1 + dat1[,j+1] #res1<-glm.fit(cbind(ones, eqSum1), dat1[,1], family=binomial()) res1<-UscoTest(Y, eqSum1) ###seq check whether to move each SNP to another group: for (k in 1:nSNP){ eqSum1new<-eqSum1 - dat1[,k+1] eqSum2new<-eqSum2 + dat1[,k+1] #res2<-glm.fit(cbind(ones, eqSum1new - eqSum2new), dat1[,1], family=binomial()) res2<-UscoTest(Y, eqSum1new - eqSum2new) #res3<-glm.fit(cbind(ones, eqSum1new - eqSum2), dat1[,1], family=binomial()) res3<-UscoTest(Y, eqSum1new - eqSum2) #if (res2$dev <= res1$dev && res2$dev <= res3$dev) { if (res2 >= res1 && res2 >= res3) { signs2[k]<-0-1 eqSum1<-eqSum1new eqSum2<-eqSum2new res1<-res2 } #if (res3$dev <= res1$dev && res3$dev <= res2$dev) { if (res3 >= res1 && res3 >= res2) { signs2[k]<-0 eqSum1<-eqSum1new res1<-res3 } } T2<-res1 ####permute Y to get p-values: T2s<-rep(0, B) for(r in 1:B){ Yb<-sample(Y, n) eqSum1<-eqSum2<-rep(0, n) for(j in 1:nSNP) eqSum1<-eqSum1 + dat1[,j+1] #res1<-glm.fit(cbind(ones, eqSum1), Yb, family=binomial()) res1<-UscoTest(Yb, eqSum1) ###seq check whether to move each SNP to another group: for (k in 1:nSNP){ eqSum1new<-eqSum1 - dat1[,k+1] eqSum2new<-eqSum2 + dat1[,k+1] #res2<-glm.fit(cbind(ones, eqSum1new - eqSum2new), Yb, family=binomial()) res2<-UscoTest(Yb, eqSum1new - eqSum2new) #res3<-glm.fit(cbind(ones, eqSum1new - eqSum2), Yb, family=binomial()) res3<-UscoTest(Yb, eqSum1new - eqSum2) if (res2 >= res1 && res2 >= res3) { eqSum1<-eqSum1new eqSum2<-eqSum2new res1<-res2 } if (res3 >= res1 && res3 >= res2) { eqSum1<-eqSum1new res1<-res3 } } T2s[r]<-res1 } ###########################save results: list(T1stat=T1, T1pvalue=sum(T1s>T1)/B, T2stat=T2, T2pvalue=sum(T2s>T2)/B, SNPstatus1=signs1, SNPstatus2=signs2 ) } ##################Univariate score test: UscoTest<-function(Y,X){ U<-sum((X-mean(X))*(Y-mean(Y))) V<- mean(Y)*(1-mean(Y))*sum( (X-mean(X))^2) if (abs(U)<1e-20) Ts<-0 else Ts<-U^2/V if (is.na(Ts) || is.infinite(Ts) || is.nan(Ts)) Ts<-0 Ts }