# Example code for the following paper: # Park JY, Wu C, Basu S, McGue M, Pan W (2017). Adaptive SNP-set Association Testing in Generalized Linear Mixed Models with Application to Family Studies. Submitted to Behav Genet. # Author and contact: Jun Young Park # 8/2/2017 # Function to generate the score vector and its covariance matrix from GLMM using GMMAT glmm.score<-function(fit0, G){ U = t(G)%*%fit0$P%*%fit0$Y V = t(G)%*%fit0$P%*%G return(list(U=U, V=V)) } #fit0 : a generalized linear mixed model fitted under the null hypothesis (beta.1, ..., beta.p)=0 using the "glmmkin" function of the GMMAT R package, where p is the number of variants. #G : a (n x p) genotype matrix. n is the number of samples and the order must correspond with fit0. ######Example###### #install the required packages library(GMMAT) library(aSPU2) #generate example data ##Data consists of 100 individuals: 50 families with 2 monozygotic twins in each family set.seed(1131) n<-100 p<-10 age<-rep(rbinom(n/2, 70, 0.5),2) gender<-rep(rbinom(n/2, 1, 0.5),2) smoke<-rbinom(n, 1, 0.5) ##Generate Genotypes G<-matrix(NA, ncol=p, nrow=n) for( i in 1:n/2){ G[i,]<-G[i+n/2,]<-rbinom(p, 2, 0.2) } ##Set effect sizes for generating phenotypes alpha<-runif(4, 0, 1 ) # effect sizes of the covariates beta<-runif(p, -0.5, 0.5) # effect sizes of the genotypes ##Make phenotypes mydata<-data.frame(age,gender,smoke) mydata$pheno<-cbind(1,age,gender,smoke)%*%alpha+G%*%beta+rnorm(n) ##Generate a relationship matrix: This example considers familial relationship only FRM<-matrix(1,2,2)%x%diag(n/2) rownames(FRM)<-colnames(FRM)<-paste0("id", 1:100) #Fit GLMM using the GMMAT package fit0<-glmmkin(fixed=pheno~smoke+age+gender,data=mydata, kins=FRM, family=gaussian("identity")) #Estimated coefficients for covariates (alpha) fit0$coefficients # [1] -2.4642907 0.2800777 0.1985169 1.1653376 #Estimated variance components fit0$theta # [1] 1.15984551 0.09505597 #Extract the score vector and its covariance matrix (U and V) S<-glmm.score(fit0, G) S$U # [,1] # [1,] 3.961942 # [2,] 7.950440 # [3,] -2.005727 # [4,] 11.073983 # [5,] 5.245730 # [6,] 12.376524 # [7,] -1.550665 # [8,] -3.416834 # [9,] -13.363365 # [10,] -3.215833 S$V # [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] # [1,] 21.653305 3.7116688 1.2076672 3.0614818 2.9112244 2.4014448 -4.1163277 -2.0688897 -2.4400334 1.7698513 # [2,] 3.711669 32.3227470 -0.5508538 3.9098579 8.1764154 9.5752082 -2.3817511 2.3538634 2.0666017 1.3299752 # [3,] 1.207667 -0.5508538 16.8796064 2.2819771 -0.4900604 -4.5106955 -1.7311156 -2.9045171 -4.5792509 2.8766990 # [4,] 3.061482 3.9098579 2.2819771 19.6962951 0.1320176 2.5174808 3.7179278 -2.7884559 -4.7728097 0.5162858 # [5,] 2.911224 8.1764154 -0.4900604 0.1320176 21.1182593 -4.7661377 -3.9648896 -0.7093291 -0.4900421 -1.4782717 # [6,] 2.401445 9.5752082 -4.5106955 2.5174808 -4.7661377 28.8218721 -1.6620875 0.1459329 3.1710408 -0.2873747 # [7,] -4.116328 -2.3817511 -1.7311156 3.7179278 -3.9648896 -1.6620875 16.5209203 -0.1709368 1.7090064 1.2881873 # [8,] -2.068890 2.3538634 -2.9045171 -2.7884559 -0.7093291 0.1459329 -0.1709368 18.1970522 0.1540615 1.9816730 # [9,] -2.440033 2.0666017 -4.5792509 -4.7728097 -0.4900421 3.1710408 1.7090064 0.1540615 29.6396277 -2.6690045 # [10,] 1.769851 1.3299752 2.8766990 0.5162858 -1.4782717 -0.2873747 1.2881873 1.9816730 -2.6690045 22.0284197 #Conduct the aSPU test (Note: the p-values might not be the same, even if the random seed is fixed) aSPU(S$U, S$V, weight=rep(1,p)) # $Ts # SPU(1) SPU(2) SPU(3) SPU(4) SPU(5) # 1.705620e+01 5.892590e+02 1.491568e+03 7.565740e+04 6.663462e+04 # SPU(6) SPU(7) SPU(8) SPU(Inf) aSPU # 1.141343e+07 -9.075204e+06 1.810350e+09 1.336336e+01 8.000000e-03 # # $pvs # SPU(1) SPU(2) SPU(3) SPU(4) SPU(5) SPU(6) SPU(7) # 0.29300000 0.00800000 0.21400000 0.01300000 0.42400000 0.02300000 0.32900000 # SPU(8) SPU(Inf) aSPU # 0.03100000 0.05200000 0.01998002