#####Normal-based LRT-PC test by Wang et al (2008, Genetic Epi). ####W.P. #### 1) differs from LRTPC.R in using the whole data matrix (or its permuted #### version), instead of that for controls, to do PCA. 1/4/09 #### 2) differs from LRTPC1.2.R in adding parameter varpct, the percentage of #### variations explained by PC's to extract PCs so that, in addition to #### the LRTpc proposed by Wang et al, when the input X is SINGULAR, one #### can use PCA to get a transformed X of FULL rank! 2/12/09 ### Copyright: Wei Pan, 1/3/09 ### weip@biostat.umn.edu ### http://www.biostat.umn.edu/~weip/ #input: Y: a vector of 0's and 1's for the disease indicator # X: a genotype matrix; each row is for one subject, and each col for # one SNP (with 1-DF coding by additive, recessive or dominant # inheritance mode) or one of the two indicators for the 2-DF coding; # for additive mode, coded as 0, 1 and 2. # varpct: % of the total variance of X explained by the first few # principle components (PCs); to deterimin the # of PCs used; # default=(0.85, 0.99) # B: # of permutations used to estimate the p-value for LRT/LRTpc; # default=1000 # output: a vector of the p-value for LRT (use all X), # the p-value for LRTpc with the first k1 PCs explaining varpct[1] of # the total variance of X, k1, the p-value for LRTpc with the first # k2 PCs explaining varpct[2] of the total variance of X, k2. LRTPC<-function(Y, X, varpct=c(0.85, 0.99), B=1000){ X1<-X[Y==1,]; n1<-length(X1[,1]) X0<-X[Y==0,]; n0<-length(X0[,1]) n<-length(X[,1]); m<-length(X[1,]) Sigma1<-cov(X1) Sigma0<-cov(X0) Sigma<-cov(X) logdetSigma<-log(det(Sigma)) LRT1<- (-n1)*log(det(Sigma1)) - n0*log(det(Sigma0)) + n*logdetSigma ###########PCA 1: X.pca<-prcomp(X, center=FALSE) Xevs<-X.pca$sdev^2 for(k in 1:m) if (sum(Xevs[1:k])/sum(Xevs) >= varpct[1]) break; X1pc<-X1 %*% X.pca$rot[,1:k] X0pc<-X0 %*% X.pca$rot[,1:k] Xpc1<-X %*% X.pca$rot[,1:k] k1<-k Sigma1pc<-cov(X1pc) Sigma0pc<-cov(X0pc) Sigmapc1<-cov(Xpc1) logdetSigmapc1<-log(det(Sigmapc1)) LRTpc1<- (-n1)*log(det(Sigma1pc)) - n0*log(det(Sigma0pc)) + n*logdetSigmapc1 ###########PCA 2: #X.pca<-prcomp(X, center=FALSE) #Xevs<-X.pca$sdev^2 for(k in 1:m) if (sum(Xevs[1:k])/sum(Xevs) >= varpct[2]) break; X1pc<-X1 %*% X.pca$rot[,1:k] X0pc<-X0 %*% X.pca$rot[,1:k] Xpc2<-X %*% X.pca$rot[,1:k] k2<-k Sigma1pc<-cov(X1pc) Sigma0pc<-cov(X0pc) Sigmapc2<-cov(Xpc2) logdetSigmapc2<-log(det(Sigmapc2)) LRTpc2<- (-n1)*log(det(Sigma1pc)) - n0*log(det(Sigma0pc)) + n*logdetSigmapc2 ################use permutation to get p-values: LRTs<-LRTs1<-LRTpcs<-LRTpcs1<-LRTpcs2<-rep(0, B) for(b in 1:B){ indx<-sample(1:n, n) #Xb<-X[indx,] X0b<-X[indx[1:n0],] X1b<-X[indx[(n0+1):n],] Sigma1<-cov(X1b) Sigma0<-cov(X0b) LRTs1[b]<- (-n1)*log(det(Sigma1)) - n0*log(det(Sigma0)) + n*logdetSigma ###########PC 1: if (k1>1) X0pcb<-Xpc1[indx[1:n0],] else X0pcb<-matrix(Xpc1[indx[1:n0],], ncol=1) if (k1>1) X1pcb<-Xpc1[indx[(n0+1):n],] else X1pcb<-matrix(Xpc1[indx[(n0+1):n],] , ncol=1) #cat("dim of X0pcb=", dim(X0pcb)) #cat("dim of X0pcb=", nrow(X0pcb), ncol(X0pcb)) Sigma0pc<-cov(X0pcb) Sigma1pc<-cov(X1pcb) LRTpcs1[b]<- (-n1)*log(det(Sigma1pc)) - n0*log(det(Sigma0pc)) + n*logdetSigmapc1 ###########PC 2: if (k2>1) X0pcb<-Xpc2[indx[1:n0],] else X0pcb<-matrix(Xpc2[indx[1:n0],], ncol=1) if (k2>1) X1pcb<-Xpc2[indx[(n0+1):n],] else X1pcb<-matrix(Xpc2[indx[(n0+1):n],] , ncol=1) #cat("dim of X0pcb=", dim(X0pcb)) #cat("dim of X0pcb=", nrow(X0pcb), ncol(X0pcb)) Sigma0pc<-cov(X0pcb) Sigma1pc<-cov(X1pcb) LRTpcs2[b]<- (-n1)*log(det(Sigma1pc)) - n0*log(det(Sigma0pc)) + n*logdetSigmapc2 } c(sum(LRT1