#####Normal-based LRT-PC test by Wang et al (2008, Genetic Epi).
####W.P.
#### 1) differs from LRTPC.R in using the whole data matrix (or its permuted
#### version), instead of that for controls, to do PCA. 1/4/09
#### 2) differs from LRTPC1.2.R in adding parameter varpct, the percentage of
####    variations explained by PC's to extract PCs so that, in addition to
####    the LRTpc proposed by Wang et al, when the input X is SINGULAR, one
####    can use PCA to get a transformed X of FULL rank! 2/12/09

### Copyright: Wei Pan, 1/3/09
###            weip@biostat.umn.edu 
###            http://www.biostat.umn.edu/~weip/

#input: Y: a vector of 0's and 1's for the disease indicator
#       X: a genotype matrix; each row is for one subject, and each col for
#          one SNP (with 1-DF coding by additive, recessive or dominant
#          inheritance mode) or one of the two indicators for the 2-DF coding;
#          for additive mode, coded as 0, 1 and 2.
#       varpct: % of the total variance of X explained by the first few 
#               principle components (PCs); to deterimin the # of PCs used; 
#               default=(0.85, 0.99)
#       B: # of permutations used to estimate the p-value for LRT/LRTpc; 
#          default=1000
# output: a vector of  the p-value for LRT (use all X), 
#         the p-value for LRTpc with the first k1 PCs explaining varpct[1] of 
#         the total variance of X, k1, the p-value for LRTpc with the first 
#         k2 PCs explaining varpct[2] of the total variance of X, k2.

LRTPC<-function(Y, X, varpct=c(0.85, 0.99), B=1000){

X1<-X[Y==1,]; n1<-length(X1[,1])
X0<-X[Y==0,]; n0<-length(X0[,1])
n<-length(X[,1]); m<-length(X[1,])

Sigma1<-cov(X1)
Sigma0<-cov(X0)
Sigma<-cov(X)

logdetSigma<-log(det(Sigma))
LRT1<- (-n1)*log(det(Sigma1)) - n0*log(det(Sigma0))  +
      n*logdetSigma

###########PCA 1:
X.pca<-prcomp(X, center=FALSE)
Xevs<-X.pca$sdev^2
for(k in 1:m)
  if (sum(Xevs[1:k])/sum(Xevs) >= varpct[1]) break;
X1pc<-X1 %*% X.pca$rot[,1:k]
X0pc<-X0 %*% X.pca$rot[,1:k]
Xpc1<-X %*% X.pca$rot[,1:k]
k1<-k
Sigma1pc<-cov(X1pc)
Sigma0pc<-cov(X0pc)
Sigmapc1<-cov(Xpc1)

logdetSigmapc1<-log(det(Sigmapc1))
LRTpc1<- (-n1)*log(det(Sigma1pc)) - n0*log(det(Sigma0pc))  +  
         n*logdetSigmapc1                                   

###########PCA 2:
#X.pca<-prcomp(X, center=FALSE)
#Xevs<-X.pca$sdev^2
for(k in 1:m)
  if (sum(Xevs[1:k])/sum(Xevs) >= varpct[2]) break;
X1pc<-X1 %*% X.pca$rot[,1:k]
X0pc<-X0 %*% X.pca$rot[,1:k]
Xpc2<-X %*% X.pca$rot[,1:k]
k2<-k
Sigma1pc<-cov(X1pc)
Sigma0pc<-cov(X0pc)
Sigmapc2<-cov(Xpc2)

logdetSigmapc2<-log(det(Sigmapc2))
LRTpc2<- (-n1)*log(det(Sigma1pc)) - n0*log(det(Sigma0pc))  +  
         n*logdetSigmapc2                                   

################use permutation to get p-values:
LRTs<-LRTs1<-LRTpcs<-LRTpcs1<-LRTpcs2<-rep(0, B)
for(b in 1:B){
indx<-sample(1:n, n)
#Xb<-X[indx,]
X0b<-X[indx[1:n0],]
X1b<-X[indx[(n0+1):n],]

Sigma1<-cov(X1b)
Sigma0<-cov(X0b)

LRTs1[b]<- (-n1)*log(det(Sigma1)) - n0*log(det(Sigma0)) +
      n*logdetSigma 

###########PC 1:
if (k1>1) X0pcb<-Xpc1[indx[1:n0],] else
         X0pcb<-matrix(Xpc1[indx[1:n0],], ncol=1)
if (k1>1) X1pcb<-Xpc1[indx[(n0+1):n],] else
         X1pcb<-matrix(Xpc1[indx[(n0+1):n],] , ncol=1)
#cat("dim of X0pcb=", dim(X0pcb))
#cat("dim of X0pcb=", nrow(X0pcb), ncol(X0pcb))

Sigma0pc<-cov(X0pcb)
Sigma1pc<-cov(X1pcb)

LRTpcs1[b]<- (-n1)*log(det(Sigma1pc)) - n0*log(det(Sigma0pc)) +
             n*logdetSigmapc1

###########PC 2:
if (k2>1) X0pcb<-Xpc2[indx[1:n0],] else
         X0pcb<-matrix(Xpc2[indx[1:n0],], ncol=1)
if (k2>1) X1pcb<-Xpc2[indx[(n0+1):n],] else
         X1pcb<-matrix(Xpc2[indx[(n0+1):n],] , ncol=1)
#cat("dim of X0pcb=", dim(X0pcb))
#cat("dim of X0pcb=", nrow(X0pcb), ncol(X0pcb))

Sigma0pc<-cov(X0pcb)
Sigma1pc<-cov(X1pcb)

LRTpcs2[b]<- (-n1)*log(det(Sigma1pc)) - n0*log(det(Sigma0pc)) +
             n*logdetSigmapc2

}
c(sum(LRT1<LRTs1)/B, sum(LRTpc1<LRTpcs1)/B, k1, sum(LRTpc2<LRTpcs2)/B, k2 )

}