## =============================================== ## variable selection with aSPU ## =============================================== ## Goal ## aSPU test provides some information how much each ## variable contributes to the test significance. ## varSEL() function quantifies the contribution ## ------------------------------------------------ ## ## Input ## 1) Y is a n by 1 vector for binary reponses of n subjects ## 2) X is a n by p matrix for SNPs Data ## 3) B is a number of permutation ## ----------------------------------------------- ## ## Output ## 1) sorted proportion : abs(Ui)/sum(abs(Ui)) i=1,..,p ## each variable's contribution ## 2) cumulative proportion : cumulative score ## 3) number.variable: the smallest number of variables required to ## explain certain percentage of data ## percentages are predefined : 75, 80, 85, 90, 95% varSEL<-function(Y, X, Z=NULL, pow=c(1:8, Inf), B=1000){ n <- length(Y) if (is.null(X) && length(X)>0) X=as.matrix(X, ncol=1) k <- ncol(X) if (is.null(Z)){ ## NO nuisance parameters: Xg <- X U<-t(Xg) %*% (Y-mean(Y)) } else { tdat1<-data.frame(trait=Y, Z) fit1<-glm(trait~.,family="binomial",data=tdat1) pis<-fitted.values(fit1) Us<-XUs<-matrix(0, nrow=n, ncol=k) Xmus = X for(i in 1:k){ tdat2<-data.frame(X1=X[,i], Z) fit2<-glm(X1~.,data=tdat2) Xmus[,i]<-fitted.values(fit2) XUs[, i]<-(X[,i] - Xmus[,i]) } U<-t(XUs) %*% (Y - pis) } ##observed statistics Ts=rep(NA,length(pow)) for (j in 1:length(pow)){ if (pow[j]= z) numS <- apply(smallS, 2, function(z) which( as.numeric(z)!=0)[1]) names(numS) <- c("th0.75", "th0.8", "th0.85", "th0.9", "th0.95") list(sorted.prop = sort.prop, cumulative.prop = cum.prop, number.variable= numS) }