########simulate SNPs from a latent multivariate Gaussian variable with ######## a CS(rho) correlation structure. ########Input: OR: association in OR between the causal SNPs and outcome; # MAF0low, MAF0sup: MAFs for the causal SNPs from # Unif(MAFslow, MAFsup); # MAFslow, MAFsup: MAF's of the noise rare SNPs are drawn from # Unif(MAFslow, MAFsup); # cMAFslow, cMAFsup: MAF's of the noise common SNPs are drawn from # Unif(cMAFslow, cMAFsup); # nSNP: # of noise/marker SNPs to be generated; # nCSNP: # of noise/marker common SNPs to be generated; # n: # of cases (= # of controls); # rho: the parameter in CS(rho) for the latent multivariate # Gaussian variable to be discretized to SNPs. # p0: background disease prevalence;i.e. intercept=log(p0/(1-p0)) ########Output: a list of the binary outcome Y (=0 or 1) and SNPs (=0, 1 or 2); # Y is a vector of length 2n; X is a matrix of n by nSNP. simRareSNP<-function(ORs, n0=500, n1=500, nSNP=0, nCSNP=0, rho=0, MAF0slow=0.001, MAF0sup=0.01, MAFslow=0.001, MAFsup=0.01, cMAFslow=0.05, cMAFsup=0.5, p0=0.05){ nSNP0<-length(ORs) q<-nSNP+nSNP0+nCSNP ###############index for causal SNPs; mix causal and noise SNPs: ############### SNP j is causal iff SNP0index[j]=1 or -1; =0 o/w signORs<-rep(1, nSNP0) signORs[ORs<1]<-0-1 SNP0indx<-sample(c(signORs, rep(0, nSNP+nCSNP)), q, replace = FALSE) R<-matrix(1, nrow=q, ncol=q) for(i in 1:q) for(j in 1:q) if (i!=j) R[i, j]<-rho svd.R<-svd(R) R1<-svd.R$u %*% diag(sqrt(svd.R$d)) ##background disease prev = p0 b0<-log(p0/(1-p0)) logORs<-log(ORs) MAF0s<-runif(nSNP0, MAF0slow, MAF0sup) MAFs1<-runif(nSNP, MAFslow, MAFsup) MAFs2<-runif(nCSNP, cMAFslow, cMAFsup) cutoff0<-qnorm(MAF0s) cutoff1<-qnorm(MAFs1) cutoff2<-qnorm(MAFs2) cutoff<-rep(0, q) cutoff[SNP0indx!=0]=cutoff0 cutoff[SNP0indx==0]=c(cutoff1, cutoff2) X<-matrix(0, nrow=n0+n1, ncol=q) Y<-rep(0, n0+n1); Y[(n0+1):(n0+n1)]<-1 i<-1 #sampling controls: while ( i <= n0){ X0<-rnorm(q, 0, 1) #: X0 ~ MVN(0, I) X1<-R1 %*% X0 #: X1 ~ MVN(0, R) X2<-ifelse(X1