### PubH 7450 ### Added example: showing simulation-based significance testing. ## Simple example 1: using simulation to calculate the p-value ## for a Z-test statistic Z=2: p<-1-pnorm(2) #[1] 0.02275013 set.seed(1) B<-100 z0s<-rnorm(B) sum(z0s>2)/B #[1] 0.02 set.seed(1) B<-1000 z0s<-rnorm(B) sum(z0s>2)/B #[1] 0.027 set.seed(1) B<-10000 z0s<-rnorm(B) sum(z0s>2)/B #[1] 0.0243 set.seed(1) B<-100000 z0s<-rnorm(B) sum(z0s>2)/B #[1] 0.02351 ###Example 2: permutation testing: library(survival) dat<-read.table('/home/merganser/weip/public_html/course/7450/data/bmt.txt', skip=86, col.names=c("grp","T1","T2","d1","d2","d3","Ta","da", "Tc","dc","Tp","dp","Z1","Z2","Z3","Z4", "Z5","Z6","Z7","Z8","Z9","Z10")) fit1<-coxph(Surv(T2, d3) ~ factor(grp), data=dat) summary(fit1) # n= 137 # coef exp(coef) se(coef) z Pr(>|z|) #factor(grp)2 -0.5742 0.5632 0.2873 -1.999 0.0457 * #factor(grp)3 0.3834 1.4673 0.2674 1.434 0.1516 #Rsquare= 0.094 (max possible= 0.996 ) #Likelihood ratio test= 13.45 on 2 df, p=0.001199 #Wald test = 13.03 on 2 df, p=0.00148 #Score (logrank) test = 13.81 on 2 df, p=0.001004 fit1$wald.test #[1] 13.03152 set.seed(1) B=10000 W0s<-rep(0, B) n<-137 for(b in 1:B){ indx<-sample(1:n, n) fit0<-coxph(Surv(T2[indx], d3[indx]) ~ factor(grp), data=dat) W0s[b]<-fit0$wald.test } sum(W0s>fit1$wald.test)/B #[1] 0.0013 ks.test(W0s, "pchisq", df=2) # One-sample Kolmogorov-Smirnov test #data: W0s #D = 0.0084, p-value = 0.4802 #alternative hypothesis: two-sided ks.test(W0s, "pchisq", df=1) #D = 0.3014, p-value < 2.2e-16 ks.test(W0s, "pchisq", df=3) #D = 0.2142, p-value < 2.2e-16 ###Advantage: no asymptotics, i.e. so-called "distribution-free" ###Downside: computationally intensive; only global testing.