### PubH 7450
### Added example: showing simulation-based significance testing.

## Simple example 1: using simulation to calculate the p-value
##                   for a Z-test statistic Z=2:

p<-1-pnorm(2)
#[1] 0.02275013

set.seed(1)
B<-100
z0s<-rnorm(B)
sum(z0s>2)/B
#[1] 0.02

set.seed(1)
B<-1000
z0s<-rnorm(B)
sum(z0s>2)/B
#[1] 0.027

set.seed(1)
B<-10000
z0s<-rnorm(B)
sum(z0s>2)/B
#[1] 0.0243

set.seed(1)
B<-100000
z0s<-rnorm(B)
sum(z0s>2)/B
#[1] 0.02351

###Example 2: permutation testing:
library(survival)
dat<-read.table('/home/merganser/weip/public_html/course/7450/data/bmt.txt',
                skip=86,
                col.names=c("grp","T1","T2","d1","d2","d3","Ta","da",
                            "Tc","dc","Tp","dp","Z1","Z2","Z3","Z4",
                            "Z5","Z6","Z7","Z8","Z9","Z10"))

fit1<-coxph(Surv(T2, d3) ~ factor(grp), data=dat)
summary(fit1)

#  n= 137 

#                coef exp(coef) se(coef)      z Pr(>|z|)  
#factor(grp)2 -0.5742    0.5632   0.2873 -1.999   0.0457 *
#factor(grp)3  0.3834    1.4673   0.2674  1.434   0.1516  

#Rsquare= 0.094   (max possible= 0.996 )
#Likelihood ratio test= 13.45  on 2 df,   p=0.001199
#Wald test            = 13.03  on 2 df,   p=0.00148
#Score (logrank) test = 13.81  on 2 df,   p=0.001004

 fit1$wald.test
#[1] 13.03152

set.seed(1)
B=10000
W0s<-rep(0, B)
n<-137
for(b in 1:B){
  indx<-sample(1:n, n)
  fit0<-coxph(Surv(T2[indx], d3[indx]) ~ factor(grp), data=dat)
  W0s[b]<-fit0$wald.test
  }
sum(W0s>fit1$wald.test)/B
#[1] 0.0013

ks.test(W0s, "pchisq", df=2)
#        One-sample Kolmogorov-Smirnov test

#data:  W0s 
#D = 0.0084, p-value = 0.4802
#alternative hypothesis: two-sided

ks.test(W0s, "pchisq", df=1)
#D = 0.3014, p-value < 2.2e-16
ks.test(W0s, "pchisq", df=3)
#D = 0.2142, p-value < 2.2e-16

###Advantage: no asymptotics, i.e. so-called "distribution-free"
###Downside: computationally intensive; only global testing.