Baolin Wu :: BiostatCDA :: Biostat CDA

Some codes for 2x2 table inference

Independence testing: large sample and exact inference

 ## Gender and belief in afterlife example
 t21 = matrix( c(435,375,147,134), 2,2 )
 ## Fisher test
 fisher.test(t21)
 ## Pearson X2 test
 chisq.test(t21, correct=FALSE)
 ## LR,Score,Wald test
 mu21 = outer(rowSums(t21), colSums(t21))/sum(t21)
 lrt = 2*sum(t21*log(t21/mu21))
 sct = sum((t21-mu21)^2/mu21)
 wdt = sum((t21-mu21)^2/t21)
 lrt; sct; wdt
 1-pchisq(lrt,df=1); 1-pchisq(sct,df=1); 1-pchisq(wdt,df=1)

Exact distribution and large sample approximation

### Exact distribution: hypergeometric
n1p = t21[1,1]+t21[1,2]
n2p = t21[2,1]+t21[2,2]
np1 = t21[1,1]+t21[2,1]
n11 = t21[1,1]; n = sum(t21)
dhyper(n11, n1p, n2p, np1)
K = 10
phyper(n11+K, n1p, n2p, np1) - phyper(n11-K, n1p, n2p, np1)
### large-sample approximation
m11 = mu21[1,1]
x = (n11-m11)/sqrt(m11*(1-n1p/n)*(1-np1/n))
xp1 = (n11+1 -m11)/sqrt(m11*(1-n1p/n)*(1-np1/n))
pnorm(xp1)-pnorm(x); dhyper(n11, n1p, n2p, np1)
pnorm(xp1)-pnorm(x) - dhyper(n11, n1p, n2p, np1)
xmh = (n11-0.5 -m11)/sqrt(m11*(1-n1p/n)*(1-np1/n))
xph = (n11+0.5 -m11)/sqrt(m11*(1-n1p/n)*(1-np1/n))
pnorm(xph)-pnorm(xmh); dhyper(n11, n1p, n2p, np1)
pnorm(xph)-pnorm(xmh) - dhyper(n11, n1p, n2p, np1)
K = 5
xmK = (n11-K -m11)/sqrt(m11*(1-n1p/n)*(1-np1/n))
xpK = (n11+K -m11)/sqrt(m11*(1-n1p/n)*(1-np1/n))
pnorm(xpK)-pnorm(xmK)
phyper(n11+K, n1p, n2p, np1) - phyper(n11-K, n1p, n2p, np1)

Quantitative association large sample inference

### proportion difference inference
pi1 = n11/n1p; pi2 = t21[2,1]/n2p
pd = pi1-pi2
spd = sqrt(pi1*(1-pi1)/n1p+pi2*(1-pi2)/n2p)
pd/spd; pnorm(-abs(.Last.value))
pd + c(-1,1)*qnorm(1-0.05/2)*spd ## including 0
### relative risk inference
rk = pi1/pi2
srk = sqrt((1-pi1)/pi1/n1p+(1-pi2)/pi2/n2p)
log(rk)/srk; pnorm(-abs(.Last.value))
log(rk)+c(-1,1)*qnorm(1-0.05/2)*srk
exp(.Last.value) ## including 1
### odds ratio
odr = pi1/(1-pi1)/(pi2/(1-pi2))
sor = sqrt(sum(1/t21))
log(odr)/sor; pnorm(-abs(.Last.value))
log(odr)+c(-1,1)*qnorm(1-0.05/2)*sor
exp(.Last.value) ## including 1

Exact p-value calculation

### Exact P-value calculation
lrt = 2*sum(t21*log(t21/mu21))
sct = sum((t21-mu21)^2/mu21)
wdt = sum((t21-mu21)^2/t21)
LSI = function(n11, n1p,n2p,np1){
  n = n1p+n2p; np2 = n-np1
  mu21 = outer(c(n1p,n2p), c(np1,np2))/n
  tst = sapply(n11, function(x){
    t21 = matrix( c(x,np1-x,n1p-x,np2-n1p+x),2,2 )
    lrt = 2*sum(t21*log(t21/mu21))
    sct = sum((t21-mu21)^2/mu21)
    wdt = sum((t21-mu21)^2/t21)
    return( c(sct,lrt,wdt) )
    })
  return( tst )
}
np2 = n1p+n2p-np1
Rx = min(np1, n1p); Lx = n1p-np2
a = LSI(Lx:Rx, n1p,n2p,np1)
a0 = LSI(435, n1p,n2p,np1)
matplot(Lx:Rx, t(a), type="l", lty=1:3, col=1:3, log="y",
        xlab=expression(n[11]), ylab=expression(list(X^2,G^2,W^2)))
abline(v=n11, col="gray", lty=4); abline(v=m11, col="purple", lty=4)
abline(h=a0[1], col="gray", lty=4)
legend(Lx,a0[1], c(expression(X^2), expression(G^2), expression(W^2)),
       col=1:3, lty=1:3, bty="n")
for(k in 1:3){
  rng = (Lx:Rx)[which(a[k,]>=a0[k,])]
  cat( sum( dhyper(rng, n1p,n2p,np1) ), "\n")
}
1-pchisq(wdt,df=1); 1-pchisq(lrt,df=1); 1-pchisq(sct,df=1);

Biostat CDA - Codes for Chapter 02