Biostat CDA - Codes for Chapter 02

Some codes for 2x2 table inference

  1. Independence testing: large sample and exact inference
     ## Gender and belief in afterlife example
     t21 = matrix( c(435,375,147,134), 2,2 )
     ## Fisher test
     fisher.test(t21)
     ## Pearson X2 test
     chisq.test(t21, correct=FALSE)
     ## LR,Score,Wald test
     mu21 = outer(rowSums(t21), colSums(t21))/sum(t21)
     lrt = 2*sum(t21*log(t21/mu21))
     sct = sum((t21-mu21)^2/mu21)
     wdt = sum((t21-mu21)^2/t21)
     lrt; sct; wdt
     1-pchisq(lrt,df=1); 1-pchisq(sct,df=1); 1-pchisq(wdt,df=1)
    
  2. Exact distribution and large sample approximation
    ### Exact distribution: hypergeometric
    n1p = t21[1,1]+t21[1,2]
    n2p = t21[2,1]+t21[2,2]
    np1 = t21[1,1]+t21[2,1]
    n11 = t21[1,1]; n = sum(t21)
    dhyper(n11, n1p, n2p, np1)
    K = 10
    phyper(n11+K, n1p, n2p, np1) - phyper(n11-K, n1p, n2p, np1)
    ### large-sample approximation
    m11 = mu21[1,1]
    x = (n11-m11)/sqrt(m11*(1-n1p/n)*(1-np1/n))
    xp1 = (n11+1 -m11)/sqrt(m11*(1-n1p/n)*(1-np1/n))
    pnorm(xp1)-pnorm(x); dhyper(n11, n1p, n2p, np1)
    pnorm(xp1)-pnorm(x) - dhyper(n11, n1p, n2p, np1)
    xmh = (n11-0.5 -m11)/sqrt(m11*(1-n1p/n)*(1-np1/n))
    xph = (n11+0.5 -m11)/sqrt(m11*(1-n1p/n)*(1-np1/n))
    pnorm(xph)-pnorm(xmh); dhyper(n11, n1p, n2p, np1)
    pnorm(xph)-pnorm(xmh) - dhyper(n11, n1p, n2p, np1)
    K = 5
    xmK = (n11-K -m11)/sqrt(m11*(1-n1p/n)*(1-np1/n))
    xpK = (n11+K -m11)/sqrt(m11*(1-n1p/n)*(1-np1/n))
    pnorm(xpK)-pnorm(xmK)
    phyper(n11+K, n1p, n2p, np1) - phyper(n11-K, n1p, n2p, np1)
    
  3. Quantitative association large sample inference
    ### proportion difference inference
    pi1 = n11/n1p; pi2 = t21[2,1]/n2p
    pd = pi1-pi2
    spd = sqrt(pi1*(1-pi1)/n1p+pi2*(1-pi2)/n2p)
    pd/spd; pnorm(-abs(.Last.value))
    pd + c(-1,1)*qnorm(1-0.05/2)*spd ## including 0
    ### relative risk inference
    rk = pi1/pi2
    srk = sqrt((1-pi1)/pi1/n1p+(1-pi2)/pi2/n2p)
    log(rk)/srk; pnorm(-abs(.Last.value))
    log(rk)+c(-1,1)*qnorm(1-0.05/2)*srk
    exp(.Last.value) ## including 1
    ### odds ratio
    odr = pi1/(1-pi1)/(pi2/(1-pi2))
    sor = sqrt(sum(1/t21))
    log(odr)/sor; pnorm(-abs(.Last.value))
    log(odr)+c(-1,1)*qnorm(1-0.05/2)*sor
    exp(.Last.value) ## including 1
    
  4. Exact p-value calculation
    ### Exact P-value calculation
    lrt = 2*sum(t21*log(t21/mu21))
    sct = sum((t21-mu21)^2/mu21)
    wdt = sum((t21-mu21)^2/t21)
    LSI = function(n11, n1p,n2p,np1){
      n = n1p+n2p; np2 = n-np1
      mu21 = outer(c(n1p,n2p), c(np1,np2))/n
      tst = sapply(n11, function(x){
        t21 = matrix( c(x,np1-x,n1p-x,np2-n1p+x),2,2 )
        lrt = 2*sum(t21*log(t21/mu21))
        sct = sum((t21-mu21)^2/mu21)
        wdt = sum((t21-mu21)^2/t21)
        return( c(sct,lrt,wdt) )
        })
      return( tst )
    }
    np2 = n1p+n2p-np1
    Rx = min(np1, n1p); Lx = n1p-np2
    a = LSI(Lx:Rx, n1p,n2p,np1)
    a0 = LSI(435, n1p,n2p,np1)
    matplot(Lx:Rx, t(a), type="l", lty=1:3, col=1:3, log="y",
            xlab=expression(n[11]), ylab=expression(list(X^2,G^2,W^2)))
    abline(v=n11, col="gray", lty=4); abline(v=m11, col="purple", lty=4)
    abline(h=a0[1], col="gray", lty=4)
    legend(Lx,a0[1], c(expression(X^2), expression(G^2), expression(W^2)),
           col=1:3, lty=1:3, bty="n")
    for(k in 1:3){
      rng = (Lx:Rx)[which(a[k,]>=a0[k,])]
      cat( sum( dhyper(rng, n1p,n2p,np1) ), "\n")
    }
    1-pchisq(wdt,df=1); 1-pchisq(lrt,df=1); 1-pchisq(sct,df=1);