# PubH Stat Learning and Data Mining
# Example 5.2: GAM; use AIC/BIC/CV to select the smoothing parameter in a GAM.

library(gam)  # available on titanium

# Use the Kyphosis data (81 children who had corrective spinal surgery).
data(kyphosis)
kyphosis[1:5, ]
#   Kyphosis Age Number Start
# 1   absent  71      3     5
# 2   absent 158      3    14
# 3  present 128      4     5
# 4   absent   2      5     1
# 5   absent   1      4    15

n <- nrow(kyphosis)  # 81

kyphosis[1, ]$Kyphosis
# [1] absent
# Levels: "absent" "present"
#
# Splus (R too) glm()/gam() rule for a factor response: the first level is
# coded 0 and the other 1.  You can also specify the binary response
# explicitly as (Kyphosis == "present") in the formula of glm() or gam().

# Fit a GAM with loess (lo) smooth terms:
fit1 <- gam(Kyphosis ~ lo(Age) + lo(Number) + lo(Start),
            family = binomial, data = kyphosis)
summary(fit1)
#     Null Deviance: 83.2345 on 80      degrees of freedom
# Residual Deviance: 42.9555 on 69.4146 degrees of freedom
# AIC: 66.1264
#
# Number of Local Scoring Iterations: 11
#
# DF for Terms and Chi-squares for Nonparametric Effects
#
#              Df Npar Df Npar Chisq P(Chi)
# (Intercept) 1.0
# lo(Age)     1.0     2.5     6.4581 0.0646
# lo(Number)  1.0     2.7     4.1960 0.2061
# lo(Start)   1.0     2.3     5.3104 0.0921

# Same model with smoothing-spline (s) terms and an explicit binary response:
fit2 <- gam((Kyphosis == "present") ~ s(Age) + s(Number) + s(Start),
            family = binomial, data = kyphosis)
summary(fit2)
#     Null Deviance: 83.2345 on 80      degrees of freedom
# Residual Deviance: 40.526  on 67.9997 degrees of freedom
# AIC: 66.5266
#
# Number of Local Scoring Iterations: 11
#
# DF for Terms and Chi-squares for Nonparametric Effects
#
#             Df Npar Df Npar Chisq P(Chi)
# (Intercept)  1
# s(Age)       1       3     5.7937 0.1221
# s(Number)    1       3     5.6880 0.1278
# s(Start)     1       3     5.8876 0.1172

postscript("ex5.2.ps")
par(mfrow = c(2, 3))
plot(fit1, se = TRUE)  # the dashed curves are pointwise +- 2*SE
plot(fit2, se = TRUE)
dev.off()

# AIC/BIC over a grid of smoothing parameters.
# For s(), the smoothing parameter is the target degrees of freedom.
spSet <- 1:10
AIC <- BIC <- numeric(length(spSet))
for (k in seq_along(spSet)) {
  sp0 <- spSet[k]
  fit <- gam(Kyphosis ~ s(Age, sp0) + s(Start),
             family = binomial, data = kyphosis)
  # Effective number of parameters = n - residual df of the fit.
  AIC[k] <- fit$deviance + 2 * (n - fit$df.residual)
  BIC[k] <- fit$deviance + log(n) * (n - fit$df.residual)
}

# Print out:
cat("df=", spSet, "\n")
cat("AIC=", AIC, "\n")
cat("BIC=", BIC, "\n")
# df= 1 2 3 4 5 6 7 8 9 10
# AIC= 67.73905 64.10075 64.88276 66.29924 67.57357 68.74138 69.85456 70.89715 71.83167 72.65876
# BIC= 82.1086 80.8621 84.03873 87.84968 91.51828 95.08143 98.58787 102.0244 105.3531 108.5762

######### CV:
# Leave-one-out cross-validated log-likelihood over a grid of spans.
# For lo(), the smoothing parameter is the span.
spanSet <- (1:10) / 10
logL <- numeric(length(spanSet))
for (k in seq_along(spanSet)) {
  span0 <- spanSet[k]
  for (i in 1:n) {
    # Fit on all observations except the i-th, then predict obs i.
    fit <- gam(Kyphosis ~ lo(Age, span = span0) + lo(Start),
               family = binomial, data = kyphosis,
               subset = (1:n)[-i])
    predpi <- predict(fit, newdata = kyphosis[i, ], type = "response")
    if (kyphosis[i, ]$Kyphosis == "present") {
      logL[k] <- logL[k] + log(predpi)
    } else {
      logL[k] <- logL[k] + log(1 - predpi)
    }
  }
}
# There were 50 or more warnings (use warnings() to see the first 50)
# ... related to convergence and extrapolation.

# Print out:
cat("span=", spanSet, "\n")
cat("CV logL=", logL, "\n")
# span= 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1
# CV logL= -208.7682 -76.56352 -47.05613 -37.47847 -35.11352 -34.12871 -33.30827 -32.56275 -32.07728 -32.18931

# Components of a fitted gam object:
# > names(fit1)
#  [1] "coefficients"         "residuals"            "fitted.values"
#  [4] "R"                    "rank"                 "smooth"
#  [7] "nl.df"                "df.residual"          "var"
# [10] "assign"               "terms"                "call"
# [13] "formula"              "family"               "nl.chisq"
# [16] "y"                    "weights"              "iter"
# [19] "additive.predictors"  "deviance"             "null.deviance"
# [22] "contrasts"

# See whether there is any big difference with various df = 2, 5, 10:
fit1 <- gam(Kyphosis ~ s(Age, 2) + s(Start), family = binomial, data = kyphosis)
fit2 <- gam(Kyphosis ~ s(Age, 5) + s(Start), family = binomial, data = kyphosis)
fit3 <- gam(Kyphosis ~ s(Age, 10) + s(Start), family = binomial, data = kyphosis)

postscript("ex5.2b.ps")
par(mfcol = c(2, 3))
plot(fit1, se = TRUE)
title(main = "df=2")
plot(fit2, se = TRUE)
title(main = "df=5")
plot(fit3, se = TRUE)
title(main = "df=10")
dev.off()