# PubH Stat Learning and Data Mining
# Example 3.1: Use of glmnet for LASSO, Elastic net, ...
#
# glmnet(x, y, family = c("gaussian", "binomial", "poisson", "multinomial",
#        "cox", "mgaussian"), weights, offset = NULL, alpha = 1,
#        nlambda = 100, lambda = NULL, standardize = TRUE, intercept = TRUE,
#        penalty.factor = rep(1, nvars), ...)
#   alpha: mixing coefficient for the Enet penalty (alpha = 1 gives LASSO);
#   penalty.factor: per-variable multiplier, lambda[j] = penalty.factor[j] * lambda.

library("glmnet")
library("lars")  # provides the diabetes data

# diabetes: a dataset with 442 rows and 3 columns:
#   x : matrix with 10 columns, **standardized** with L2 norm = 1 and mean = 0;
#       x1-x4: age, sex, BMI, BP; x5-x10: serum measurements.
#   y : numeric vector; a measure of disease progression one year after baseline.
#   x2: matrix with 64 columns = x + some interactions.
# Q: why standardize x?  (So the penalty treats all coefficients on the same scale.)
data(diabetes)

# Bind the components explicitly rather than using attach() (attach() pollutes
# the search path and makes it unclear where x and y come from).
x <- diabetes$x
y <- diabetes$y

# Fit a linear model over the whole LASSO regularization path:
lasso.fit <- glmnet(x, y, family = "gaussian")

pdf("ex3.1.pdf")
par(mfrow = c(2, 2))

# Plot: coefficient profiles vs the L1-norm constraint
plot(lasso.fit, xvar = "norm")
# Plot: coefficient profiles vs log(lambda)
plot(lasso.fit, xvar = "lambda")

# Using CV to select the constraint value; plot() draws CV error vs log(lambda).
# (fraction: norm of the estimates over that of the unconstrained/LS one.)
set.seed(1)  # fold assignment is random; fix the seed for reproducibility
cv.lasso.fit <- cv.glmnet(x, y, nfolds = 10, family = "gaussian")
plot(cv.lasso.fit)

# Mean CV error at each lambda (100 values by default):
cv.lasso.fit$cvm
# e.g.  [1] 5919.483 5585.973 5199.052 4866.371 ... decreasing to a minimum of
#       about 2995.7 near the 42nd lambda, then flattening around 2997-3003.

# SE of the CV error:
cv.lasso.fit$cvsd

# Lambda sequence used in CV (decreasing, log-spaced):
cv.lasso.fit$lambda
# e.g.  [1] 45.16003002 41.14813742 37.49265030 ... down to [88] 0.01379122

# Value of lambda giving the minimum CV error:
cv.lasso.fit$lambda.min
# [1] 0.9958377

# Using the "1-SE rule": the largest lambda such that the CV error is within
# one standard error of the minimum (a sparser, more conservative model):
cv.lasso.fit$lambda.1se
# [1] 7.71041

# Predict (default s = "lambda.1se") and compare to the observed response:
yhat <- predict(cv.lasso.fit, newx = x)
plot(y, yhat)
abline(a = 0, b = 1)

# Coefficients at lambda.1se; the fit is sparse ("." means exactly zero):
coef(cv.lasso.fit)
# 11 x 1 sparse Matrix of class "dgCMatrix"
#                     1
# (Intercept) 152.13348
# age           .
# sex           .
# bmi         493.42401
# map         172.00436
# tc            .
# ldl           .
# hdl         -94.48438
# tch           .
# ltg         428.55200
# glu           .

dev.off()

# USUALLY, you'd refit at one of the two CV-selected lambda values:
fit1 <- glmnet(x, y, family = "gaussian", lambda = cv.lasso.fit$lambda.min)
coef(fit1)
fit2 <- glmnet(x, y, family = "gaussian", lambda = cv.lasso.fit$lambda.1se)
coef(fit2)  # same as coef(cv.lasso.fit)