#PubH Stat Learning and Data Mining #Example 3.2: Use of PCR and PLS # PCR and PLS implemented in several R packages; # install one if not yet as the following: # install.packages("pls") library("pls") # a dataset in the package, which contains a training set and a test set; # training set: 21 NIR spectra of PET yarns, measureed at 268 wavelengths, # and 21 corresponding densities. # test set: 7 samples # data format: Xtrain (21x268), Xtest (7x268), Ytrain, Ytest. # Near-Infrared Spectroscopy (NIR) spectropscopy is often used for product monitoring and QC data(yarn) #?yarn #A data frame with components #NIR: Numeric matrix of NIR measurements #density: Numeric vector of densities #train: Logical vector with TRUE for the training samples and FALSE for the test samples ###############################PCR: #fit PCR models with 1 to ncomp components; max ncomp used if not specified. # CV can be done at the same time (o/w let validation="none"): # (default: 10-fold CV) # pcr.fits<-pcr(density ~ NIR, data=yarn, ncomp=20, validation="CV" ) #CV error: pcr.fits$validation$PRESS # 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps 7 comps 8 comps #density 23166.7 500.8572 191.5036 182.7043 75.55089 6.667932 7.442739 2.654206 # 9 comps 10 comps 11 comps 12 comps 13 comps 14 comps 15 comps 16 comps #density 2.637498 2.668565 2.594406 2.523516 2.162171 2.194259 2.076411 1.719334 # 17 comps 18 comps 19 comps 20 comps #density 1.546414 1.504741 1.559006 1.128121 #use coef() to extract the estimated coef's (a long vector!) coef(pcr.fits) #or, draw the etimated coef's: coefplot(pcr.fits) ################################PLS: pls.fits<-plsr(density ~ NIR, data=yarn, ncomp=20, validation="CV" ) #CV error: pls.fits$validation$PRESS # 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps 7 comps 8 comps #density 1002.934 487.0624 107.0895 13.0618 5.122679 5.084268 2.252613 2.110554 # 9 comps 10 comps 11 comps 12 comps 13 comps 14 comps 15 comps 16 comps #density 1.895861 1.667876 1.515828 1.530591 1.594989 1.673725 1.705663 1.703486 # 17 comps 18 comps 19 comps 20 comps #density 1.722105 1.728397 1.733559 1.732144 #use coef() to extract the estimated coef's (a long vector!) coef(pls.fits, ncomp=11) #or, draw the etimated coef's: coefplot(pls.fits, ncomp=11) ####compare fitted PCR and PLS models: setwd("C:/Users/panxx014/Documents/courses/7475/Examples/figs") pdf("ex3.2.pdf") par(mfrow=c(4,2)) coefplot(pls.fits, ncomp=3, main="PLS, ncomp=3") coefplot(pcr.fits, ncomp=3, main="PCR, ncomp=3") coefplot(pls.fits, ncomp=5, main="PLS, ncomp=5") coefplot(pcr.fits, ncomp=5, main="PCR, ncomp=5") coefplot(pls.fits, ncomp=11, main="PLS, ncomp=11") coefplot(pcr.fits, ncomp=11, main="PCR, ncomp=11") coefplot(pls.fits, ncomp=20, main="PLS, ncomp=20") coefplot(pcr.fits, ncomp=20, main="PCR, ncomp=20") dev.off() #####you can use predict() to do prediction with any given ncomp (and new data).