# PubH Stat Learning and Data Mining
# Example 4.2: Use of nearest shrunken centroids,
#   i.e. Prediction Analysis for Microarrays (PAM)
#
# Fits a PAM classifier to pure-noise data (the class labels are drawn
# independently of the features), cross-validates it, writes diagnostic plots
# to a PDF, and prints the confusion matrix of the fit.
#
# The numeric output kept in the comments below was recorded from an earlier
# run; it may not reproduce exactly under a different R or pamr version.

# install.packages("pamr")
library("pamr")

set.seed(120)

# Generate some data: 1000 features (rows) by 20 samples (columns), with the
# 4 class labels assigned at random.
x <- matrix(rnorm(1000 * 20), ncol = 20)
y <- sample(1:4, size = 20, replace = TRUE)
mydata <- list(x = x, y = factor(y))

# Run classifier
myfit <- pamr.train(mydata)

# Do n-fold CV to, among others, select the shrinkage parameter
myfit.cv <- pamr.cv(myfit, mydata, nfold = 5)
myfit.cv
#Call:
#pamr.cv(fit = myfit, data = mydata, nfold = 5)
#   threshold nonzero errors
#1  0.000     1000    15
#2  0.073      997    16
#3  0.146      989    17
#4  0.219      973    16
#5  0.292      926    16
#6  0.364      871    16
#7  0.437      806    16
#8  0.510      714    17
#9  0.583      629    18
#10 0.656      521    17
#11 0.729      422    17
#12 0.802      335    16
#13 0.875      254    16
#14 0.947      198    15
#15 1.020      138    15
#16 1.093      101    14
#17 1.166       74    13
#18 1.239       54    12
#19 1.312       43    11
#20 1.385       29    12
#21 1.458       22    10
#22 1.530       16    10
#23 1.603       15    10
#24 1.676        7    11
#25 1.749        5    11
#26 1.822        5    11
#27 1.895        2    12
#28 1.968        1    12
#29 2.041        1    13
#30 2.113        0    13

# Shrinkage threshold used for all plots and summaries below (row 23 of the
# recorded CV table above).
pam_threshold <- 1.603

# Write the figures next to the course material when that directory exists;
# otherwise fall back to the current working directory. The working directory
# itself is deliberately left untouched (no setwd()).
fig_dir <- "C:/Users/panxx014/Documents/courses/7475/Examples/figs"
if (!dir.exists(fig_dir)) {
  message("Figure directory not found; writing ex4.2.pdf to ", getwd())
  fig_dir <- getwd()
}
pdf(file.path(fig_dir, "ex4.2.pdf"))

# Plot the cross-validated error curves vs shrinkage parameter
pamr.plotcv(myfit.cv)

# Plot the cross-validated sample probabilities
pamr.plotcvprob(myfit.cv, mydata, threshold = pam_threshold)

# Plot the shrunken class centroids; but for this example, it gives some
# error message
#pamr.plotcen(myfit, mydata, threshold=1.603)

# Plot genes' expression values in each class
pamr.geneplot(myfit, mydata, threshold = pam_threshold)

dev.off()

# Print out the confusion matrix of the fit:
pamr.confusion(myfit, threshold = pam_threshold)
#    1 2 3 4 Class Error rate
#1   3 0 2 0 0.4
#2   0 0 4 0 1.0
#3   0 0 6 0 0.0
#4   0 0 0 5 0.0
#Overall error rate= 0.28

## Is the above error rate 0.28 reasonable? Why or why not?
# Confusion matrix based on the cross-validation results (myfit.cv), at the
# same shrinkage threshold used for the plots.
pamr.confusion(myfit.cv, threshold = 1.603)
#    1 2 3 4 Class Error rate
#1   1 0 2 2 0.8
#2   0 0 4 0 1.0
#3   0 0 6 0 0.0
#4   0 0 2 3 0.4
#Overall error rate= 0.45

# Predict for new data: 100 fresh noise samples (columns) over the same
# 1000 features (rows).
new_x <- matrix(rnorm(1000 * 100), ncol = 100)
pamr.predict(myfit, newx = new_x, threshold = 1.603)
#  [1] 3 1 3 3 4 3 3 3 4 1 3 3 3 3 4 3 3 3 3 3 4 3 3 3 3 3 3 3 3 3 3 3 3 4 3 3 3
# [38] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
# [75] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 3 3 3 3 3 3 3 3
#Levels: 1 2 3 4