#Stat Learning and Data Mining
#Example 8.1: Use of SVM

library(e1071)
data(iris)
# This famous (Fisher's or Anderson's) iris data set gives the
# measurements in centimeters of the variables sepal length and
# width and petal length and width, respectively, for 50 flowers
# from each of 3 species of iris. The species are _Iris setosa_,
# _versicolor_, and _virginica_.

iris[1:3,]
#  Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#1          5.1         3.5          1.4         0.2  setosa
#2          4.9         3.0          1.4         0.2  setosa
#3          4.7         3.2          1.3         0.2  setosa

###fit an SVM:
#training data: n=100; test data: n=50
set.seed(022104)
tr<-sample(1:150, 100)
ir.svm0 <- svm(Species ~., data=iris[tr,])

####################Manual for svm():
Usage:

     ## S3 method for class 'formula':
     svm(formula, data = NULL, ..., subset, na.action = na.omit,
         scale = TRUE)
     ## Default S3 method:
     svm(x, y = NULL, scale = TRUE, type = NULL, kernel = "radial",
         degree = 3, gamma = 1 / ncol(as.matrix(x)), coef0 = 0, cost = 1,
         nu = 0.5, class.weights = NULL, cachesize = 40, tolerance = 0.001,
         epsilon = 0.1, shrinking = TRUE, cross = 0, fitted = TRUE, ...,
         subset, na.action = na.omit)

Arguments:

 formula: a symbolic description of the model to be fit. Note that an
     intercept is always included, whether given in the formula or not.

    data: an optional data frame containing the variables in the model.
     By default the variables are taken from the environment which
     'svm' is called from.

       x: a data matrix, a vector, or a sparse matrix (object of class
     'matrix.csr' as provided by the package 'SparseM').

       y: a response vector with one label for each row/component of
     'x'. Can be either a factor (for classification tasks) or a
     numeric vector (for regression).

   scale: A logical vector indicating the variables to be scaled. If
     'scale' is of length 1, the value is recycled as many times as
     needed. By default, data are scaled internally (both 'x' and 'y'
     variables) to zero mean and unit variance. The center and scale
     values are returned and used for later predictions.

    type: 'svm' can be used as a classification machine, as a
     regression machine, or for novelty detection. Depending on whether
     'y' is a factor or not, the default setting for 'type' is
     'C-classification' or 'eps-regression', respectively, but may be
     overwritten by setting an explicit value. Valid options are:
       * 'C-classification'
       * 'nu-classification'
       * 'one-classification' (for novelty detection)
       * 'eps-regression'
       * 'nu-regression'

  kernel: the kernel used in training and predicting. You might
     consider changing some of the following parameters, depending on
     the kernel type.
       linear:       u'*v
       polynomial:   (gamma*u'*v + coef0)^degree
       radial basis: exp(-gamma*|u-v|^2)
       sigmoid:      tanh(gamma*u'*v + coef0)

  degree: parameter needed for kernel of type 'polynomial' (default: 3)

   gamma: parameter needed for all kernels except 'linear' (default:
     1/(data dimension))

   coef0: parameter needed for kernels of type 'polynomial' and
     'sigmoid' (default: 0)

    cost: cost of constraints violation (default: 1) - it is the
     'C'-constant of the regularization term in the Lagrange
     formulation.

      nu: parameter needed for 'nu-classification' and
     'one-classification'

class.weights: a named vector of weights for the different classes,
     used for asymmetric class sizes. Not all factor levels have to be
     supplied (default weight: 1). All components have to be named.

cachesize: cache memory in MB (default 40)

tolerance: tolerance of termination criterion (default: 0.001)

 epsilon: epsilon in the insensitive-loss function (default: 0.1)

shrinking: option whether to use the shrinking heuristics (default:
     'TRUE')

   cross: if an integer value k>0 is specified, a k-fold cross
     validation on the training data is performed to assess the quality
     of the model: the accuracy rate for classification and the Mean
     Squared Error for regression

  fitted: indicates whether the fitted values should be computed and
     included in the model or not (default: 'TRUE')

     ...: additional parameters for the low level fitting function
     'svm.default'

  subset: An index vector specifying the cases to be used in the
     training sample. (NOTE: If given, this argument must be named.)

na.action: A function to specify the action to be taken if 'NA's are
     found. The default action is 'na.omit', which leads to rejection
     of cases with missing values on any required variable. An
     alternative is 'na.fail', which causes an error if 'NA' cases are
     found. (NOTE: If given, this argument must be named.)

Details:

     For multiclass-classification with k levels, k>2, 'libsvm' uses
     the 'one-against-one' approach, in which k(k-1)/2 binary
     classifiers are trained; the appropriate class is found by a
     voting scheme.
####################End of manual for svm()
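#########Added sketch (not in the original handout): the manual above
# also documents the default x/y interface of svm(); the lines below
# illustrate it with an explicit polynomial kernel. The object name
# ir.poly is made up for illustration.
ir.poly <- svm(x = as.matrix(iris[tr, 1:4]), y = iris$Species[tr],
               kernel = "polynomial", degree = 2, coef0 = 1)
summary(ir.poly)   # reports kernel, cost, and number of support vectors
# With k = 3 classes, libsvm trains k(k-1)/2 = 3 binary classifiers
# ("one-against-one") and predicts by majority vote.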
#model-fitting summary:
summary(ir.svm0)
#Call:
# svm.formula(formula = Species ~ ., data = iris[tr, ])
#Parameters:
#   SVM-Type:  C-classification
# SVM-Kernel:  radial
#       cost:  1
#      gamma:  0.25
#Number of Support Vectors:  39
# ( 8 16 15 )
#Number of Classes:  3
#Levels:
# setosa versicolor virginica

#########doing prediction:
table(iris$Species[-tr], predict(ir.svm0, iris[-tr,]))
#             setosa versicolor virginica
#  setosa         11          0         0
#  versicolor      0         19         1
#  virginica       0          1        18

#########Using k-fold CV to select parameters (cost, gamma):
ir.svm0cv <- svm(Species ~., data=iris[tr,], cross=5)
summary(ir.svm0cv)
#...same as before...
#5-fold cross-validation on training data:
#Total Accuracy: 98
#Single Accuracies:
# 100 95 95 100 100

ir.svm1cv <- svm(Species ~., data=iris[tr,], gamma=0.5, cross=5)
summary(ir.svm1cv)
#5-fold cross-validation on training data:
#Total Accuracy: 96
#Single Accuracies:
# 95 100 100 90 95

ir.svm2cv <- svm(Species ~., data=iris[tr,], cost=2, gamma=0.5, cross=5)
summary(ir.svm2cv)
#5-fold cross-validation on training data:
#Total Accuracy: 96
#Single Accuracies:
# 90 90 100 100 100

ir.svm3cv <- svm(Species ~., data=iris[tr,], cost=2, gamma=0.25, cross=5)
summary(ir.svm3cv)
#Total Accuracy: 97
#Single Accuracies:
# 95 100 90 100 100

#########using tune() to do an automatic grid search with CV:
ir.tune<-tune(svm, Species ~., data=iris,
              ranges=list(gamma=2^(-5:5), cost=2^(-5:5)),
              tunecontrol = tune.control(sampling="cross", cross=5))
summary(ir.tune)
#Parameter tuning of 'svm':
#- sampling method: 5-fold cross validation
#- best parameters:
#  gamma cost
# 0.0625    4
#- best performance: 0.02666667
#- Detailed performance results:
#     gamma    cost      error
#1  0.03125 0.03125 0.66000000
#2  0.06250 0.03125 0.50000000
#...
#59 0.25000 1.00000 0.04000000
#...
#or
#plot(ir.tune)  # a contour plot of CV error over (gamma, cost)

###########tune():
# This generic function tunes hyperparameters of statistical methods
# using a grid search over supplied parameter ranges.
# you can even try:
#tune.nnet(..., size, ...)
#tune.randomForest(..., nodesize, mtry, ntree, ...)
#  nodesize = min size of terminal nodes
#tune.knn(..., k, ...)

#########You may want to try other kernels for SVM too!
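#########Added sketch (not in the original handout): tune() refits the
# winning parameter combination on the full training data and stores it
# in $best.model, so one way to finish is to score that model on the
# held-out cases. Unlike ir.tune above (tuned on all of iris), this
# sketch tunes on iris[tr,] only; the names ir.tune.tr and ir.best are
# illustrative.
ir.tune.tr <- tune(svm, Species ~ ., data = iris[tr, ],
                   ranges = list(gamma = 2^(-5:5), cost = 2^(-5:5)),
                   tunecontrol = tune.control(sampling = "cross", cross = 5))
ir.best <- ir.tune.tr$best.model          # svm fit at the best (gamma, cost)
table(iris$Species[-tr], predict(ir.best, iris[-tr, ]))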
###############################################
## You may also try the package "svmpath", which computes the entire
## regularization path for the two-class SVM at almost the same cost as
## a single SVM fit, thus facilitating CV for choosing the tuning
## parameter.
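## Added sketch (not in the original handout): a hedged example of how
## such a call might look, assuming the "svmpath" package API of Hastie
## et al. (svmpath() expects a numeric x matrix and y coded as -1/+1;
## object names here are illustrative).
#library(svmpath)
#x2 <- as.matrix(iris[51:150, 1:4])  # two classes: versicolor vs virginica
#y2 <- ifelse(iris$Species[51:150] == "virginica", 1, -1)
#ir.path <- svmpath(x2, y2)          # entire regularization path, one fit
#predict(ir.path, newx = x2, lambda = 1, type = "class")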