## Stat Learning and Data Mining
## Convolutional Neural Networks in R
## by Mengli Xiao

# Uncomment to install the package "keras"
# install.packages("keras")

set.seed(7475)
library(keras)

# Uncomment this if this is your first installation of keras. It provides
# default CPU-based installations of Keras and TensorFlow
# install_keras()
use_backend("tensorflow")

batch_size <- 128
num_classes <- 10
epochs <- 100

# Load the data ----------------------------------------------------------
# Shuffle and split between train and test sets
mnist <- dataset_mnist()
x_train <- mnist$train$x
y_train <- mnist$train$y
x_test <- mnist$test$x
y_test <- mnist$test$y

img_rows <- 28
img_cols <- 28

str(x_train)
# Add a trailing channel dimension: 28 x 28 images with one grayscale channel
x_train <- array_reshape(x_train, c(nrow(x_train), img_rows, img_cols, 1))
str(x_train)
x_test <- array_reshape(x_test, c(nrow(x_test), img_rows, img_cols, 1))
input_shape <- c(img_rows, img_cols, 1)

# Scale the grayscale pixel values from [0, 255] to [0, 1] (optional)
x_train <- x_train / 255
x_test <- x_test / 255

str(y_train)
# Convert class vectors to binary class matrices (one-hot encoding)
y_train <- to_categorical(y_train, num_classes)
y_test <- to_categorical(y_test, num_classes)

# Define the model structure ----------------------------------------------
model <- keras_model_sequential() %>%
  # The first convolution layer with 32 filters/kernels; each kernel is a
  # 3 x 3 matrix.
  # The default padding is no padding ("valid"); set padding = "same" to keep
  # the output the same size as the input.
  # The default stride is 1; change it by setting strides = an integer.
  layer_conv_2d(filters = 32, kernel_size = c(3, 3), activation = 'relu',
                input_shape = input_shape) %>%
  # The second convolution layer, again with 32 filters of size 3 x 3
  layer_conv_2d(filters = 32, kernel_size = c(3, 3), activation = 'relu') %>%
  # Max pooling downsamples each feature map, keeping only the strongest
  # activation in each 2 x 2 window
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_flatten() %>%
  # Fully-connected layer
  layer_dense(units = 128, activation = 'relu') %>%
  # Regularization layer. During training, a randomly selected 50% of the
  # neurons are dropped in each batch, yielding sampled subnetworks; the
  # gradients of the dropped neurons are set to 0, so each neuron's update
  # averages over the sampled subnetworks.
  # The dropout rate (0.5 here) and the dropout layer's location are also
  # tuning parameters (e.g., expose the rate as a tfruns flag; a sketch
  # appears at the end of the script).
  layer_dropout(rate = 0.5) %>%
  # Softmax is the final non-linearity; it outputs a probability for each of
  # the ten classes (digits 0-9)
  layer_dense(units = num_classes, activation = 'softmax')

# Compile the model --------------------------------------------------------
# Keras ships the most common optimizers, and the choice of optimizer is tunable.
# The loss function is also a tuning parameter.
# The metrics argument accepts any customized R function that takes tensors
# as input; a short sketch follows.
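# A minimal sketch (an aside, not used in the model run below): a custom
# metric is an R function of (y_true, y_pred) tensors wrapped with
# custom_metric(). The name "top2_accuracy" is hypothetical, and the body
# simply reuses a built-in top-k helper.
top2_acc <- custom_metric("top2_accuracy", function(y_true, y_pred) {
  metric_top_k_categorical_accuracy(y_true, y_pred, k = 2)
})
# To record it, pass metrics = c('accuracy', top2_acc) in compile() below.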
# The default is to record both accuracy and loss
model %>% compile(
  loss = loss_categorical_crossentropy,
  optimizer = 'sgd',
  metrics = c('accuracy')
)

# Stop training when the validation accuracy stops improving for 2 epochs
early_stopping <- callback_early_stopping(monitor = 'val_acc', patience = 2)

m1.hist <- model %>% fit(
  x_train, y_train,
  batch_size = batch_size,
  epochs = epochs,
  # Hold out 20% of the training data as validation data; the split is the
  # same for every epoch
  validation_split = 0.2,
  callbacks = c(early_stopping)  # delete this to run all 100 epochs
)

# Plot the accuracy and loss of the training and validation sets
plot(m1.hist)

# Store the result
m1result <- as.data.frame(m1.hist)

# Get the metrics of the test performance.
# verbose controls how progress is displayed (0 = silent, no progress bar)
test_metric <- model %>% evaluate(
  x_test, y_test, verbose = 0
)

# Loss
test_metric[1]
# $loss
# [1] 0.0947

# Accuracy
test_metric[2]
# $acc
# [1] 0.9716

# Find which epoch the training stopped at
max(m1result$epoch[!is.na(m1result$value)])
# [1] 20
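# Inspect test-set predictions (sketch) ------------------------------------
# A minimal sketch, not part of the original analysis: predict() returns an
# n x 10 matrix of class probabilities, from which a confusion matrix can be
# tabulated against the raw (non-one-hot) test labels.
pred_probs <- model %>% predict(x_test)
pred_class <- apply(pred_probs, 1, which.max) - 1  # map back to digit labels 0-9
table(predicted = pred_class, actual = mnist$test$y)

# Tuning the dropout rate with tfruns (sketch) ------------------------------
# A minimal sketch, assuming the tfruns package is installed; the file name
# "mnist_cnn.R" and the flag name "dropout1" are hypothetical. Declare the
# flag at the top of the script:
#   FLAGS <- flags(flag_numeric("dropout1", 0.5))
# replace layer_dropout(rate = 0.5) with layer_dropout(rate = FLAGS$dropout1),
# then launch a small grid of runs:
#   library(tfruns)
#   tuning_run("mnist_cnn.R", flags = list(dropout1 = c(0.25, 0.5)))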