This tutorial demonstrates hyperparameter tuning on the MNIST dataset with kerastuneR: a tunable augmentation step, a tunable convolutional model, and a random search over both.
library(keras)
library(tensorflow)
library(kerastuneR)
# Enable memory growth if a GPU is available
if(tensorflow::tf_gpu_configured()) {
  physical_devices = tf$config$list_physical_devices('GPU')
  tf$config$experimental$set_memory_growth(physical_devices[[1]], TRUE)
}
# The data, shuffled and split between train and test sets
mnist <- dataset_mnist()
x_train <- mnist$train$x
y_train <- mnist$train$y
x_test <- mnist$test$x
y_test <- mnist$test$y
# Add the channel dimension the convolutional model expects: (n, 28, 28) -> (n, 28, 28, 1)
x_train <- array_reshape(x_train, c(nrow(x_train), 28, 28, 1))
x_test <- array_reshape(x_test, c(nrow(x_test), 28, 28, 1))
augment_images = function(x, hp) {
  # Optionally rotate, with the rotation factor itself tunable
  use_rotation = hp$Boolean('use_rotation')
  if (use_rotation) {
    x = tf$keras$layers$experimental$preprocessing$RandomRotation(
      hp$Float('rotation_factor', min_value = 0.05, max_value = 0.2)
    )(x)
  }
  # Optionally zoom, again with a tunable factor
  use_zoom = hp$Boolean('use_zoom')
  if (use_zoom) {
    x = tf$keras$layers$experimental$preprocessing$RandomZoom(
      hp$Float('zoom_factor', min_value = 0.05, max_value = 0.2)
    )(x)
  }
  x
}
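Before wiring augment_images into a model, you can exercise it on its own; a minimal sketch, assuming kerastuneR exports a standalone HyperParameters() constructor and that a dummy 4-D tensor is enough to trace the pipeline:
hp_demo = HyperParameters()              # fresh hyperparameter container (assumed kerastuneR export)
dummy = tf$zeros(c(1L, 64L, 64L, 1L))    # one fake 64x64 single-channel image
out = augment_images(dummy, hp_demo)     # registers 'use_rotation' and 'use_zoom' with default values
hp_demo$values                           # inspect the defaults the tuner would start from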
make_model = function(hp) {
  inputs = layer_input(shape = c(28, 28, 1))
  # Rescale to [0, 1] and resize to 64 x 64 before the tunable augmentation
  x = tf$keras$layers$experimental$preprocessing$Rescaling(1. / 255)(inputs)
  x = tf$keras$layers$experimental$preprocessing$Resizing(64L, 64L)(x)
  x = augment_images(x, hp)
  # Tunable depth (number of conv blocks) and width (filters per conv layer)
  num_block = hp$Int('num_block', min_value = 2, max_value = 5, step = 1)
  num_filters = hp$Int('num_filters', min_value = 32, max_value = 128, step = 32)
  for (i in 1:num_block) {
    x = x %>%
      layer_conv_2d(num_filters, kernel_size = 3, activation = 'relu', padding = 'same') %>%
      layer_conv_2d(num_filters, kernel_size = 3, activation = 'relu', padding = 'same') %>%
      layer_max_pooling_2d(2)
  }
  # Tunable choice of how to collapse the feature maps before the dense head
  reduction_type = hp$Choice('reduction_type', c('flatten', 'avg'))
  if (reduction_type == 'flatten') {
    x = x %>% layer_flatten()
  } else {
    x = x %>% layer_global_average_pooling_2d()
  }
  x = x %>%
    layer_dense(
      units = hp$Int('num_dense_units', min_value = 32, max_value = 512, step = 32),
      activation = 'relu'
    ) %>%
    layer_dropout(hp$Float('dense_dropout', min_value = 0., max_value = 0.7))
  outputs = x %>% layer_dense(10)
  model = keras_model(inputs, outputs)
  # Tune the learning rate and actually pass it to the optimizer
  learning_rate = hp$Float('learning_rate', min_value = 3e-4, max_value = 3e-3)
  optimizer = optimizer_adam(learning_rate = learning_rate)
  model %>% compile(
    loss = tf$keras$losses$SparseCategoricalCrossentropy(from_logits = TRUE),
    optimizer = optimizer,
    metrics = tf$keras$metrics$SparseCategoricalAccuracy(name = 'acc')
  )
  model %>% summary()
  return(model)
}
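It can help to build the model once outside the tuner to confirm that the default hyperparameter values produce a valid graph; a quick sketch under the same HyperParameters() assumption as above:
hp_check = HyperParameters()          # assumed kerastuneR export, as above
check_model = make_model(hp_check)    # builds and prints a summary using every hyperparameter's default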
tuner = RandomSearch(
make_model,
objective='val_acc',
max_trials=2,
overwrite=TRUE)
callbacks=callback_early_stopping(monitor = 'val_acc', mode = 'max',
patience = 3, baseline = 0.9)
tuner %>% fit_tuner(x_train, y_train, validation_split = 0.2,
callbacks = list(callbacks), verbose=1, epochs=2)
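Once the search has finished, the underlying keras-tuner object can report what it tried; a sketch assuming the tuner returned by kerastuneR exposes keras-tuner's results_summary() method via reticulate:
tuner$results_summary()   # per-trial hyperparameter values and val_acc scores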
Extract the best model and retrain it:
best_model = (tuner %>% get_best_models(1))[[1]]
history = best_model %>% fit(x_train, y_train, validation_split = 0.2, epochs = 2)
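Finally, the retrained model can be scored on the held-out test images with the standard keras evaluate():
best_model %>% evaluate(x_test, y_test, verbose = 0)   # returns the test loss and 'acc'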