Skip to content

Instantly share code, notes, and snippets.

@cjekel
Created April 29, 2020 16:41
Show Gist options
  • Select an option

  • Save cjekel/6b223969b9fad82218dd92e833b23ee1 to your computer and use it in GitHub Desktop.

Select an option

Save cjekel/6b223969b9fad82218dd92e833b23ee1 to your computer and use it in GitHub Desktop.
Example of Keras hyperparameter tuning with Bayesian optimization
# -*- coding: utf-8 -*-
# MIT License
#
# Copyright (c) 2020 Charles Jekel (cj@jekel.me)
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# keras_hyper_tune.py
# hyper-parameter tuning to select the best 10-fold CV accuracy
# This uses keras+tensorflow, and was run using a NVidia TitanXP.
# GPyOpt is the Bayesian Optimization strategy to find the best
# Neural Network architecture and hyper-parameters
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam
import keras.backend as K
from GPyOpt.methods import BayesianOptimization
# --- Module-level script state: load data and standardize features ---
scaler = StandardScaler()
X = scaler.fit_transform(np.load('X_train.npy'))
# Keep label columns 1..9 only; column 0 of the raw labels is dropped.
Y = np.load('Y.npy')[:, 1:10]
print(Y.shape)
n_data, n_features = X.shape
n_data, n_classes = Y.shape
# Mean label value, used to initialize the output-layer bias downstream.
Y_mean = Y.mean()
def build_keras_sequential_model(n_layers, dropout_p, n_units, lr):
    """
    Build a sequential Keras model for multi-label classification.

    Input:
    n_layers : number of hidden layers (int)
    dropout_p : dropout fraction applied after each hidden layer (float)
    n_units : number of neurons in each hidden layer (int)
    lr : Adam learning rate (float)

    Returns:
    Compiled Keras Sequential model
    """
    model = Sequential()
    for i in range(n_layers):
        # input_dim is only meaningful on the first layer; Keras silently
        # ignores it on later layers, so pass it exactly once.
        if i == 0:
            model.add(Dense(n_units, activation='relu',
                            input_dim=n_features))
        else:
            model.add(Dense(n_units, activation='relu'))
        model.add(Dropout(dropout_p))
    # Output layer: one sigmoid per class (multi-label). Zero kernel plus a
    # constant bias at the mean label value starts predictions at the base
    # rate instead of random outputs.
    model.add(Dense(n_classes, activation='sigmoid',
                    kernel_initializer='zeros',
                    bias_initializer=keras.initializers.Constant(value=Y_mean)
                    ))  # noqa: E501
    adam = Adam(lr=lr)
    model.compile(loss='binary_crossentropy',
                  optimizer=adam,
                  metrics=['accuracy'],
                  )
    return model
def compute_cv_score(n_layers, dropout_p, n_units, lr, epochs):
    """
    Compute a sequential Keras 10-fold CV accuracy score.

    Input:
    n_layers : number of hidden layers (int)
    dropout_p : dropout fraction (float)
    n_units : number of neurons in each hidden layer (int)
    lr : Adam learning rate (float)
    epochs : number of training epochs (int)

    Returns:
    Mean per-class accuracy over all folds and classes (float)
    """
    n_folds = 10
    # shuffle=True is required for random_state to take effect; recent
    # scikit-learn raises a ValueError when random_state is set with the
    # default shuffle=False.
    kf = KFold(n_splits=n_folds, shuffle=True, random_state=121)
    # One accuracy per (fold, class); n_classes is set at module level.
    ind_scores = np.zeros((n_folds, n_classes))
    # compute the CV score
    for i, (train, test) in enumerate(kf.split(X)):
        X_train, X_test, Y_train, Y_test = X[train], X[test], Y[train], Y[test]
        model = build_keras_sequential_model(n_layers, dropout_p, n_units, lr)
        # batch_size=n_data trains on the full batch every epoch.
        model.fit(X_train, Y_train,
                  verbose=0,
                  epochs=epochs,
                  batch_size=n_data,
                  validation_data=(X_test, Y_test))
        y_hat = model.predict(X_test)
        # Threshold sigmoid outputs at 0.5 for per-class accuracy.
        y_hat = np.round(y_hat)
        K.clear_session()  # this prevents a memory leak
        for j in range(n_classes):
            ind_scores[i, j] = accuracy_score(Y_test[:, j], y_hat[:, j])
    my_score = ind_scores.mean()
    print('\n Total accuracy CV score:', my_score)
    return my_score
def convert_for_gpyopt(x):
    """
    Adapt compute_cv_score to GPyOpt's 2D design-matrix convention.

    Each row of x is one candidate design
    [n_layers, dropout_p, n_units, lr, epochs]; discrete entries are cast
    back to int. Returns a 1D array with one CV score per row.
    """
    scores = [compute_cv_score(int(row[0]), row[1], int(row[2]),
                               row[3], int(row[4]))
              for row in x]
    return np.array(scores)
# Search-space definition for GPyOpt. Plain ``int`` is used as the dtype
# because ``np.int`` was deprecated in NumPy 1.20 and removed in 1.24.
# epochs is sampled log-uniformly between 10 and 1000.
bounds = [{'name': 'n_layers', 'type': 'discrete', 'domain': np.arange(1, 7, dtype=int)},  # noqa: E501
          {'name': 'dropout_p', 'type': 'continuous', 'domain': [0.025, 0.5]},
          {'name': 'n_units', 'type': 'discrete', 'domain': np.arange(2, 513, dtype=int)},  # noqa: E501
          {'name': 'lr', 'type': 'continuous', 'domain': [1e-5, 1e-2]},
          {'name': 'epochs', 'type': 'discrete', 'domain': np.logspace(1, 3, dtype=int)}  # noqa: E501
          ]
# Warm-start designs: three hand-picked architectures, each a row of
# [n_layers, dropout_p, n_units, lr, epochs], evaluated up front.
_seed_designs = [
    [4, 0.5, 20, 3e-4, 1000],
    [4, 0.5, 20, 3e-4, 100],
    [6, 0.5, 512, 3e-4, 100],
]
initial_X = np.array(_seed_designs)
# Store the negated scores; the sign is flipped again when seeding the
# optimizer below.
initial_f = -1*convert_for_gpyopt(initial_X)
max_iter = 200
# Bayesian optimization of the CV score over the search space.
# initial_design_numdata=0 because the warm-start points are injected
# manually below; maximize=True since convert_for_gpyopt returns accuracy
# (higher is better); exact_feval=True models the objective as noise-free.
myBopt = BayesianOptimization(convert_for_gpyopt, domain=bounds,
                              model_type='GP',
                              initial_design_numdata=0,
                              exact_feval=True,
                              verbosity=True, verbosity_model=False,
                              maximize=True
                              )
# Seed the optimizer with the pre-evaluated warm-start designs.
myBopt.X = initial_X
# NOTE(review): initial_f is already the negated score, so -1*initial_f
# puts the raw (positive) scores into Y. Whether GPyOpt expects the raw or
# internally-negated objective in .Y when maximize=True is not visible from
# this file — confirm against GPyOpt's sign convention before reuse.
myBopt.Y = -1*initial_f.reshape(-1, 1)
myBopt.run_optimization(max_iter=max_iter, eps=1e-6, verbosity=True,
                        report_file='gp_opt_results.txt',
                        evaluations_file='gp_evals.txt')
# Persist every evaluated design and its objective value for later analysis.
np.save('myBoptX.npy', myBopt.X)
np.save('myBoptY.npy', myBopt.Y)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment