Skip to content

Instantly share code, notes, and snippets.

@cjekel
Created April 29, 2020 16:41
Show Gist options
  • Select an option

  • Save cjekel/6b223969b9fad82218dd92e833b23ee1 to your computer and use it in GitHub Desktop.

Select an option

Save cjekel/6b223969b9fad82218dd92e833b23ee1 to your computer and use it in GitHub Desktop.
Example of Keras hyperparameter tuning with Bayesian optimization
# -*- coding: utf-8 -*-
# MIT License
#
# Copyright (c) 2020 Charles Jekel (cj@jekel.me)
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# keras_hyper_tune.py
# hyper-parameter tuning to select the best 10-fold CV accuracy
# This uses keras+tensorflow, and was run using a NVidia TitanXP.
# GPyOpt is the Bayesian Optimization strategy to find the best
# Neural Network architecture and hyper-parameters
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam
import keras.backend as K
from GPyOpt.methods import BayesianOptimization
# --- Module-level script state: load data and standardize features ---
scaler = StandardScaler()
X = scaler.fit_transform(np.load('X_train.npy'))
# Keep label columns 1..9 only; column 0 of the raw labels is dropped.
Y = np.load('Y.npy')[:, 1:10]
print(Y.shape)
n_data, n_features = X.shape
n_data, n_classes = Y.shape
# Mean label value, used to initialize the output-layer bias downstream.
Y_mean = Y.mean()
def build_keras_sequential_model(n_layers, dropout_p, n_units, lr):
    """
    Build a sequential Keras model for multi-label classification.

    Input:
    n_layers : number of hidden layers (int)
    dropout_p : dropout fraction applied after each hidden layer (float)
    n_units : number of neurons in each hidden layer (int)
    lr : Adam learning rate (float)

    Returns:
    Compiled Keras Sequential model
    """
    model = Sequential()
    for i in range(n_layers):
        # input_dim is only meaningful on the first layer; Keras silently
        # ignores it on later layers, so pass it exactly once.
        if i == 0:
            model.add(Dense(n_units, activation='relu',
                            input_dim=n_features))
        else:
            model.add(Dense(n_units, activation='relu'))
        model.add(Dropout(dropout_p))
    # Output layer: one sigmoid per class (multi-label). Zero kernel plus a
    # constant bias at the mean label value starts predictions at the base
    # rate instead of random outputs.
    model.add(Dense(n_classes, activation='sigmoid',
                    kernel_initializer='zeros',
                    bias_initializer=keras.initializers.Constant(value=Y_mean)
                    ))  # noqa: E501
    adam = Adam(lr=lr)
    model.compile(loss='binary_crossentropy',
                  optimizer=adam,
                  metrics=['accuracy'],
                  )
    return model
def compute_cv_score(n_layers, dropout_p, n_units, lr, epochs):
    """
    Compute a sequential Keras 10-fold CV accuracy score.

    Input:
    n_layers : number of hidden layers (int)
    dropout_p : dropout fraction (float)
    n_units : number of neurons in each hidden layer (int)
    lr : Adam learning rate (float)
    epochs : number of training epochs (int)

    Returns:
    Mean per-class accuracy over all folds and classes (float)
    """
    n_folds = 10
    # shuffle=True is required for random_state to take effect; recent
    # scikit-learn raises a ValueError when random_state is set with the
    # default shuffle=False.
    kf = KFold(n_splits=n_folds, shuffle=True, random_state=121)
    # One accuracy per (fold, class); n_classes is set at module level.
    ind_scores = np.zeros((n_folds, n_classes))
    # compute the CV score
    for i, (train, test) in enumerate(kf.split(X)):
        X_train, X_test, Y_train, Y_test = X[train], X[test], Y[train], Y[test]
        model = build_keras_sequential_model(n_layers, dropout_p, n_units, lr)
        # batch_size=n_data trains on the full batch every epoch.
        model.fit(X_train, Y_train,
                  verbose=0,
                  epochs=epochs,
                  batch_size=n_data,
                  validation_data=(X_test, Y_test))
        y_hat = model.predict(X_test)
        # Threshold sigmoid outputs at 0.5 for per-class accuracy.
        y_hat = np.round(y_hat)
        K.clear_session()  # this prevents a memory leak
        for j in range(n_classes):
            ind_scores[i, j] = accuracy_score(Y_test[:, j], y_hat[:, j])
    my_score = ind_scores.mean()
    print('\n Total accuracy CV score:', my_score)
    return my_score
def convert_for_gpyopt(x):
    """
    Adapt compute_cv_score to GPyOpt's 2D design-matrix convention.

    Each row of x is one candidate design
    [n_layers, dropout_p, n_units, lr, epochs]; discrete entries are cast
    back to int. Returns a 1D array with one CV score per row.
    """
    scores = [compute_cv_score(int(row[0]), row[1], int(row[2]),
                               row[3], int(row[4]))
              for row in x]
    return np.array(scores)
# Search-space definition for GPyOpt. Plain ``int`` is used as the dtype
# because ``np.int`` was deprecated in NumPy 1.20 and removed in 1.24.
# epochs is sampled log-uniformly between 10 and 1000.
bounds = [{'name': 'n_layers', 'type': 'discrete', 'domain': np.arange(1, 7, dtype=int)},  # noqa: E501
          {'name': 'dropout_p', 'type': 'continuous', 'domain': [0.025, 0.5]},
          {'name': 'n_units', 'type': 'discrete', 'domain': np.arange(2, 513, dtype=int)},  # noqa: E501
          {'name': 'lr', 'type': 'continuous', 'domain': [1e-5, 1e-2]},
          {'name': 'epochs', 'type': 'discrete', 'domain': np.logspace(1, 3, dtype=int)}  # noqa: E501
          ]
# Warm-start designs: three hand-picked architectures, each a row of
# [n_layers, dropout_p, n_units, lr, epochs], evaluated up front.
_seed_designs = [
    [4, 0.5, 20, 3e-4, 1000],
    [4, 0.5, 20, 3e-4, 100],
    [6, 0.5, 512, 3e-4, 100],
]
initial_X = np.array(_seed_designs)
# Store the negated scores; the sign is flipped again when seeding the
# optimizer below.
initial_f = -1*convert_for_gpyopt(initial_X)
max_iter = 200
# Bayesian optimization of the CV score over the search space.
# initial_design_numdata=0 because the warm-start points are injected
# manually below; maximize=True since convert_for_gpyopt returns accuracy
# (higher is better); exact_feval=True models the objective as noise-free.
myBopt = BayesianOptimization(convert_for_gpyopt, domain=bounds,
                              model_type='GP',
                              initial_design_numdata=0,
                              exact_feval=True,
                              verbosity=True, verbosity_model=False,
                              maximize=True
                              )
# Seed the optimizer with the pre-evaluated warm-start designs.
myBopt.X = initial_X
# NOTE(review): initial_f is already the negated score, so -1*initial_f
# puts the raw (positive) scores into Y. Whether GPyOpt expects the raw or
# internally-negated objective in .Y when maximize=True is not visible from
# this file — confirm against GPyOpt's sign convention before reuse.
myBopt.Y = -1*initial_f.reshape(-1, 1)
myBopt.run_optimization(max_iter=max_iter, eps=1e-6, verbosity=True,
                        report_file='gp_opt_results.txt',
                        evaluations_file='gp_evals.txt')
# Persist every evaluated design and its objective value for later analysis.
np.save('myBoptX.npy', myBopt.X)
np.save('myBoptY.npy', myBopt.Y)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment