Last active
March 10, 2018 02:36
-
-
Save valexandersaulys/0fbab2a04ba4d7ddaabc to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Distill ("compress") a large random-forest MNIST classifier into a tiny
# neural network by training the network on the forest's predictions.
#
# NOTE(review): this script is end-to-end I/O and compute heavy — it
# downloads MNIST and trains a 10,000-tree forest; expect a large RAM
# footprint (the original author observed this on a 32 GB machine).

# Timing it
from time import time
start_time = time()

# Import data.
# fetch_mldata and sklearn.cross_validation were removed from scikit-learn;
# fetch_openml and model_selection are the supported replacements.
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

mnist = fetch_openml('mnist_784', as_frame=False)
x_train, x_test, y_train, y_test = train_test_split(mnist.data, mnist.target)

# Train Random Forest (arbitrarily high number of trees).
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=10000, n_jobs=6, verbose=1)
print("fitting the randomforest model")
rf = rf.fit(x_train, y_train)
print("----- took %f seconds -----" % float(time() - start_time))

# Get predictions --> fitting to the model's findings after all.
test_preds = rf.predict(x_test)
train_preds = rf.predict(x_train)

# Build the compressed (student) model.
# BUG FIX: the original used Dense(2), which cannot represent the 10 MNIST
# digit classes; the output layer must have 10 units to match the one-hot
# targets fed to the softmax below. (init= is now kernel_initializer=.)
from keras.models import Sequential
from keras.layers import Dense, Activation
print("compressing the model")
compressed_model = Sequential()
compressed_model.add(Dense(10, kernel_initializer='uniform', input_dim=784))
compressed_model.add(Activation('softmax'))
compressed_model.compile(optimizer='sgd', loss='mse')
print("----- took %f seconds -----" % float(time() - start_time))

# Combine the predictions into one array.
# BUG FIX: the original concatenated labels as (test, train) but inputs as
# (train, test), misaligning every sample with its target. It also used
# axis=1, which is invalid for 1-D label arrays, and `x_train + x_test`,
# which is elementwise addition rather than concatenation. Stack both in
# (train, test) order along axis 0 so rows stay paired with their labels.
import numpy as np
target_labels = np.concatenate((train_preds, test_preds), axis=0)
# One-hot encode the forest's predicted classes so the targets match the
# 10-unit softmax output.
target_values = np.eye(10)[target_labels.astype(int)]
x_values = np.concatenate((x_train, x_test), axis=0)
print(x_values.shape)
print(x_train.shape)
print(x_test.shape)
print(target_values.shape)

# Fit the compressed model (epochs= replaces the deprecated nb_epoch=).
compressed_model.fit(x_values, target_values, epochs=100, batch_size=32)

# Save both models so their on-disk sizes can be compared.
# joblib is its own package now (sklearn.externals.joblib was removed).
import joblib
joblib.dump(rf, 'randomForest.pkl')
compressed_model.save_weights("compressed_model.h5")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.