# Timing it
from time import time
start_time = time()

# Import data (fetch_mldata and sklearn.cross_validation were removed from
# scikit-learn; fetch_openml and model_selection are the current equivalents)
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

mnist = fetch_openml('mnist_784', version=1, as_frame=False)
x_train, x_test, y_train, y_test = train_test_split(
    mnist.data, mnist.target.astype(int))

# Train Random Forest (picked an arbitrarily high number of trees)
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=10000, n_jobs=6, verbose=1)
print("fitting the random forest model")
rf = rf.fit(x_train, y_train)
print("----- took %f seconds -----" % (time() - start_time))
"""
Around here the memory footprint starts to jump on my computer (32gb of RAM)
Not sure exactly why its caused, but as an example I'll keep this as is
Yes, there are a lot of little models in RandomForest, but I'm still surprised
its not smaller as I've trained 1000 estimator models that have taken up a much
smaller RAM footprint.
"""
# Get predictions --> the compressed model fits to the forest's outputs, after all
test_preds = rf.predict(x_test)
train_preds = rf.predict(x_train)
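# Optional sanity check (not in the original gist): how well does the
# forest itself do on the held-out split before we try to compress it?
from sklearn.metrics import accuracy_score
print("random forest test accuracy: %f" % accuracy_score(y_test, test_preds))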
# Train Compressed Model
from keras.models import Sequential
from keras.layers import Dense, Activation

print("compressing the model")
compressed_model = Sequential()
# MNIST has 10 classes, so the softmax layer needs 10 outputs
# (the original Dense(2) would not match the targets); keeping the
# gist's mse loss, though crossentropy would be the more usual choice
compressed_model.add(Dense(10, kernel_initializer='uniform', input_dim=784))
compressed_model.add(Activation('softmax'))
compressed_model.compile(optimizer='sgd', loss='mse')
print("----- took %f seconds -----" % (time() - start_time))
# Combine the predictions into one array (stacking rows along axis 0, and
# keeping the features and targets in the same train/test order)
import numpy as np
from keras.utils import to_categorical

x_values = np.concatenate((x_train, x_test), axis=0)
target_values = np.concatenate((train_preds, test_preds), axis=0)
# one-hot encode the forest's predicted labels so they match the
# 10-way softmax output
target_values = to_categorical(target_values, num_classes=10)
print(x_values.shape)
print(x_train.shape)
print(x_test.shape)
print(target_values.shape)
# Fit the compressed model (nb_epoch was renamed to epochs in Keras 2)
compressed_model.fit(x_values, target_values, epochs=100, batch_size=32)
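# Optional check (a sketch, not in the original gist): how often does the
# compressed model agree with the forest it was distilled from?
compressed_test_preds = np.argmax(compressed_model.predict(x_test), axis=1)
agreement = np.mean(compressed_test_preds == test_preds)
print("agreement with the random forest on the test set: %f" % agreement)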
# Save both to compare sizes on disk (sklearn.externals.joblib was removed;
# the standalone joblib package is the current way)
import joblib
joblib.dump(rf, 'randomForest.pkl')
compressed_model.save_weights("compressed_model.h5")
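# Compare the two footprints directly (a sketch, not in the original
# gist): read the saved file sizes back from disk.
import os
print("randomForest.pkl: %d bytes" % os.path.getsize('randomForest.pkl'))
print("compressed_model.h5: %d bytes" % os.path.getsize('compressed_model.h5'))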