# Timing it
from time import time
start_time = time()

# Import data (fetch_mldata and sklearn.cross_validation were removed from
# scikit-learn; fetch_openml and model_selection are the current equivalents)
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

mnist = fetch_openml('mnist_784', version=1, as_frame=False)
x_train, x_test, y_train, y_test = train_test_split(
    mnist.data, mnist.target.astype(int))

# Train Random Forest (picked an arbitrarily high number of trees)
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=10000, n_jobs=6, verbose=1)
print("fitting the random forest model")
rf = rf.fit(x_train, y_train)
print("----- took %f seconds -----" % (time() - start_time))
"""
Around here the memory footprint starts to jump on my computer (32gb of RAM)
Not sure exactly why its caused, but as an example I'll keep this as is
Yes, there are a lot of little models in RandomForest, but I'm still surprised
its not smaller as I've trained 1000 estimator models that have taken up a much
smaller RAM footprint.
"""
# Get predictions --> the compressed model fits to the forest's outputs, after all
test_preds = rf.predict(x_test)
train_preds = rf.predict(x_train)
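# Optional sanity check (not in the original gist): how well does the
# forest itself do on the held-out split before we try to compress it?
from sklearn.metrics import accuracy_score
print("random forest test accuracy: %f" % accuracy_score(y_test, test_preds))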
# Train Compressed Model
from keras.models import Sequential
from keras.layers import Dense, Activation

print("compressing the model")
compressed_model = Sequential()
# MNIST has 10 classes, so the softmax layer needs 10 outputs
# (the original Dense(2) would not match the targets); keeping the
# gist's mse loss, though crossentropy would be the more usual choice
compressed_model.add(Dense(10, kernel_initializer='uniform', input_dim=784))
compressed_model.add(Activation('softmax'))
compressed_model.compile(optimizer='sgd', loss='mse')
print("----- took %f seconds -----" % (time() - start_time))
# Combine the predictions into one array (stacking rows along axis 0, and
# keeping the features and targets in the same train/test order)
import numpy as np
from keras.utils import to_categorical

x_values = np.concatenate((x_train, x_test), axis=0)
target_values = np.concatenate((train_preds, test_preds), axis=0)
# one-hot encode the forest's predicted labels so they match the
# 10-way softmax output
target_values = to_categorical(target_values, num_classes=10)
print(x_values.shape)
print(x_train.shape)
print(x_test.shape)
print(target_values.shape)
# Fit the compressed model (nb_epoch was renamed to epochs in Keras 2)
compressed_model.fit(x_values, target_values, epochs=100, batch_size=32)
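# Optional check (a sketch, not in the original gist): how often does the
# compressed model agree with the forest it was distilled from?
compressed_test_preds = np.argmax(compressed_model.predict(x_test), axis=1)
agreement = np.mean(compressed_test_preds == test_preds)
print("agreement with the random forest on the test set: %f" % agreement)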
# Save both to compare sizes on disk (sklearn.externals.joblib was removed;
# the standalone joblib package is the current way)
import joblib
joblib.dump(rf, 'randomForest.pkl')
compressed_model.save_weights("compressed_model.h5")
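# Compare the two footprints directly (a sketch, not in the original
# gist): read the saved file sizes back from disk.
import os
print("randomForest.pkl: %d bytes" % os.path.getsize('randomForest.pkl'))
print("compressed_model.h5: %d bytes" % os.path.getsize('compressed_model.h5'))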