Last active
January 18, 2016 02:18
-
-
Save neilsummers/446b433e24ddf3466c0a to your computer and use it in GitHub Desktop.
GBM sklearn wrapper
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
| from sklearn.base import BaseEstimator, RegressorMixin | |
| import rpy2 | |
| from rpy2.robjects.numpy2ri import numpy2ri | |
| from rpy2.robjects.packages import importr | |
| gbm = importr('gbm') | |
class GBMRegressor(BaseEstimator, RegressorMixin):
    """Regressor with a scikit-learn style interface backed by R's ``gbm``.

    Fitting and prediction are delegated to the R ``gbm`` package through
    the module-level ``gbm`` handle (created with rpy2's ``importr``).  The
    model is always fit with ``distribution='gaussian'``.

    Parameters
    ----------
    learning_rate : passed to gbm as ``shrinkage``.
    n_estimators : passed to gbm as ``n.trees``.
    max_features : passed to gbm as ``bag.fraction``.
    verbose : passed to gbm as ``verbose``.
    min_samples_leaf : passed to gbm as ``n.minobsinnode``.
    max_depth : passed to gbm as ``interaction.depth``.
    subsample : passed to gbm as ``train.fraction``; omitted from the call
        when equal to 1.0.
    random_state : if not None, used to seed R's RNG (``set.seed``) right
        before fitting.
    """

    def __init__(self, learning_rate=0.1, n_estimators=100, max_features=1.0, verbose=False,
                 min_samples_leaf=1, max_depth=3, subsample=1.0, random_state=None):
        # Store the hyper-parameters untouched; they are only translated to
        # gbm argument names when fit() is called.
        self.learning_rate = learning_rate
        self.n_estimators = n_estimators
        self.max_features = max_features
        self.verbose = verbose
        self.min_samples_leaf = min_samples_leaf
        self.max_depth = max_depth
        self.subsample = subsample
        self.random_state = random_state

    def get_params(self, deep=True):
        """Return the constructor parameters as a dict.

        ``deep`` is accepted for signature compatibility and is not used.
        """
        return {
            'learning_rate': self.learning_rate,
            'n_estimators': self.n_estimators,
            'max_features': self.max_features,
            'verbose': self.verbose,
            'min_samples_leaf': self.min_samples_leaf,
            'max_depth': self.max_depth,
            'subsample': self.subsample,
            'random_state': self.random_state,
        }

    def get_gbm_params(self):
        """Translate the estimator parameters into gbm keyword arguments.

        Returns a dict keyed by R argument names, suitable for ``**``
        expansion into ``gbm.gbm_fit``.
        """
        # NOTE(review): per the gbm documentation, `bag.fraction` is the
        # per-tree row subsampling fraction and `train.fraction` is the
        # leading share of rows used for fitting (the rest being held out).
        # The mapping below (max_features -> bag.fraction,
        # subsample -> train.fraction) is kept unchanged for backward
        # compatibility -- confirm that it is what is intended.
        params = {
            'distribution': 'gaussian',
            'shrinkage': self.learning_rate,
            # Full documented argument name, rather than relying on R's
            # partial matching of `n.tree`.
            'n.trees': self.n_estimators,
            'bag.fraction': self.max_features,
            'verbose': self.verbose,
            'n.minobsinnode': self.min_samples_leaf,
            'interaction.depth': self.max_depth,
        }
        # Only forward train.fraction when it differs from 1.0, so gbm's
        # own default applies otherwise.
        if self.subsample != 1.0:
            params['train.fraction'] = self.subsample
        return params

    def fit(self, X, y):
        """Fit the gbm model on ``X`` / ``y`` and return ``self``.

        Both inputs are converted with ``numpy2ri`` before being handed to
        R.  The fitted R object is stored on ``self.gbm``.
        """
        X_r = numpy2ri(X)
        y_r = numpy2ri(y)
        if self.random_state is not None:
            # Seed R's RNG so repeated fits with the same state agree.
            rpy2.robjects.r('set.seed(%i)' % (self.random_state))
        self.gbm = gbm.gbm_fit(X_r, y_r, **self.get_gbm_params())
        return self

    def predict(self, X, ntree_limit=None):
        """Predict targets for ``X`` using the fitted model.

        Parameters
        ----------
        X : input samples; converted with ``numpy2ri``.
        ntree_limit : number of trees to use for the prediction.  When
            None, ``n_estimators`` is used.

        Returns
        -------
        numpy array built from gbm's prediction result.
        """
        X_r = numpy2ri(X)
        # Test against None explicitly so an explicit 0 is not silently
        # replaced by n_estimators.
        if ntree_limit is None:
            ntree_limit = self.n_estimators
        pred = gbm.predict_gbm(self.gbm, X_r, **{'n.trees': ntree_limit})
        return np.array(pred)
if __name__ == '__main__':
    # Smoke test: fit on the first 200 Friedman #1 samples and check the
    # mean squared error on the remaining 1000 held-out samples.
    from sklearn import datasets
    from sklearn.metrics import mean_squared_error

    X, y = datasets.make_friedman1(n_samples=1200, random_state=0, noise=1.0)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]

    est = GBMRegressor(n_estimators=100, max_depth=4, min_samples_leaf=1,
                       learning_rate=0.1, random_state=0)
    # Fit on the training split only; fitting on the full X, y would leak
    # the test rows into the model and make the score meaningless.
    est.fit(X_train, y_train)
    mse = mean_squared_error(y_test, est.predict(X_test))

    # Explicit check instead of sklearn.utils.testing.assert_less, which is
    # not available in current scikit-learn; `not <` also rejects NaN.
    if not mse < 5.0:
        raise AssertionError('%r not less than %r' % (mse, 5.0))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment