Skip to content

Instantly share code, notes, and snippets.

@neilsummers
Last active January 18, 2016 02:18
Show Gist options
  • Select an option

  • Save neilsummers/446b433e24ddf3466c0a to your computer and use it in GitHub Desktop.

Select an option

Save neilsummers/446b433e24ddf3466c0a to your computer and use it in GitHub Desktop.
GBM sklearn wrapper
import numpy as np
from sklearn.base import BaseEstimator, RegressorMixin
import rpy2
from rpy2.robjects.numpy2ri import numpy2ri
from rpy2.robjects.packages import importr
# Load the R "gbm" package through rpy2 when this module is imported.
# This requires a working R installation with the gbm package available.
gbm = importr('gbm')
class GBMRegressor(BaseEstimator, RegressorMixin):
    """scikit-learn style regressor that delegates to R's ``gbm`` package.

    The constructor takes scikit-learn flavoured hyper-parameter names and
    ``get_gbm_params`` translates them into the keyword arguments passed to
    R's ``gbm.fit``. The loss is fixed to ``'gaussian'``.

    Parameters
    ----------
    learning_rate : passed to gbm as ``shrinkage``.
    n_estimators : passed to gbm as ``n.trees``.
    max_features : passed to gbm as ``bag.fraction``.
    verbose : passed to gbm as ``verbose``.
    min_samples_leaf : passed to gbm as ``n.minobsinnode``.
    max_depth : passed to gbm as ``interaction.depth``.
    subsample : passed to gbm as ``train.fraction``; omitted when equal
        to 1.0 so that gbm uses its own default.
    random_state : if not None, used to call R's ``set.seed`` before
        fitting.
    """

    def __init__(self, learning_rate=0.1, n_estimators=100, max_features=1.0, verbose=False,
                 min_samples_leaf=1, max_depth=3, subsample=1.0, random_state=None):
        self.learning_rate = learning_rate
        self.n_estimators = n_estimators
        self.max_features = max_features
        self.verbose = verbose
        self.min_samples_leaf = min_samples_leaf
        self.max_depth = max_depth
        self.subsample = subsample
        self.random_state = random_state

    def get_params(self, deep=True):
        """Return the constructor parameters as a dict.

        ``deep`` is accepted for scikit-learn API compatibility; it is not
        used because none of the parameters are nested estimators.
        """
        return {
            'learning_rate': self.learning_rate,
            'n_estimators': self.n_estimators,
            'max_features': self.max_features,
            'verbose': self.verbose,
            'min_samples_leaf': self.min_samples_leaf,
            'max_depth': self.max_depth,
            'subsample': self.subsample,
            'random_state': self.random_state,
        }

    def get_gbm_params(self):
        """Translate the estimator's parameters into ``gbm.fit`` keyword arguments."""
        # NOTE(review): scikit-learn's ``subsample`` (row sub-sampling per
        # tree) corresponds to gbm's ``bag.fraction``, while gbm's
        # ``train.fraction`` holds out the tail of the data. The mapping
        # below is kept as originally written so existing callers get the
        # same results -- confirm whether it should be swapped.
        params = {
            'distribution': 'gaussian',
            'shrinkage': self.learning_rate,
            # Use the exact R argument name instead of relying on R's
            # partial argument matching of 'n.tree'.
            'n.trees': self.n_estimators,
            'bag.fraction': self.max_features,
            'verbose': self.verbose,
            'n.minobsinnode': self.min_samples_leaf,
            'interaction.depth': self.max_depth,
        }
        # Only pass train.fraction when the caller asked for something other
        # than the full data set; otherwise leave gbm's default in place.
        if self.subsample != 1.0:
            params['train.fraction'] = self.subsample
        return params

    def fit(self, X, y):
        """Fit the R gbm model on ``X`` / ``y`` and return ``self``.

        ``X`` and ``y`` are converted with ``numpy2ri`` before being handed
        to R. The fitted R object is stored on ``self.gbm``.
        """
        X_ = numpy2ri(X)
        y_ = numpy2ri(y)
        if self.random_state is not None:
            # Seed R's RNG (not numpy's) since the randomness lives in R.
            rpy2.robjects.r('set.seed(%i)' % (self.random_state))
        self.gbm = gbm.gbm_fit(X_, y_, **self.get_gbm_params())
        return self

    def predict(self, X, ntree_limit=None):
        """Predict with the fitted model and return a numpy array.

        ``ntree_limit`` is the number of trees to use; when it is ``None``
        (or any other falsy value such as 0) all ``n_estimators`` trees are
        used.

        Raises
        ------
        AttributeError
            If ``fit`` has not been called yet.
        """
        if not hasattr(self, 'gbm'):
            # Same exception type an unfitted instance raised before, but
            # with a message that says what went wrong.
            raise AttributeError(
                "This GBMRegressor instance is not fitted yet; call 'fit' first.")
        X_ = numpy2ri(X)
        if not ntree_limit:
            ntree_limit = self.n_estimators
        pred = gbm.predict_gbm(self.gbm, X_, **{'n.trees': ntree_limit})
        return np.array(pred)
if __name__ == '__main__':
    # Smoke test: fit on the first 200 Friedman #1 samples and check the
    # mean squared error on the remaining 1000 held-out samples.
    from sklearn import datasets
    from sklearn.metrics import mean_squared_error

    X, y = datasets.make_friedman1(n_samples=1200, random_state=0, noise=1.0)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]

    est = GBMRegressor(n_estimators=100, max_depth=4, min_samples_leaf=1,
                       learning_rate=0.1, random_state=0)
    # Fit on the training split only; fitting on the full X, y would leak
    # the test rows into the model and make the MSE check meaningless.
    est.fit(X_train, y_train)
    mse = mean_squared_error(y_test, est.predict(X_test))

    # Explicit check instead of sklearn.utils.testing.assert_less, which is
    # a private testing helper not available in newer scikit-learn
    # releases. Raising directly also keeps the check active under -O.
    # NOTE(review): the 5.0 threshold was previously evaluated against a
    # model fitted on all rows; confirm it is still appropriate.
    if not mse < 5.0:
        raise AssertionError('%r not less than %r' % (mse, 5.0))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment