Skip to content

Instantly share code, notes, and snippets.

View Shikhargupta's full-sized avatar
:octocat:
Focusing

Shikhar Gupta Shikhargupta

:octocat:
Focusing
View GitHub Profile
#### Gradient Boosting Regressor (0.1134) ####
# Model the target in log space and map predictions back with exp;
# sklearn estimators return self from fit(), so fit/predict can be chained.
gbregressor = GradientBoostingRegressor(learning_rate=0.1, n_estimators=180)
y_pred = np.exp(gbregressor.fit(X_train, np.log(y_train)).predict(X_test))
# Blend this model into the running ensemble with a 0.05 weight.
all_pred = np.concatenate((all_pred, 0.05 * y_pred))
#### Lasso LarsIC (0.119) ####
# AIC-driven LassoLars, fitted on the log of the target; predictions are
# exponentiated back to the original scale.
lassolars = LassoLarsIC(criterion='aic')
y_pred = np.exp(lassolars.fit(X_train, np.log(y_train)).predict(X_test))
###################### Parameter sweeping ##########################
# Sweep the Ridge regularization strength alpha over [10, 300) in steps of 10
# and record, for each alpha, the RMSE between log(y_test) and log(y_pred).
# FIX: the pasted version had the loop body unindented, which is a syntax
# error in Python; the body is restored under the for-loop here.
val_list = []
score_list = []
for x in np.arange(10, 300, 10):
    model = Ridge(alpha=x)
    model.fit(X_train, np.log(y_train))
    y_pred = np.exp(model.predict(X_test))
    # Score in log space: np.log(np.exp(p)) round-trips back to the raw
    # log-space prediction, so this is RMSE on the log-transformed target.
    score_list.append(np.sqrt(metrics.mean_squared_error(np.log(y_test), np.log(y_pred))))
    val_list.append(x)
# Carve a validation split (20%, fixed seed) out of the current training data.
# NOTE(review): this rebinds X_test/y_test, so the RMSE below is measured on
# the validation split, not the original test set — confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.2, random_state=0)
# XGBoost regressor; the target is fitted in log space as elsewhere in the file.
xg_reg = xgb.XGBRegressor(
    base_score=0.5,
    colsample_bylevel=1,
    colsample_bytree=0.4,
    gamma=0,
    learning_rate=0.07,
    max_delta_step=0,
    max_depth=3,
    min_child_weight=1.5,
    missing=None,
    n_estimators=400,
    nthread=-1,
    objective='reg:linear',  # NOTE(review): deprecated alias of 'reg:squarederror' in newer xgboost
    reg_alpha=0.75,
    reg_lambda=0.45,
    scale_pos_weight=1,
    seed=42,
    silent=True,
    subsample=0.6,
)
xg_reg.fit(X_train, np.log(y_train))
y_pred = np.exp(xg_reg.predict(X_test))
rmse = np.sqrt(metrics.mean_squared_error(np.log(y_test), np.log(y_pred)))
print('Root Mean Squared Error:', rmse)
# Refit the same XGBoost configuration and plot per-feature importances.
xg_reg = xgb.XGBRegressor(
    base_score=0.5,
    colsample_bylevel=1,
    colsample_bytree=0.4,
    gamma=0,
    learning_rate=0.07,
    max_delta_step=0,
    max_depth=3,
    min_child_weight=1.5,
    missing=None,
    n_estimators=400,
    nthread=-1,
    objective='reg:linear',
    reg_alpha=0.75,
    reg_lambda=0.45,
    scale_pos_weight=1,
    seed=42,
    silent=True,
    subsample=0.6,
)
xg_reg.fit(X_train, np.log(y_train))
# One bar per feature, indexed in column order.
importances = xg_reg.feature_importances_
x_ax = np.arange(len(importances))
plt.figure(figsize=(90, 30))
sns.barplot(x=x_ax, y=importances)
# Polynomial feature expansion: for every column in poly_cols, append a
# squared and a cubed copy to df_cum.
# FIX: the pasted version had both loop bodies unindented (syntax error);
# the bodies are restored here. The two loops are kept separate so that all
# '_square' columns are inserted before any '_cube' column, preserving the
# original column order.
#Square
for col in poly_cols:
    df_cum[col + '_square'] = df_cum[col]**2
#Cube
for col in poly_cols:
    df_cum[col + '_cube'] = df_cum[col]**3
#Square root
for col in poly_cols:
@Shikhargupta
Shikhargupta / nan.py
Created May 11, 2020 18:12
handling_nan
# For BsmtFinSF1 the only row with a missing value also had BsmtFinType1 == NA,
# i.e. the house has no basement, so a square footage of 0 is the correct fill.
# The same observation holds for BsmtFinSF2, and the same data point covers
# BsmtUnfSF and TotalBsmtSF — all four are filled with zeros.
for bsmt_col in ('BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF'):
    df_cum[bsmt_col] = df_cum[bsmt_col].fillna(0)