Focusing

Shikhar Gupta Shikhargupta

Focusing

Shikhargupta / ensemble.py

Last active May 12, 2020 14:37

	#### Gradient Boosting Regressor (0.1134)####
	# The model is trained on log(y_train); its predictions are passed through
	# np.exp to bring them back to the original target scale.
	# NOTE(review): the number in the header is presumably this model's
	# validation score -- confirm against the notebook.
	gbregressor = GradientBoostingRegressor(learning_rate=0.1, n_estimators=180)
	gbregressor.fit(X_train, np.log(y_train))
	y_pred = np.exp(gbregressor.predict(X_test))
	# Scale this model's predictions by 0.05 and append them to the running
	# all_pred array (all_pred is created outside the visible lines).
	all_pred = np.concatenate((all_pred, 0.05*y_pred))

	#### Lasso LarsIC (0.119)####
	# Same log-target treatment, using LassoLarsIC with the AIC criterion.
	lassolars = LassoLarsIC(criterion='aic')
	lassolars.fit(X_train, np.log(y_train))
	# NOTE(review): within the visible lines these predictions are not yet
	# added to all_pred -- the snippet appears to be cut off here.
	y_pred = np.exp(lassolars.predict(X_test))

Shikhargupta / param_sweep.py

Created May 12, 2020 14:20

	###################### Parameter sweeping ##########################
	# Sweep the Ridge regularisation strength (alpha) over 10, 20, ..., 290.
	# For every alpha the model is fit on the log of the target, predictions
	# are mapped back with np.exp, and the RMSE between log(y_test) and
	# log(y_pred) is recorded.
	# After the loop val_list[i] is the alpha that produced score_list[i].
	val_list = []
	score_list = []
	# The log-transformed targets do not depend on alpha, so compute them once.
	log_y_train = np.log(y_train)
	log_y_test = np.log(y_test)
	for x in np.arange(10,300,10):
		model = Ridge(alpha=x)
		model.fit(X_train, log_y_train)
		y_pred = np.exp(model.predict(X_test))
		score_list.append(np.sqrt(metrics.mean_squared_error(log_y_test, np.log(y_pred))))
		val_list.append(x)

Shikhargupta / train.py

Created May 12, 2020 13:54

	# Hold out 20% of the training data for validation. Note that X_train and
	# y_train are rebound to the reduced 80% split by this statement.
	X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.2, random_state=0)
	# Deprecated XGBRegressor arguments replaced with their current names:
	#   objective 'reg:linear' -> 'reg:squarederror'
	#   silent=True            -> verbosity=0
	#   nthread=-1             -> n_jobs=-1
	#   seed=42                -> random_state=42
	# missing=None is dropped so the library default (NaN marks a missing
	# value) applies.
	xg_reg = xgb.XGBRegressor(
		base_score=0.5, colsample_bylevel=1, colsample_bytree=0.4,
		gamma=0, learning_rate=0.07, max_delta_step=0, max_depth=3,
		min_child_weight=1.5, n_estimators=400, n_jobs=-1,
		objective='reg:squarederror', reg_alpha=0.75, reg_lambda=0.45,
		scale_pos_weight=1, random_state=42, verbosity=0, subsample=0.6)
	# Fit on the log of the target; np.exp maps predictions back to the
	# original scale.
	xg_reg.fit(X_train,np.log(y_train))
	y_pred = np.exp(xg_reg.predict(X_test))
	# The reported error is the RMSE between log(y_test) and log(y_pred).
	print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(np.log(y_test), np.log(y_pred))))

Shikhargupta / feat_imp.py

Created May 12, 2020 13:30

	# Fit an XGBoost regressor on the log of the target and plot its
	# per-feature importances as a bar chart.
	# Deprecated XGBRegressor arguments replaced with their current names:
	#   objective 'reg:linear' -> 'reg:squarederror'
	#   silent=True            -> verbosity=0
	#   nthread=-1             -> n_jobs=-1
	#   seed=42                -> random_state=42
	# missing=None is dropped so the library default (NaN marks a missing
	# value) applies.
	xg_reg = xgb.XGBRegressor(
		base_score=0.5, colsample_bylevel=1, colsample_bytree=0.4,
		gamma=0, learning_rate=0.07, max_delta_step=0, max_depth=3,
		min_child_weight=1.5, n_estimators=400, n_jobs=-1,
		objective='reg:squarederror', reg_alpha=0.75, reg_lambda=0.45,
		scale_pos_weight=1, random_state=42, verbosity=0, subsample=0.6)
	xg_reg.fit(X_train,np.log(y_train))
	# One bar per feature, positioned by the feature's index in
	# feature_importances_.
	x_ax = np.arange(len(xg_reg.feature_importances_))
	# Very wide canvas so that every feature gets its own bar.
	plt.figure(figsize=(90, 30))
	sns.barplot(x=x_ax, y=xg_reg.feature_importances_)

Shikhargupta / poly.py

Created May 11, 2020 19:06

Shikhargupta / nan.py

Created May 11, 2020 18:12

handling_nan

	# Basement square-footage columns. The only row with a missing BsmtFinSF1
	# has BsmtFinType1 equal to NA, i.e. the house has no basement, so the
	# area is filled with 0. The same row accounts for the missing values in
	# BsmtFinSF2, BsmtUnfSF and TotalBsmtSF, so they get the same treatment.
	for bsmt_col in ('BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF'):
		df_cum[bsmt_col] = df_cum[bsmt_col].fillna(0)