Skip to content

Instantly share code, notes, and snippets.

@KatsuyaITO
Created September 1, 2019 03:55
Show Gist options
  • Select an option

  • Save KatsuyaITO/dab0d94af31767907c58952dac1451b1 to your computer and use it in GitHub Desktop.

Select an option

Save KatsuyaITO/dab0d94af31767907c58952dac1451b1 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"from sklearn import preprocessing\n",
"import numpy as np\n",
"import pandas as pd \n",
"import os\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def reduce_mem_usage(df):\n",
" \"\"\" iterate through all the columns of a dataframe and modify the data type\n",
" to reduce memory usage. \n",
" \"\"\"\n",
" start_mem = df.memory_usage().sum() / 1024**2\n",
" print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))\n",
" \n",
" for col in df.columns:\n",
" col_type = df[col].dtype\n",
" \n",
" if col_type != object:\n",
" c_min = df[col].min()\n",
" c_max = df[col].max()\n",
" if str(col_type)[:3] == 'int':\n",
" if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:\n",
" df[col] = df[col].astype(np.int8)\n",
" elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:\n",
" df[col] = df[col].astype(np.int16)\n",
" elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:\n",
" df[col] = df[col].astype(np.int32)\n",
" elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:\n",
" df[col] = df[col].astype(np.int64) \n",
" else:\n",
" if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:\n",
" df[col] = df[col].astype(np.float16)\n",
" elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:\n",
" df[col] = df[col].astype(np.float32)\n",
" else:\n",
" df[col] = df[col].astype(np.float64)\n",
" else:\n",
" df[col] = df[col].astype('category')\n",
"\n",
" end_mem = df.memory_usage().sum() / 1024**2\n",
" print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))\n",
" print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))\n",
" \n",
" return df"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"train_identity = pd.read_csv('./train_identity.csv', index_col='TransactionID')\n",
"#train_identity = train_identity.dropna(thresh=10)\n",
"train_transaction = pd.read_csv('./train_transaction.csv', index_col='TransactionID')\n",
"#train_transaction = train_transaction.dropna(thresh=10)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"test_identity = pd.read_csv('./test_identity.csv', index_col='TransactionID')\n",
"#test_identity = train_identity.dropna(thresh=10)\n",
"test_transaction = pd.read_csv('./test_transaction.csv', index_col='TransactionID')\n",
"#test_transaction = test_transaction.dropna(thresh=10)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"train_transaction_0 = train_transaction[train_transaction['isFraud'] == 0].sample(frac=0.1)\n",
"train_transaction_1 = train_transaction[train_transaction['isFraud'] == 1]\n",
"train_trans_reduced = pd.concat([train_transaction_0, train_transaction_1])\n",
"\n",
"train = train_trans_reduced.merge(train_identity,how='left', left_index=True, right_index=True)\n",
"train_y = train['isFraud']\n",
"\n",
"# TEST\n",
"test_x = test_transaction.merge(test_identity,how='left', left_index=True, right_index=True)\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"train = train.reset_index()\n",
"test_x = test_x.reset_index()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"rm_cols = ['TransactionID','TransactionDT','isFraud']\n",
"\n",
"# Drop unnecessary columns\n",
"for col in rm_cols:\n",
" train = train.drop(col, axis=1)\n",
" if col != \"isFraud\":\n",
" test_x = test_x.drop(col, axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"del train_transaction\n",
"del train_identity\n",
"\n",
"del test_transaction\n",
"del test_identity"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"p = 'P_emaildomain'\n",
"r = 'R_emaildomain'\n",
"uknown = 'email_not_provided'\n",
"\n",
"for df in [train, test_x]:\n",
" df[p] = df[p].fillna(uknown)\n",
" df[r] = df[r].fillna(uknown)\n",
" \n",
" df['email_check'] = np.where((df[p]==df[r])&(df[p]!=uknown),1,0)\n",
"\n",
" df[p+'_prefix'] = df[p].apply(lambda x: x.split('.')[0])\n",
" df[r+'_prefix'] = df[r].apply(lambda x: x.split('.')[0])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ProductCD\n",
"card4\n",
"card6\n",
"P_emaildomain\n",
"R_emaildomain\n",
"M1\n",
"M2\n",
"M3\n",
"M4\n",
"M5\n",
"M6\n",
"M7\n",
"M8\n",
"M9\n",
"id_12\n",
"id_15\n",
"id_16\n",
"id_23\n",
"id_27\n",
"id_28\n",
"id_29\n",
"id_30\n",
"id_31\n",
"id_33\n",
"id_34\n",
"id_35\n",
"id_36\n",
"id_37\n",
"id_38\n",
"DeviceType\n",
"DeviceInfo\n",
"P_emaildomain_prefix\n",
"R_emaildomain_prefix\n"
]
}
],
"source": [
"for col in list(train):\n",
" if train[col].dtype=='O' or train[col].dtype=='object':\n",
" print(col)\n",
" train[col] = train[col].fillna('unseen_before_label')\n",
" test_x[col] = test_x[col].fillna('unseen_before_label')\n",
" \n",
" le = preprocessing.LabelEncoder()\n",
" le.fit(list(train[col])+list(test_x[col]))\n",
" train[col] = le.transform(train[col])\n",
" test_x[col] = le.transform(test_x[col])\n",
" \n",
" train[col] = train[col]\n",
" test_x[col] = test_x[col]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Memory usage of dataframe is 257.11 MB\n",
"Memory usage after optimization is: 65.83 MB\n",
"Decreased by 74.4%\n",
"Memory usage of dataframe is 1677.73 MB\n",
"Memory usage after optimization is: 455.67 MB\n",
"Decreased by 72.8%\n"
]
}
],
"source": [
"train_x = reduce_mem_usage(train)\n",
"test_x = reduce_mem_usage(test_x)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"train_x.to_pickle(\"./train_x.pkl\")\n",
"train_y.to_pickle(\"./train_y.pkl\")\n",
"test_x.to_pickle(\"./test_x.pkl\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"train_x=pd.read_pickle(\"./train_x.pkl\").values\n",
"train_y=pd.read_pickle(\"./train_y.pkl\").values\n",
"test_x=pd.read_pickle(\"./test_x.pkl\").values"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/sklearn/ensemble/weight_boosting.py:29: DeprecationWarning: numpy.core.umath_tests is an internal NumPy module and should not be imported. It will be removed in a future NumPy release.\n",
" from numpy.core.umath_tests import inner1d\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[WARNING] [2019-09-01 03:22:42,814:EnsembleBuilder(4282876139):c5cb6f35b9d1dcf4fd39b3b2bf90d23a] No models better than random - using Dummy Score!\n",
"[WARNING] [2019-09-01 03:22:42,833:EnsembleBuilder(4282876139):c5cb6f35b9d1dcf4fd39b3b2bf90d23a] No models better than random - using Dummy Score!\n",
"[WARNING] [2019-09-01 03:22:44,839:EnsembleBuilder(4282876139):c5cb6f35b9d1dcf4fd39b3b2bf90d23a] No models better than random - using Dummy Score!\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[WARNING] [2019-09-01 03:22:46,863:EnsembleBuilder(4282876139):c5cb6f35b9d1dcf4fd39b3b2bf90d23a] No models better than random - using Dummy Score!\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[WARNING] [2019-09-01 03:22:48,881:EnsembleBuilder(4282876139):c5cb6f35b9d1dcf4fd39b3b2bf90d23a] No models better than random - using Dummy Score!\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[WARNING] [2019-09-01 03:22:50,898:EnsembleBuilder(4282876139):c5cb6f35b9d1dcf4fd39b3b2bf90d23a] No models better than random - using Dummy Score!\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[WARNING] [2019-09-01 03:22:52,906:EnsembleBuilder(4282876139):c5cb6f35b9d1dcf4fd39b3b2bf90d23a] No models better than random - using Dummy Score!\n",
"[WARNING] [2019-09-01 03:22:54,919:EnsembleBuilder(4282876139):c5cb6f35b9d1dcf4fd39b3b2bf90d23a] No models better than random - using Dummy Score!\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[WARNING] [2019-09-01 03:37:15,909:smac.intensification.intensification.Intensifier] Challenger was the same as the current incumbent; Skipping challenger\n",
"[WARNING] [2019-09-01 03:37:15,909:smac.intensification.intensification.Intensifier] Challenger was the same as the current incumbent; Skipping challenger\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n",
"/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n",
" Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-1\n",
"['/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000000.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000001.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000002.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000003.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000004.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000005.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000006.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000007.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000008.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000009.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000010.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000011.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000012.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000013.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000014.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000015.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000016.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000017.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000018.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000019.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000020.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000021.ensemble', 
'/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000022.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000023.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000024.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000025.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000026.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000027.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000028.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000029.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000030.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000031.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000032.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000033.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000034.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000035.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000036.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000037.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000038.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000039.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000040.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000041.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000042.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000043.ensemble', 
'/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000044.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000045.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000046.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000047.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000048.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000049.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000050.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000051.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000052.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000053.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000054.ensemble']\n"
]
},
{
"data": {
"text/plain": [
"AutoSklearnClassifier(delete_output_folder_after_terminate=False,\n",
" delete_tmp_folder_after_terminate=False,\n",
" disable_evaluator_output=False, ensemble_memory_limit=24576,\n",
" ensemble_nbest=50, ensemble_size=50, exclude_estimators=None,\n",
" exclude_preprocessors=None, get_smac_object_callback=None,\n",
" include_estimators=None, include_preprocessors=None,\n",
" initial_configurations_via_metalearning=25, logging_config=None,\n",
" metadata_directory=None, ml_memory_limit=24576, n_jobs=16,\n",
" output_folder=None, per_run_time_limit=360,\n",
" resampling_strategy='holdout',\n",
" resampling_strategy_arguments=None, seed=1, shared_mode=False,\n",
" smac_scenario_args=None, time_left_for_this_task=1200,\n",
" tmp_folder=None)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import autosklearn.classification\n",
"\n",
"cls = autosklearn.classification.AutoSklearnClassifier(\n",
" time_left_for_this_task=1200,\n",
" n_jobs=16,\n",
" ml_memory_limit=24*1024,\n",
" ensemble_memory_limit=24*1024,\n",
")\n",
"cls.fit(train_x, train_y)\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"auto-sklearn results:\n",
" Dataset name: c5cb6f35b9d1dcf4fd39b3b2bf90d23a\n",
" Metric: accuracy\n",
" Best validation score: 0.890771\n",
" Number of target algorithm runs: 127\n",
" Number of successful target algorithm runs: 73\n",
" Number of crashed target algorithm runs: 12\n",
" Number of target algorithms that exceeded the time limit: 28\n",
" Number of target algorithms that exceeded the memory limit: 14\n",
"\n"
]
}
],
"source": [
"# Text summary of the finished search (metric, best score, run counts).\n",
"run_summary = cls.sprint_statistics()\n",
"print(run_summary)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[(0.400000, SimpleClassificationPipeline({'balancing:strategy': 'none', 'categorical_encoding:__choice__': 'no_encoding', 'classifier:__choice__': 'adaboost', 'imputation:strategy': 'median', 'preprocessor:__choice__': 'random_trees_embedding', 'rescaling:__choice__': 'minmax', 'classifier:adaboost:algorithm': 'SAMME', 'classifier:adaboost:learning_rate': 0.2718026025501933, 'classifier:adaboost:max_depth': 1, 'classifier:adaboost:n_estimators': 484, 'preprocessor:random_trees_embedding:bootstrap': 'False', 'preprocessor:random_trees_embedding:max_depth': 4, 'preprocessor:random_trees_embedding:max_leaf_nodes': 'None', 'preprocessor:random_trees_embedding:min_samples_leaf': 11, 'preprocessor:random_trees_embedding:min_samples_split': 17, 'preprocessor:random_trees_embedding:min_weight_fraction_leaf': 1.0, 'preprocessor:random_trees_embedding:n_estimators': 79},\n",
"dataset_properties={\n",
" 'task': 1,\n",
" 'sparse': False,\n",
" 'multilabel': False,\n",
" 'multiclass': False,\n",
" 'target_type': 'classification',\n",
" 'signed': False})),\n",
"(0.140000, SimpleClassificationPipeline({'balancing:strategy': 'none', 'categorical_encoding:__choice__': 'one_hot_encoding', 'classifier:__choice__': 'random_forest', 'imputation:strategy': 'mean', 'preprocessor:__choice__': 'no_preprocessing', 'rescaling:__choice__': 'standardize', 'categorical_encoding:one_hot_encoding:use_minimum_fraction': 'True', 'classifier:random_forest:bootstrap': 'True', 'classifier:random_forest:criterion': 'gini', 'classifier:random_forest:max_depth': 'None', 'classifier:random_forest:max_features': 0.5, 'classifier:random_forest:max_leaf_nodes': 'None', 'classifier:random_forest:min_impurity_decrease': 0.0, 'classifier:random_forest:min_samples_leaf': 1, 'classifier:random_forest:min_samples_split': 2, 'classifier:random_forest:min_weight_fraction_leaf': 0.0, 'classifier:random_forest:n_estimators': 100, 'categorical_encoding:one_hot_encoding:minimum_fraction': 0.01},\n",
"dataset_properties={\n",
" 'task': 1,\n",
" 'sparse': False,\n",
" 'multilabel': False,\n",
" 'multiclass': False,\n",
" 'target_type': 'classification',\n",
" 'signed': False})),\n",
"(0.080000, SimpleClassificationPipeline({'balancing:strategy': 'weighting', 'categorical_encoding:__choice__': 'one_hot_encoding', 'classifier:__choice__': 'random_forest', 'imputation:strategy': 'mean', 'preprocessor:__choice__': 'liblinear_svc_preprocessor', 'rescaling:__choice__': 'standardize', 'categorical_encoding:one_hot_encoding:use_minimum_fraction': 'True', 'classifier:random_forest:bootstrap': 'False', 'classifier:random_forest:criterion': 'gini', 'classifier:random_forest:max_depth': 'None', 'classifier:random_forest:max_features': 0.7983157215145903, 'classifier:random_forest:max_leaf_nodes': 'None', 'classifier:random_forest:min_impurity_decrease': 0.0, 'classifier:random_forest:min_samples_leaf': 4, 'classifier:random_forest:min_samples_split': 15, 'classifier:random_forest:min_weight_fraction_leaf': 0.0, 'classifier:random_forest:n_estimators': 100, 'preprocessor:liblinear_svc_preprocessor:C': 0.4971515945303584, 'preprocessor:liblinear_svc_preprocessor:dual': 'False', 'preprocessor:liblinear_svc_preprocessor:fit_intercept': 'True', 'preprocessor:liblinear_svc_preprocessor:intercept_scaling': 1, 'preprocessor:liblinear_svc_preprocessor:loss': 'squared_hinge', 'preprocessor:liblinear_svc_preprocessor:multi_class': 'ovr', 'preprocessor:liblinear_svc_preprocessor:penalty': 'l1', 'preprocessor:liblinear_svc_preprocessor:tol': 0.00010268311046018636, 'categorical_encoding:one_hot_encoding:minimum_fraction': 0.001856820833094005},\n",
"dataset_properties={\n",
" 'task': 1,\n",
" 'sparse': False,\n",
" 'multilabel': False,\n",
" 'multiclass': False,\n",
" 'target_type': 'classification',\n",
" 'signed': False})),\n",
"(0.080000, SimpleClassificationPipeline({'balancing:strategy': 'weighting', 'categorical_encoding:__choice__': 'no_encoding', 'classifier:__choice__': 'k_nearest_neighbors', 'imputation:strategy': 'mean', 'preprocessor:__choice__': 'pca', 'rescaling:__choice__': 'minmax', 'classifier:k_nearest_neighbors:n_neighbors': 1, 'classifier:k_nearest_neighbors:p': 1, 'classifier:k_nearest_neighbors:weights': 'uniform', 'preprocessor:pca:keep_variance': 0.7623284783701136, 'preprocessor:pca:whiten': 'False'},\n",
"dataset_properties={\n",
" 'task': 1,\n",
" 'sparse': False,\n",
" 'multilabel': False,\n",
" 'multiclass': False,\n",
" 'target_type': 'classification',\n",
" 'signed': False})),\n",
"(0.060000, SimpleClassificationPipeline({'balancing:strategy': 'none', 'categorical_encoding:__choice__': 'no_encoding', 'classifier:__choice__': 'random_forest', 'imputation:strategy': 'most_frequent', 'preprocessor:__choice__': 'extra_trees_preproc_for_classification', 'rescaling:__choice__': 'quantile_transformer', 'classifier:random_forest:bootstrap': 'False', 'classifier:random_forest:criterion': 'gini', 'classifier:random_forest:max_depth': 'None', 'classifier:random_forest:max_features': 0.8525082104325516, 'classifier:random_forest:max_leaf_nodes': 'None', 'classifier:random_forest:min_impurity_decrease': 0.0, 'classifier:random_forest:min_samples_leaf': 14, 'classifier:random_forest:min_samples_split': 7, 'classifier:random_forest:min_weight_fraction_leaf': 0.0, 'classifier:random_forest:n_estimators': 100, 'preprocessor:extra_trees_preproc_for_classification:bootstrap': 'True', 'preprocessor:extra_trees_preproc_for_classification:criterion': 'gini', 'preprocessor:extra_trees_preproc_for_classification:max_depth': 'None', 'preprocessor:extra_trees_preproc_for_classification:max_features': 0.7397951606097709, 'preprocessor:extra_trees_preproc_for_classification:max_leaf_nodes': 'None', 'preprocessor:extra_trees_preproc_for_classification:min_impurity_decrease': 0.0, 'preprocessor:extra_trees_preproc_for_classification:min_samples_leaf': 17, 'preprocessor:extra_trees_preproc_for_classification:min_samples_split': 19, 'preprocessor:extra_trees_preproc_for_classification:min_weight_fraction_leaf': 0.0, 'preprocessor:extra_trees_preproc_for_classification:n_estimators': 100, 'rescaling:quantile_transformer:n_quantiles': 642, 'rescaling:quantile_transformer:output_distribution': 'normal'},\n",
"dataset_properties={\n",
" 'task': 1,\n",
" 'sparse': False,\n",
" 'multilabel': False,\n",
" 'multiclass': False,\n",
" 'target_type': 'classification',\n",
" 'signed': False})),\n",
"(0.060000, SimpleClassificationPipeline({'balancing:strategy': 'weighting', 'categorical_encoding:__choice__': 'no_encoding', 'classifier:__choice__': 'extra_trees', 'imputation:strategy': 'median', 'preprocessor:__choice__': 'feature_agglomeration', 'rescaling:__choice__': 'quantile_transformer', 'classifier:extra_trees:bootstrap': 'False', 'classifier:extra_trees:criterion': 'gini', 'classifier:extra_trees:max_depth': 'None', 'classifier:extra_trees:max_features': 0.7961585059091191, 'classifier:extra_trees:max_leaf_nodes': 'None', 'classifier:extra_trees:min_impurity_decrease': 0.0, 'classifier:extra_trees:min_samples_leaf': 3, 'classifier:extra_trees:min_samples_split': 19, 'classifier:extra_trees:min_weight_fraction_leaf': 0.0, 'classifier:extra_trees:n_estimators': 100, 'preprocessor:feature_agglomeration:affinity': 'euclidean', 'preprocessor:feature_agglomeration:linkage': 'average', 'preprocessor:feature_agglomeration:n_clusters': 304, 'preprocessor:feature_agglomeration:pooling_func': 'max', 'rescaling:quantile_transformer:n_quantiles': 1529, 'rescaling:quantile_transformer:output_distribution': 'normal'},\n",
"dataset_properties={\n",
" 'task': 1,\n",
" 'sparse': False,\n",
" 'multilabel': False,\n",
" 'multiclass': False,\n",
" 'target_type': 'classification',\n",
" 'signed': False})),\n",
"(0.060000, SimpleClassificationPipeline({'balancing:strategy': 'weighting', 'categorical_encoding:__choice__': 'no_encoding', 'classifier:__choice__': 'random_forest', 'imputation:strategy': 'most_frequent', 'preprocessor:__choice__': 'feature_agglomeration', 'rescaling:__choice__': 'quantile_transformer', 'classifier:random_forest:bootstrap': 'True', 'classifier:random_forest:criterion': 'gini', 'classifier:random_forest:max_depth': 'None', 'classifier:random_forest:max_features': 0.9017016635679949, 'classifier:random_forest:max_leaf_nodes': 'None', 'classifier:random_forest:min_impurity_decrease': 0.0, 'classifier:random_forest:min_samples_leaf': 10, 'classifier:random_forest:min_samples_split': 19, 'classifier:random_forest:min_weight_fraction_leaf': 0.0, 'classifier:random_forest:n_estimators': 100, 'preprocessor:feature_agglomeration:affinity': 'manhattan', 'preprocessor:feature_agglomeration:linkage': 'complete', 'preprocessor:feature_agglomeration:n_clusters': 350, 'preprocessor:feature_agglomeration:pooling_func': 'median', 'rescaling:quantile_transformer:n_quantiles': 1453, 'rescaling:quantile_transformer:output_distribution': 'normal'},\n",
"dataset_properties={\n",
" 'task': 1,\n",
" 'sparse': False,\n",
" 'multilabel': False,\n",
" 'multiclass': False,\n",
" 'target_type': 'classification',\n",
" 'signed': False})),\n",
"(0.040000, SimpleClassificationPipeline({'balancing:strategy': 'none', 'categorical_encoding:__choice__': 'no_encoding', 'classifier:__choice__': 'decision_tree', 'imputation:strategy': 'most_frequent', 'preprocessor:__choice__': 'feature_agglomeration', 'rescaling:__choice__': 'quantile_transformer', 'classifier:decision_tree:criterion': 'entropy', 'classifier:decision_tree:max_depth_factor': 1.5392086826574303, 'classifier:decision_tree:max_features': 1.0, 'classifier:decision_tree:max_leaf_nodes': 'None', 'classifier:decision_tree:min_impurity_decrease': 0.0, 'classifier:decision_tree:min_samples_leaf': 4, 'classifier:decision_tree:min_samples_split': 2, 'classifier:decision_tree:min_weight_fraction_leaf': 0.0, 'preprocessor:feature_agglomeration:affinity': 'euclidean', 'preprocessor:feature_agglomeration:linkage': 'ward', 'preprocessor:feature_agglomeration:n_clusters': 333, 'preprocessor:feature_agglomeration:pooling_func': 'mean', 'rescaling:quantile_transformer:n_quantiles': 1873, 'rescaling:quantile_transformer:output_distribution': 'uniform'},\n",
"dataset_properties={\n",
" 'task': 1,\n",
" 'sparse': False,\n",
" 'multilabel': False,\n",
" 'multiclass': False,\n",
" 'target_type': 'classification',\n",
" 'signed': False})),\n",
"(0.020000, SimpleClassificationPipeline({'balancing:strategy': 'none', 'categorical_encoding:__choice__': 'no_encoding', 'classifier:__choice__': 'random_forest', 'imputation:strategy': 'mean', 'preprocessor:__choice__': 'feature_agglomeration', 'rescaling:__choice__': 'quantile_transformer', 'classifier:random_forest:bootstrap': 'True', 'classifier:random_forest:criterion': 'gini', 'classifier:random_forest:max_depth': 'None', 'classifier:random_forest:max_features': 0.8753505367267883, 'classifier:random_forest:max_leaf_nodes': 'None', 'classifier:random_forest:min_impurity_decrease': 0.0, 'classifier:random_forest:min_samples_leaf': 13, 'classifier:random_forest:min_samples_split': 18, 'classifier:random_forest:min_weight_fraction_leaf': 0.0, 'classifier:random_forest:n_estimators': 100, 'preprocessor:feature_agglomeration:affinity': 'cosine', 'preprocessor:feature_agglomeration:linkage': 'complete', 'preprocessor:feature_agglomeration:n_clusters': 385, 'preprocessor:feature_agglomeration:pooling_func': 'median', 'rescaling:quantile_transformer:n_quantiles': 1713, 'rescaling:quantile_transformer:output_distribution': 'normal'},\n",
"dataset_properties={\n",
" 'task': 1,\n",
" 'sparse': False,\n",
" 'multilabel': False,\n",
" 'multiclass': False,\n",
" 'target_type': 'classification',\n",
" 'signed': False})),\n",
"(0.020000, SimpleClassificationPipeline({'balancing:strategy': 'none', 'categorical_encoding:__choice__': 'one_hot_encoding', 'classifier:__choice__': 'decision_tree', 'imputation:strategy': 'median', 'preprocessor:__choice__': 'feature_agglomeration', 'rescaling:__choice__': 'quantile_transformer', 'categorical_encoding:one_hot_encoding:use_minimum_fraction': 'True', 'classifier:decision_tree:criterion': 'entropy', 'classifier:decision_tree:max_depth_factor': 1.8933795609650959, 'classifier:decision_tree:max_features': 1.0, 'classifier:decision_tree:max_leaf_nodes': 'None', 'classifier:decision_tree:min_impurity_decrease': 0.0, 'classifier:decision_tree:min_samples_leaf': 1, 'classifier:decision_tree:min_samples_split': 16, 'classifier:decision_tree:min_weight_fraction_leaf': 0.0, 'preprocessor:feature_agglomeration:affinity': 'euclidean', 'preprocessor:feature_agglomeration:linkage': 'ward', 'preprocessor:feature_agglomeration:n_clusters': 113, 'preprocessor:feature_agglomeration:pooling_func': 'median', 'rescaling:quantile_transformer:n_quantiles': 1913, 'rescaling:quantile_transformer:output_distribution': 'uniform', 'categorical_encoding:one_hot_encoding:minimum_fraction': 0.00014651481404583772},\n",
"dataset_properties={\n",
" 'task': 1,\n",
" 'sparse': False,\n",
" 'multilabel': False,\n",
" 'multiclass': False,\n",
" 'target_type': 'classification',\n",
" 'signed': False})),\n",
"(0.020000, SimpleClassificationPipeline({'balancing:strategy': 'none', 'categorical_encoding:__choice__': 'one_hot_encoding', 'classifier:__choice__': 'decision_tree', 'imputation:strategy': 'most_frequent', 'preprocessor:__choice__': 'extra_trees_preproc_for_classification', 'rescaling:__choice__': 'minmax', 'categorical_encoding:one_hot_encoding:use_minimum_fraction': 'False', 'classifier:decision_tree:criterion': 'gini', 'classifier:decision_tree:max_depth_factor': 1.3402306722746462, 'classifier:decision_tree:max_features': 1.0, 'classifier:decision_tree:max_leaf_nodes': 'None', 'classifier:decision_tree:min_impurity_decrease': 0.0, 'classifier:decision_tree:min_samples_leaf': 16, 'classifier:decision_tree:min_samples_split': 9, 'classifier:decision_tree:min_weight_fraction_leaf': 0.0, 'preprocessor:extra_trees_preproc_for_classification:bootstrap': 'False', 'preprocessor:extra_trees_preproc_for_classification:criterion': 'gini', 'preprocessor:extra_trees_preproc_for_classification:max_depth': 'None', 'preprocessor:extra_trees_preproc_for_classification:max_features': 0.3494934758427515, 'preprocessor:extra_trees_preproc_for_classification:max_leaf_nodes': 'None', 'preprocessor:extra_trees_preproc_for_classification:min_impurity_decrease': 0.0, 'preprocessor:extra_trees_preproc_for_classification:min_samples_leaf': 18, 'preprocessor:extra_trees_preproc_for_classification:min_samples_split': 17, 'preprocessor:extra_trees_preproc_for_classification:min_weight_fraction_leaf': 0.0, 'preprocessor:extra_trees_preproc_for_classification:n_estimators': 100},\n",
"dataset_properties={\n",
" 'task': 1,\n",
" 'sparse': False,\n",
" 'multilabel': False,\n",
" 'multiclass': False,\n",
" 'target_type': 'classification',\n",
" 'signed': False})),\n",
"(0.020000, SimpleClassificationPipeline({'balancing:strategy': 'none', 'categorical_encoding:__choice__': 'no_encoding', 'classifier:__choice__': 'lda', 'imputation:strategy': 'median', 'preprocessor:__choice__': 'select_percentile_classification', 'rescaling:__choice__': 'quantile_transformer', 'classifier:lda:n_components': 149, 'classifier:lda:shrinkage': 'auto', 'classifier:lda:tol': 0.06031486481933271, 'preprocessor:select_percentile_classification:percentile': 50.0, 'preprocessor:select_percentile_classification:score_func': 'chi2', 'rescaling:quantile_transformer:n_quantiles': 1765, 'rescaling:quantile_transformer:output_distribution': 'normal'},\n",
"dataset_properties={\n",
" 'task': 1,\n",
" 'sparse': False,\n",
" 'multilabel': False,\n",
" 'multiclass': False,\n",
" 'target_type': 'classification',\n",
" 'signed': False})),\n",
"]\n"
]
}
],
"source": [
"print(cls.show_models())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Keep column index 1 of the probability matrix as the submitted score\n",
"# (presumably the positive isFraud class -- confirm against cls.classes_).\n",
"class_probabilities = cls.predict_proba(test_x)\n",
"predictions = class_probabilities[:, 1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load the submission template and set its isFraud column to the predicted\n",
"# scores; the bare name on the last line renders the frame in the notebook.\n",
"template = pd.read_csv('./sample_submission.csv')\n",
"submit = template.assign(isFraud=predictions)\n",
"submit"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Write the submission file without the DataFrame index column.\n",
"output_path = 'submission1.csv'\n",
"submit.to_csv(output_path, index=False)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment