Created
September 1, 2019 03:55
-
-
Save KatsuyaITO/dab0d94af31767907c58952dac1451b1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from sklearn.model_selection import train_test_split\n", | |
| "from sklearn import preprocessing\n", | |
| "import numpy as np\n", | |
| "import pandas as pd \n", | |
| "import os\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def reduce_mem_usage(df):\n", | |
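| " \"\"\" Iterate through all the columns of a dataframe and modify the data type\n", | |
| " to reduce memory usage.\n", | |
| "\n", | |
| " Columns whose dtype name starts with 'int' are cast to the first of\n", | |
| " int8/int16/int32/int64 whose range strictly contains the column min and max.\n", | |
| " Every other non-object column is cast to float16, float32 or float64 using the\n", | |
| " same range test only, so the narrower float types can lose precision.\n", | |
| " Object columns are converted to 'category'. Returns the modified dataframe.\n", | |
| " \"\"\"\n", | |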
| " start_mem = df.memory_usage().sum() / 1024**2\n", | |
| " print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))\n", | |
| " \n", | |
| " for col in df.columns:\n", | |
| " col_type = df[col].dtype\n", | |
| " \n", | |
| " if col_type != object:\n", | |
| " c_min = df[col].min()\n", | |
| " c_max = df[col].max()\n", | |
| " if str(col_type)[:3] == 'int':\n", | |
| " if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:\n", | |
| " df[col] = df[col].astype(np.int8)\n", | |
| " elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:\n", | |
| " df[col] = df[col].astype(np.int16)\n", | |
| " elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:\n", | |
| " df[col] = df[col].astype(np.int32)\n", | |
| " elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:\n", | |
| " df[col] = df[col].astype(np.int64) \n", | |
| " else:\n", | |
| " if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:\n", | |
| " df[col] = df[col].astype(np.float16)\n", | |
| " elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:\n", | |
| " df[col] = df[col].astype(np.float32)\n", | |
| " else:\n", | |
| " df[col] = df[col].astype(np.float64)\n", | |
| " else:\n", | |
| " df[col] = df[col].astype('category')\n", | |
| "\n", | |
| " end_mem = df.memory_usage().sum() / 1024**2\n", | |
| " print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))\n", | |
| " print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))\n", | |
| " \n", | |
| " return df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "train_identity = pd.read_csv('./train_identity.csv', index_col='TransactionID')\n", | |
| "#train_identity = train_identity.dropna(thresh=10)\n", | |
| "train_transaction = pd.read_csv('./train_transaction.csv', index_col='TransactionID')\n", | |
| "#train_transaction = train_transaction.dropna(thresh=10)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "test_identity = pd.read_csv('./test_identity.csv', index_col='TransactionID')\n", | |
| "#test_identity = test_identity.dropna(thresh=10)\n", | |
| "test_transaction = pd.read_csv('./test_transaction.csv', index_col='TransactionID')\n", | |
| "#test_transaction = test_transaction.dropna(thresh=10)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "train_transaction_0 = train_transaction[train_transaction['isFraud'] == 0].sample(frac=0.1)\n", | |
| "train_transaction_1 = train_transaction[train_transaction['isFraud'] == 1]\n", | |
| "train_trans_reduced = pd.concat([train_transaction_0, train_transaction_1])\n", | |
| "\n", | |
| "train = train_trans_reduced.merge(train_identity,how='left', left_index=True, right_index=True)\n", | |
| "train_y = train['isFraud']\n", | |
| "\n", | |
| "# TEST\n", | |
| "test_x = test_transaction.merge(test_identity,how='left', left_index=True, right_index=True)\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "train = train.reset_index()\n", | |
| "test_x = test_x.reset_index()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "rm_cols = ['TransactionID','TransactionDT','isFraud']\n", | |
| "\n", | |
| "# Drop unnecessary columns\n", | |
| "for col in rm_cols:\n", | |
| " train = train.drop(col, axis=1)\n", | |
| " if col != \"isFraud\":\n", | |
| " test_x = test_x.drop(col, axis=1)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "del train_transaction\n", | |
| "del train_identity\n", | |
| "\n", | |
| "del test_transaction\n", | |
| "del test_identity" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "p = 'P_emaildomain'\n", | |
| "r = 'R_emaildomain'\n", | |
| "uknown = 'email_not_provided'\n", | |
| "\n", | |
| "for df in [train, test_x]:\n", | |
| " df[p] = df[p].fillna(uknown)\n", | |
| " df[r] = df[r].fillna(uknown)\n", | |
| " \n", | |
| " df['email_check'] = np.where((df[p]==df[r])&(df[p]!=uknown),1,0)\n", | |
| "\n", | |
| " df[p+'_prefix'] = df[p].apply(lambda x: x.split('.')[0])\n", | |
| " df[r+'_prefix'] = df[r].apply(lambda x: x.split('.')[0])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "ProductCD\n", | |
| "card4\n", | |
| "card6\n", | |
| "P_emaildomain\n", | |
| "R_emaildomain\n", | |
| "M1\n", | |
| "M2\n", | |
| "M3\n", | |
| "M4\n", | |
| "M5\n", | |
| "M6\n", | |
| "M7\n", | |
| "M8\n", | |
| "M9\n", | |
| "id_12\n", | |
| "id_15\n", | |
| "id_16\n", | |
| "id_23\n", | |
| "id_27\n", | |
| "id_28\n", | |
| "id_29\n", | |
| "id_30\n", | |
| "id_31\n", | |
| "id_33\n", | |
| "id_34\n", | |
| "id_35\n", | |
| "id_36\n", | |
| "id_37\n", | |
| "id_38\n", | |
| "DeviceType\n", | |
| "DeviceInfo\n", | |
| "P_emaildomain_prefix\n", | |
| "R_emaildomain_prefix\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "for col in list(train):\n", | |
| " if train[col].dtype=='O' or train[col].dtype=='object':\n", | |
| " print(col)\n", | |
| " train[col] = train[col].fillna('unseen_before_label')\n", | |
| " test_x[col] = test_x[col].fillna('unseen_before_label')\n", | |
| " \n", | |
| " le = preprocessing.LabelEncoder()\n", | |
| " le.fit(list(train[col])+list(test_x[col]))\n", | |
| " train[col] = le.transform(train[col])\n", | |
| " test_x[col] = le.transform(test_x[col])\n", | |
| " \n", | |
| " train[col] = train[col]\n", | |
| " test_x[col] = test_x[col]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Memory usage of dataframe is 257.11 MB\n", | |
| "Memory usage after optimization is: 65.83 MB\n", | |
| "Decreased by 74.4%\n", | |
| "Memory usage of dataframe is 1677.73 MB\n", | |
| "Memory usage after optimization is: 455.67 MB\n", | |
| "Decreased by 72.8%\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "train_x = reduce_mem_usage(train)\n", | |
| "test_x = reduce_mem_usage(test_x)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 12, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "train_x.to_pickle(\"./train_x.pkl\")\n", | |
| "train_y.to_pickle(\"./train_y.pkl\")\n", | |
| "test_x.to_pickle(\"./test_x.pkl\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "train_x=pd.read_pickle(\"./train_x.pkl\").values\n", | |
| "train_y=pd.read_pickle(\"./train_y.pkl\").values\n", | |
| "test_x=pd.read_pickle(\"./test_x.pkl\").values" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/sklearn/ensemble/weight_boosting.py:29: DeprecationWarning: numpy.core.umath_tests is an internal NumPy module and should not be imported. It will be removed in a future NumPy release.\n", | |
| " from numpy.core.umath_tests import inner1d\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "[WARNING] [2019-09-01 03:22:42,814:EnsembleBuilder(4282876139):c5cb6f35b9d1dcf4fd39b3b2bf90d23a] No models better than random - using Dummy Score!\n", | |
| "[WARNING] [2019-09-01 03:22:42,833:EnsembleBuilder(4282876139):c5cb6f35b9d1dcf4fd39b3b2bf90d23a] No models better than random - using Dummy Score!\n", | |
| "[WARNING] [2019-09-01 03:22:44,839:EnsembleBuilder(4282876139):c5cb6f35b9d1dcf4fd39b3b2bf90d23a] No models better than random - using Dummy Score!\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "[WARNING] [2019-09-01 03:22:46,863:EnsembleBuilder(4282876139):c5cb6f35b9d1dcf4fd39b3b2bf90d23a] No models better than random - using Dummy Score!\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "[WARNING] [2019-09-01 03:22:48,881:EnsembleBuilder(4282876139):c5cb6f35b9d1dcf4fd39b3b2bf90d23a] No models better than random - using Dummy Score!\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "[WARNING] [2019-09-01 03:22:50,898:EnsembleBuilder(4282876139):c5cb6f35b9d1dcf4fd39b3b2bf90d23a] No models better than random - using Dummy Score!\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "[WARNING] [2019-09-01 03:22:52,906:EnsembleBuilder(4282876139):c5cb6f35b9d1dcf4fd39b3b2bf90d23a] No models better than random - using Dummy Score!\n", | |
| "[WARNING] [2019-09-01 03:22:54,919:EnsembleBuilder(4282876139):c5cb6f35b9d1dcf4fd39b3b2bf90d23a] No models better than random - using Dummy Score!\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "[WARNING] [2019-09-01 03:37:15,909:smac.intensification.intensification.Intensifier] Challenger was the same as the current incumbent; Skipping challenger\n", | |
| "[WARNING] [2019-09-01 03:37:15,909:smac.intensification.intensification.Intensifier] Challenger was the same as the current incumbent; Skipping challenger\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n", | |
| "/usr/local/pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/autosklearn/evaluation/train_evaluator.py:197: RuntimeWarning: Mean of empty slice\n", | |
| " Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "-1\n", | |
| "['/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000000.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000001.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000002.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000003.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000004.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000005.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000006.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000007.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000008.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000009.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000010.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000011.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000012.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000013.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000014.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000015.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000016.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000017.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000018.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000019.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000020.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000021.ensemble', 
'/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000022.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000023.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000024.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000025.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000026.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000027.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000028.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000029.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000030.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000031.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000032.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000033.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000034.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000035.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000036.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000037.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000038.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000039.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000040.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000041.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000042.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000043.ensemble', 
'/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000044.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000045.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000046.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000047.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000048.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000049.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000050.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000051.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000052.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000053.ensemble', '/tmp/autosklearn_tmp_54_5576/.auto-sklearn/ensembles/4282876139.0000000054.ensemble']\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "AutoSklearnClassifier(delete_output_folder_after_terminate=False,\n", | |
| " delete_tmp_folder_after_terminate=False,\n", | |
| " disable_evaluator_output=False, ensemble_memory_limit=24576,\n", | |
| " ensemble_nbest=50, ensemble_size=50, exclude_estimators=None,\n", | |
| " exclude_preprocessors=None, get_smac_object_callback=None,\n", | |
| " include_estimators=None, include_preprocessors=None,\n", | |
| " initial_configurations_via_metalearning=25, logging_config=None,\n", | |
| " metadata_directory=None, ml_memory_limit=24576, n_jobs=16,\n", | |
| " output_folder=None, per_run_time_limit=360,\n", | |
| " resampling_strategy='holdout',\n", | |
| " resampling_strategy_arguments=None, seed=1, shared_mode=False,\n", | |
| " smac_scenario_args=None, time_left_for_this_task=1200,\n", | |
| " tmp_folder=None)" | |
| ] | |
| }, | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "import autosklearn.classification\n", | |
| "\n", | |
| "cls = autosklearn.classification.AutoSklearnClassifier(\n", | |
| " time_left_for_this_task=1200,\n", | |
| " n_jobs=16,\n", | |
| " ml_memory_limit=24*1024,\n", | |
| " ensemble_memory_limit=24*1024,\n", | |
| ")\n", | |
| "cls.fit(train_x, train_y)\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "auto-sklearn results:\n", | |
| " Dataset name: c5cb6f35b9d1dcf4fd39b3b2bf90d23a\n", | |
| " Metric: accuracy\n", | |
| " Best validation score: 0.890771\n", | |
| " Number of target algorithm runs: 127\n", | |
| " Number of successful target algorithm runs: 73\n", | |
| " Number of crashed target algorithm runs: 12\n", | |
| " Number of target algorithms that exceeded the time limit: 28\n", | |
| " Number of target algorithms that exceeded the memory limit: 14\n", | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print(cls.sprint_statistics())" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "[(0.400000, SimpleClassificationPipeline({'balancing:strategy': 'none', 'categorical_encoding:__choice__': 'no_encoding', 'classifier:__choice__': 'adaboost', 'imputation:strategy': 'median', 'preprocessor:__choice__': 'random_trees_embedding', 'rescaling:__choice__': 'minmax', 'classifier:adaboost:algorithm': 'SAMME', 'classifier:adaboost:learning_rate': 0.2718026025501933, 'classifier:adaboost:max_depth': 1, 'classifier:adaboost:n_estimators': 484, 'preprocessor:random_trees_embedding:bootstrap': 'False', 'preprocessor:random_trees_embedding:max_depth': 4, 'preprocessor:random_trees_embedding:max_leaf_nodes': 'None', 'preprocessor:random_trees_embedding:min_samples_leaf': 11, 'preprocessor:random_trees_embedding:min_samples_split': 17, 'preprocessor:random_trees_embedding:min_weight_fraction_leaf': 1.0, 'preprocessor:random_trees_embedding:n_estimators': 79},\n", | |
| "dataset_properties={\n", | |
| " 'task': 1,\n", | |
| " 'sparse': False,\n", | |
| " 'multilabel': False,\n", | |
| " 'multiclass': False,\n", | |
| " 'target_type': 'classification',\n", | |
| " 'signed': False})),\n", | |
| "(0.140000, SimpleClassificationPipeline({'balancing:strategy': 'none', 'categorical_encoding:__choice__': 'one_hot_encoding', 'classifier:__choice__': 'random_forest', 'imputation:strategy': 'mean', 'preprocessor:__choice__': 'no_preprocessing', 'rescaling:__choice__': 'standardize', 'categorical_encoding:one_hot_encoding:use_minimum_fraction': 'True', 'classifier:random_forest:bootstrap': 'True', 'classifier:random_forest:criterion': 'gini', 'classifier:random_forest:max_depth': 'None', 'classifier:random_forest:max_features': 0.5, 'classifier:random_forest:max_leaf_nodes': 'None', 'classifier:random_forest:min_impurity_decrease': 0.0, 'classifier:random_forest:min_samples_leaf': 1, 'classifier:random_forest:min_samples_split': 2, 'classifier:random_forest:min_weight_fraction_leaf': 0.0, 'classifier:random_forest:n_estimators': 100, 'categorical_encoding:one_hot_encoding:minimum_fraction': 0.01},\n", | |
| "dataset_properties={\n", | |
| " 'task': 1,\n", | |
| " 'sparse': False,\n", | |
| " 'multilabel': False,\n", | |
| " 'multiclass': False,\n", | |
| " 'target_type': 'classification',\n", | |
| " 'signed': False})),\n", | |
| "(0.080000, SimpleClassificationPipeline({'balancing:strategy': 'weighting', 'categorical_encoding:__choice__': 'one_hot_encoding', 'classifier:__choice__': 'random_forest', 'imputation:strategy': 'mean', 'preprocessor:__choice__': 'liblinear_svc_preprocessor', 'rescaling:__choice__': 'standardize', 'categorical_encoding:one_hot_encoding:use_minimum_fraction': 'True', 'classifier:random_forest:bootstrap': 'False', 'classifier:random_forest:criterion': 'gini', 'classifier:random_forest:max_depth': 'None', 'classifier:random_forest:max_features': 0.7983157215145903, 'classifier:random_forest:max_leaf_nodes': 'None', 'classifier:random_forest:min_impurity_decrease': 0.0, 'classifier:random_forest:min_samples_leaf': 4, 'classifier:random_forest:min_samples_split': 15, 'classifier:random_forest:min_weight_fraction_leaf': 0.0, 'classifier:random_forest:n_estimators': 100, 'preprocessor:liblinear_svc_preprocessor:C': 0.4971515945303584, 'preprocessor:liblinear_svc_preprocessor:dual': 'False', 'preprocessor:liblinear_svc_preprocessor:fit_intercept': 'True', 'preprocessor:liblinear_svc_preprocessor:intercept_scaling': 1, 'preprocessor:liblinear_svc_preprocessor:loss': 'squared_hinge', 'preprocessor:liblinear_svc_preprocessor:multi_class': 'ovr', 'preprocessor:liblinear_svc_preprocessor:penalty': 'l1', 'preprocessor:liblinear_svc_preprocessor:tol': 0.00010268311046018636, 'categorical_encoding:one_hot_encoding:minimum_fraction': 0.001856820833094005},\n", | |
| "dataset_properties={\n", | |
| " 'task': 1,\n", | |
| " 'sparse': False,\n", | |
| " 'multilabel': False,\n", | |
| " 'multiclass': False,\n", | |
| " 'target_type': 'classification',\n", | |
| " 'signed': False})),\n", | |
| "(0.080000, SimpleClassificationPipeline({'balancing:strategy': 'weighting', 'categorical_encoding:__choice__': 'no_encoding', 'classifier:__choice__': 'k_nearest_neighbors', 'imputation:strategy': 'mean', 'preprocessor:__choice__': 'pca', 'rescaling:__choice__': 'minmax', 'classifier:k_nearest_neighbors:n_neighbors': 1, 'classifier:k_nearest_neighbors:p': 1, 'classifier:k_nearest_neighbors:weights': 'uniform', 'preprocessor:pca:keep_variance': 0.7623284783701136, 'preprocessor:pca:whiten': 'False'},\n", | |
| "dataset_properties={\n", | |
| " 'task': 1,\n", | |
| " 'sparse': False,\n", | |
| " 'multilabel': False,\n", | |
| " 'multiclass': False,\n", | |
| " 'target_type': 'classification',\n", | |
| " 'signed': False})),\n", | |
| "(0.060000, SimpleClassificationPipeline({'balancing:strategy': 'none', 'categorical_encoding:__choice__': 'no_encoding', 'classifier:__choice__': 'random_forest', 'imputation:strategy': 'most_frequent', 'preprocessor:__choice__': 'extra_trees_preproc_for_classification', 'rescaling:__choice__': 'quantile_transformer', 'classifier:random_forest:bootstrap': 'False', 'classifier:random_forest:criterion': 'gini', 'classifier:random_forest:max_depth': 'None', 'classifier:random_forest:max_features': 0.8525082104325516, 'classifier:random_forest:max_leaf_nodes': 'None', 'classifier:random_forest:min_impurity_decrease': 0.0, 'classifier:random_forest:min_samples_leaf': 14, 'classifier:random_forest:min_samples_split': 7, 'classifier:random_forest:min_weight_fraction_leaf': 0.0, 'classifier:random_forest:n_estimators': 100, 'preprocessor:extra_trees_preproc_for_classification:bootstrap': 'True', 'preprocessor:extra_trees_preproc_for_classification:criterion': 'gini', 'preprocessor:extra_trees_preproc_for_classification:max_depth': 'None', 'preprocessor:extra_trees_preproc_for_classification:max_features': 0.7397951606097709, 'preprocessor:extra_trees_preproc_for_classification:max_leaf_nodes': 'None', 'preprocessor:extra_trees_preproc_for_classification:min_impurity_decrease': 0.0, 'preprocessor:extra_trees_preproc_for_classification:min_samples_leaf': 17, 'preprocessor:extra_trees_preproc_for_classification:min_samples_split': 19, 'preprocessor:extra_trees_preproc_for_classification:min_weight_fraction_leaf': 0.0, 'preprocessor:extra_trees_preproc_for_classification:n_estimators': 100, 'rescaling:quantile_transformer:n_quantiles': 642, 'rescaling:quantile_transformer:output_distribution': 'normal'},\n", | |
| "dataset_properties={\n", | |
| " 'task': 1,\n", | |
| " 'sparse': False,\n", | |
| " 'multilabel': False,\n", | |
| " 'multiclass': False,\n", | |
| " 'target_type': 'classification',\n", | |
| " 'signed': False})),\n", | |
| "(0.060000, SimpleClassificationPipeline({'balancing:strategy': 'weighting', 'categorical_encoding:__choice__': 'no_encoding', 'classifier:__choice__': 'extra_trees', 'imputation:strategy': 'median', 'preprocessor:__choice__': 'feature_agglomeration', 'rescaling:__choice__': 'quantile_transformer', 'classifier:extra_trees:bootstrap': 'False', 'classifier:extra_trees:criterion': 'gini', 'classifier:extra_trees:max_depth': 'None', 'classifier:extra_trees:max_features': 0.7961585059091191, 'classifier:extra_trees:max_leaf_nodes': 'None', 'classifier:extra_trees:min_impurity_decrease': 0.0, 'classifier:extra_trees:min_samples_leaf': 3, 'classifier:extra_trees:min_samples_split': 19, 'classifier:extra_trees:min_weight_fraction_leaf': 0.0, 'classifier:extra_trees:n_estimators': 100, 'preprocessor:feature_agglomeration:affinity': 'euclidean', 'preprocessor:feature_agglomeration:linkage': 'average', 'preprocessor:feature_agglomeration:n_clusters': 304, 'preprocessor:feature_agglomeration:pooling_func': 'max', 'rescaling:quantile_transformer:n_quantiles': 1529, 'rescaling:quantile_transformer:output_distribution': 'normal'},\n", | |
| "dataset_properties={\n", | |
| " 'task': 1,\n", | |
| " 'sparse': False,\n", | |
| " 'multilabel': False,\n", | |
| " 'multiclass': False,\n", | |
| " 'target_type': 'classification',\n", | |
| " 'signed': False})),\n", | |
| "(0.060000, SimpleClassificationPipeline({'balancing:strategy': 'weighting', 'categorical_encoding:__choice__': 'no_encoding', 'classifier:__choice__': 'random_forest', 'imputation:strategy': 'most_frequent', 'preprocessor:__choice__': 'feature_agglomeration', 'rescaling:__choice__': 'quantile_transformer', 'classifier:random_forest:bootstrap': 'True', 'classifier:random_forest:criterion': 'gini', 'classifier:random_forest:max_depth': 'None', 'classifier:random_forest:max_features': 0.9017016635679949, 'classifier:random_forest:max_leaf_nodes': 'None', 'classifier:random_forest:min_impurity_decrease': 0.0, 'classifier:random_forest:min_samples_leaf': 10, 'classifier:random_forest:min_samples_split': 19, 'classifier:random_forest:min_weight_fraction_leaf': 0.0, 'classifier:random_forest:n_estimators': 100, 'preprocessor:feature_agglomeration:affinity': 'manhattan', 'preprocessor:feature_agglomeration:linkage': 'complete', 'preprocessor:feature_agglomeration:n_clusters': 350, 'preprocessor:feature_agglomeration:pooling_func': 'median', 'rescaling:quantile_transformer:n_quantiles': 1453, 'rescaling:quantile_transformer:output_distribution': 'normal'},\n", | |
| "dataset_properties={\n", | |
| " 'task': 1,\n", | |
| " 'sparse': False,\n", | |
| " 'multilabel': False,\n", | |
| " 'multiclass': False,\n", | |
| " 'target_type': 'classification',\n", | |
| " 'signed': False})),\n", | |
| "(0.040000, SimpleClassificationPipeline({'balancing:strategy': 'none', 'categorical_encoding:__choice__': 'no_encoding', 'classifier:__choice__': 'decision_tree', 'imputation:strategy': 'most_frequent', 'preprocessor:__choice__': 'feature_agglomeration', 'rescaling:__choice__': 'quantile_transformer', 'classifier:decision_tree:criterion': 'entropy', 'classifier:decision_tree:max_depth_factor': 1.5392086826574303, 'classifier:decision_tree:max_features': 1.0, 'classifier:decision_tree:max_leaf_nodes': 'None', 'classifier:decision_tree:min_impurity_decrease': 0.0, 'classifier:decision_tree:min_samples_leaf': 4, 'classifier:decision_tree:min_samples_split': 2, 'classifier:decision_tree:min_weight_fraction_leaf': 0.0, 'preprocessor:feature_agglomeration:affinity': 'euclidean', 'preprocessor:feature_agglomeration:linkage': 'ward', 'preprocessor:feature_agglomeration:n_clusters': 333, 'preprocessor:feature_agglomeration:pooling_func': 'mean', 'rescaling:quantile_transformer:n_quantiles': 1873, 'rescaling:quantile_transformer:output_distribution': 'uniform'},\n", | |
| "dataset_properties={\n", | |
| " 'task': 1,\n", | |
| " 'sparse': False,\n", | |
| " 'multilabel': False,\n", | |
| " 'multiclass': False,\n", | |
| " 'target_type': 'classification',\n", | |
| " 'signed': False})),\n", | |
| "(0.020000, SimpleClassificationPipeline({'balancing:strategy': 'none', 'categorical_encoding:__choice__': 'no_encoding', 'classifier:__choice__': 'random_forest', 'imputation:strategy': 'mean', 'preprocessor:__choice__': 'feature_agglomeration', 'rescaling:__choice__': 'quantile_transformer', 'classifier:random_forest:bootstrap': 'True', 'classifier:random_forest:criterion': 'gini', 'classifier:random_forest:max_depth': 'None', 'classifier:random_forest:max_features': 0.8753505367267883, 'classifier:random_forest:max_leaf_nodes': 'None', 'classifier:random_forest:min_impurity_decrease': 0.0, 'classifier:random_forest:min_samples_leaf': 13, 'classifier:random_forest:min_samples_split': 18, 'classifier:random_forest:min_weight_fraction_leaf': 0.0, 'classifier:random_forest:n_estimators': 100, 'preprocessor:feature_agglomeration:affinity': 'cosine', 'preprocessor:feature_agglomeration:linkage': 'complete', 'preprocessor:feature_agglomeration:n_clusters': 385, 'preprocessor:feature_agglomeration:pooling_func': 'median', 'rescaling:quantile_transformer:n_quantiles': 1713, 'rescaling:quantile_transformer:output_distribution': 'normal'},\n", | |
| "dataset_properties={\n", | |
| " 'task': 1,\n", | |
| " 'sparse': False,\n", | |
| " 'multilabel': False,\n", | |
| " 'multiclass': False,\n", | |
| " 'target_type': 'classification',\n", | |
| " 'signed': False})),\n", | |
| "(0.020000, SimpleClassificationPipeline({'balancing:strategy': 'none', 'categorical_encoding:__choice__': 'one_hot_encoding', 'classifier:__choice__': 'decision_tree', 'imputation:strategy': 'median', 'preprocessor:__choice__': 'feature_agglomeration', 'rescaling:__choice__': 'quantile_transformer', 'categorical_encoding:one_hot_encoding:use_minimum_fraction': 'True', 'classifier:decision_tree:criterion': 'entropy', 'classifier:decision_tree:max_depth_factor': 1.8933795609650959, 'classifier:decision_tree:max_features': 1.0, 'classifier:decision_tree:max_leaf_nodes': 'None', 'classifier:decision_tree:min_impurity_decrease': 0.0, 'classifier:decision_tree:min_samples_leaf': 1, 'classifier:decision_tree:min_samples_split': 16, 'classifier:decision_tree:min_weight_fraction_leaf': 0.0, 'preprocessor:feature_agglomeration:affinity': 'euclidean', 'preprocessor:feature_agglomeration:linkage': 'ward', 'preprocessor:feature_agglomeration:n_clusters': 113, 'preprocessor:feature_agglomeration:pooling_func': 'median', 'rescaling:quantile_transformer:n_quantiles': 1913, 'rescaling:quantile_transformer:output_distribution': 'uniform', 'categorical_encoding:one_hot_encoding:minimum_fraction': 0.00014651481404583772},\n", | |
| "dataset_properties={\n", | |
| " 'task': 1,\n", | |
| " 'sparse': False,\n", | |
| " 'multilabel': False,\n", | |
| " 'multiclass': False,\n", | |
| " 'target_type': 'classification',\n", | |
| " 'signed': False})),\n", | |
| "(0.020000, SimpleClassificationPipeline({'balancing:strategy': 'none', 'categorical_encoding:__choice__': 'one_hot_encoding', 'classifier:__choice__': 'decision_tree', 'imputation:strategy': 'most_frequent', 'preprocessor:__choice__': 'extra_trees_preproc_for_classification', 'rescaling:__choice__': 'minmax', 'categorical_encoding:one_hot_encoding:use_minimum_fraction': 'False', 'classifier:decision_tree:criterion': 'gini', 'classifier:decision_tree:max_depth_factor': 1.3402306722746462, 'classifier:decision_tree:max_features': 1.0, 'classifier:decision_tree:max_leaf_nodes': 'None', 'classifier:decision_tree:min_impurity_decrease': 0.0, 'classifier:decision_tree:min_samples_leaf': 16, 'classifier:decision_tree:min_samples_split': 9, 'classifier:decision_tree:min_weight_fraction_leaf': 0.0, 'preprocessor:extra_trees_preproc_for_classification:bootstrap': 'False', 'preprocessor:extra_trees_preproc_for_classification:criterion': 'gini', 'preprocessor:extra_trees_preproc_for_classification:max_depth': 'None', 'preprocessor:extra_trees_preproc_for_classification:max_features': 0.3494934758427515, 'preprocessor:extra_trees_preproc_for_classification:max_leaf_nodes': 'None', 'preprocessor:extra_trees_preproc_for_classification:min_impurity_decrease': 0.0, 'preprocessor:extra_trees_preproc_for_classification:min_samples_leaf': 18, 'preprocessor:extra_trees_preproc_for_classification:min_samples_split': 17, 'preprocessor:extra_trees_preproc_for_classification:min_weight_fraction_leaf': 0.0, 'preprocessor:extra_trees_preproc_for_classification:n_estimators': 100},\n", | |
| "dataset_properties={\n", | |
| " 'task': 1,\n", | |
| " 'sparse': False,\n", | |
| " 'multilabel': False,\n", | |
| " 'multiclass': False,\n", | |
| " 'target_type': 'classification',\n", | |
| " 'signed': False})),\n", | |
| "(0.020000, SimpleClassificationPipeline({'balancing:strategy': 'none', 'categorical_encoding:__choice__': 'no_encoding', 'classifier:__choice__': 'lda', 'imputation:strategy': 'median', 'preprocessor:__choice__': 'select_percentile_classification', 'rescaling:__choice__': 'quantile_transformer', 'classifier:lda:n_components': 149, 'classifier:lda:shrinkage': 'auto', 'classifier:lda:tol': 0.06031486481933271, 'preprocessor:select_percentile_classification:percentile': 50.0, 'preprocessor:select_percentile_classification:score_func': 'chi2', 'rescaling:quantile_transformer:n_quantiles': 1765, 'rescaling:quantile_transformer:output_distribution': 'normal'},\n", | |
| "dataset_properties={\n", | |
| " 'task': 1,\n", | |
| " 'sparse': False,\n", | |
| " 'multilabel': False,\n", | |
| " 'multiclass': False,\n", | |
| " 'target_type': 'classification',\n", | |
| " 'signed': False})),\n", | |
| "]\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print(cls.show_models())" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "predictions = cls.predict_proba(test_x)[:, 1]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "submit = pd.read_csv('./sample_submission.csv')\n", | |
| "submit['isFraud'] = predictions\n", | |
| "submit" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "submit.to_csv('submission1.csv', index=False)\n" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.6.3" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment