dinya · May 20, 2021 05:20
diff --git a/columns_as_multiindex_example.ipynb b/columns_as_multiindex_example.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import Counter\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "\n",
    "def index_as_multiindex(index, suffix_on_overlap=True, strip_empty_ends=False):\n",
    "    \"\"\"Replace flat index (:obj:`pandas.Index`) with hierarchical (:obj:`pandas.MultiIndex`).\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    index : pandas.Index\n",
    "        :obj:`pandas.Index`-object to be converted to :obj:`pandas.MultiIndex`-object\n",
    "    suffix_on_overlap : bool (default True)\n",
    "        Add suffixes (_0, _1, ...) to overlapping column names. To raise an exception on overlapping columns use False.\n",
    "    strip_empty_ends : bool (default False)\n",
    "        Strip \"\" in the tuples (empty str).\n",
    "\n",
    "    See Also\n",
    "    --------\n",
    "    columns_as_multiindex\n",
    "\n",
    "    Examples\n",
    "    --------\n",
    "    See :func:`columns_as_multiindex`\n",
    "    \"\"\"\n",
    "\n",
    "    # TODO: To be reviewed with the pd.Index(index.values.tolist(), tupleize_cols=True).\n",
    "    # It is required that the elements of the list index.values.tolist() are reduced to the same shape!\n",
    "    columns = []\n",
    "    for idx in index.tolist():\n",
    "        if isinstance(idx, str) or not np.iterable(idx):\n",
    "            idx = (idx, )\n",
    "        if strip_empty_ends:\n",
    "            idx = list_rstrip(idx)\n",
    "        columns.append(idx)\n",
    "\n",
    "    max_len = max([len(c) for c in columns])\n",
    "    columns = [tuple(c + (\"\", ) * (max_len - len(c))) for c in columns]\n",
    "    # If same named columns exists then add suffixes _0, _1, _2, ... to the top level\n",
    "    duplicates = [item for item, count in Counter(columns).items() if count > 1]\n",
    "    if len(duplicates):\n",
    "        if suffix_on_overlap:\n",
    "            for dup in duplicates:\n",
    "                indexes = [i for i, x in enumerate(columns) if x == dup]\n",
    "                for i, idx in enumerate(indexes):\n",
    "                    col = list(columns[idx])\n",
    "                    col[0] = col[0] + \"_%i\" % i\n",
    "                    columns[idx] = tuple(col)\n",
    "        else:\n",
    "            raise ValueError(\"columns overlap, but the auto-renaming is disabled\")\n",
    "\n",
    "    return pd.MultiIndex.from_tuples(columns)\n",
    "\n",
    "\n",
    "def columns_as_multiindex(df, inplace=False, suffix_on_overlap=True, strip_empty_ends=False):\n",
    "    \"\"\"Replace flat columns (:obj:`pandas.Index`) with hierarchical (:obj:`pandas.MultiIndex`).\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df : pandas.DataFrame\n",
    "        Dataframe which contains the columns to be converted to MultiIndex.\n",
    "    suffix_on_overlap : bool (default True)\n",
    "        Add suffixes (_0, _1, ...) to overlapping column names. To raise an exception on overlapping columns use False.\n",
    "    inplace : bool (default False)\n",
    "        If ``True``, do operation inplace and return ``None``.\n",
    "    strip_empty_ends : bool (default False)\n",
    "        Strip \"\" in the tuples (empty str).\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame or None\n",
    "        Returns dataframe with modifed columns or ``None`` (depends on `inplace` parameter value).\n",
    "\n",
    "    Notes\n",
    "    -----\n",
    "    Reverse operation (hierarchical to flat)::\n",
    "\n",
    "        df.columns = [tuple(col) for col in df.columns.values]\n",
    "\n",
    "    See for details https://stackoverflow.com/questions/14507794/pandas-how-to-flatten-a-hierarchical-index-in-columns\n",
    "\n",
    "    Examples\n",
    "    --------\n",
    "    >>> df0 = pd.DataFrame([[1,2], [10, 20], [100, 200]], columns=[\"A\", \"B\"])\n",
    "    >>> df0\n",
    "    ...      A    B\n",
    "    ... 0    1    2\n",
    "    ... 1   10   20\n",
    "    ... 2  100  200\n",
    "\n",
    "    >>> columns = pd.MultiIndex.from_tuples([(\"C\", \"C0\"), (\"D\", \"D0\")])\n",
    "    >>> df1 = pd.DataFrame([[1,3], [10, 30], [100, 300]], columns=columns)\n",
    "    >>> df1\n",
    "    ...      C    D\n",
    "    ...     C0   D0\n",
    "    ... 0    1    3\n",
    "    ... 1   10   30\n",
    "    ... 2  100  300\n",
    "\n",
    "    Merging ``df0`` with ``df1`` returns the dataframe contains tuples in the column names\n",
    "\n",
    "    >>> df = pd.merge(df0, df1, left_on=\"A\", right_on=[(\"C\", \"C0\")])\n",
    "    >>> df\n",
    "    ...      A    B  (C, C0)  (D, D0)\n",
    "    ... 0    1    2        1        3\n",
    "    ... 1   10   20       10       30\n",
    "    ... 2  100  200      100      300\n",
    "\n",
    "    It's a bit ugly. Let's beautify the header\n",
    "\n",
    "    >>> columns_as_multiindex(df)\n",
    "    >>> df\n",
    "    ...      A    B    C    D\n",
    "    ...               C0   D0\n",
    "    ... 0    1    2    1    3\n",
    "    ... 1   10   20   10   30\n",
    "    ... 2  100  200  100  300\n",
    "\n",
    "    If same named columns exists then add suffixes _0, _1, _2, ... to the top level\n",
    "\n",
    "    >>> df1_columns = pd.MultiIndex.from_tuples([(\"A0\", \"B0\", \"C0\"), (\"A1\", \"B1\", \"C1\"), (\"A2\", \"B2\", \"\")])\n",
    "    >>> df1 = pd.DataFrame([[1, 2, 3], [10, 20, 30]], columns=df1_columns)\n",
    "    >>> df1\n",
    "    ...    A0  A1  A2\n",
    "    ...    B0  B1  B2\n",
    "    ...    C0  C1\n",
    "    ... 0   1   2   3\n",
    "    ... 1  10  20  30\n",
    "    >>> df2_columns = pd.MultiIndex.from_tuples([(\"X0\", \"Y0\"), (\"X1\", \"Y1\"), (\"A2\", \"B2\")])\n",
    "    >>> df2 = pd.DataFrame([[1, 200, 300], [10, 200, 300]], columns=df2_columns)\n",
    "    >>> df2\n",
    "    ...    X0   X1   A2\n",
    "    ...    Y0   Y1   B2\n",
    "    ... 0   1  200  300\n",
    "    ... 1  10  200  300\n",
    "    >>> df_merge = pd.merge(df1, df2, left_on=[(\"A0\", \"B0\",\"C0\")], right_on=[(\"X0\", \"Y0\")])\n",
    "    >>> df_merge\n",
    "    ...    (A0, B0, C0)  (A1, B1, C1)  (A2, B2, )  (X0, Y0)  (X1, Y1)  (A2, B2)\n",
    "    ... 0             1             2           3         1       200       300\n",
    "    ... 1            10            20          30        10       200       300\n",
    "    >>> columns_as_multiindex(df_merge)\n",
    "    ...    A0  A1 A2_0  X0   X1 A2_1\n",
    "    ...    B0  B1   B2  Y0   Y1   B2\n",
    "    ...    C0  C1\n",
    "    ... 0   1   2    3   1  200  300\n",
    "    ... 1  10  20   30  10  200  300\n",
    "    >>> columns_as_multiindex(df_merge, suffix_on_overlap=False)\n",
    "    ... Traceback (most recent call last)\n",
    "    ... ...\n",
    "    ... ValueError: columns overlap, but the auto-renaming is disabled\n",
    "    \"\"\"\n",
    "\n",
    "    columns_new = index_as_multiindex(df.columns, suffix_on_overlap=suffix_on_overlap,\n",
    "                                      strip_empty_ends=strip_empty_ends)\n",
    "\n",
    "    if inplace:\n",
    "        df.columns = columns_new\n",
    "    else:\n",
    "        df_new = df.copy()\n",
    "        df_new.columns = columns_new\n",
    "        return df_new"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'1.1.3'"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "pd.__version__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>A0</th>\n",
       "      <th>A1</th>\n",
       "      <th>A2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>B0</th>\n",
       "      <th>B1</th>\n",
       "      <th>B2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>C0</th>\n",
       "      <th>C1</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10</td>\n",
       "      <td>20</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   A0  A1  A2\n",
       "   B0  B1  B2\n",
       "   C0  C1    \n",
       "0   1   2   3\n",
       "1  10  20  30"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1_columns = pd.MultiIndex.from_tuples([(\"A0\", \"B0\", \"C0\"), (\"A1\", \"B1\", \"C1\"), (\"A2\", \"B2\", \"\")])\n",
    "df1 = pd.DataFrame([[1, 2, 3], [10, 20, 30]], columns=df1_columns)\n",
    "df1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>X0</th>\n",
       "      <th>X1</th>\n",
       "      <th>A2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>Y0</th>\n",
       "      <th>Y1</th>\n",
       "      <th>B2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>200</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10</td>\n",
       "      <td>200</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   X0   X1   A2\n",
       "   Y0   Y1   B2\n",
       "0   1  200  300\n",
       "1  10  200  300"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2_columns = pd.MultiIndex.from_tuples([(\"X0\", \"Y0\"), (\"X1\", \"Y1\"), (\"A2\", \"B2\")])\n",
    "df2 = pd.DataFrame([[1, 200, 300], [10, 200, 300]], columns=df2_columns)\n",
    "df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "d:\\00data\\opt\\miniconda3\\lib\\site-packages\\pandas\\core\\reshape\\merge.py:643: UserWarning: merging between different levels can give an unintended result (3 levels on the left,2 on the right)\n",
      "  warnings.warn(msg, UserWarning)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>(A0, B0, C0)</th>\n",
       "      <th>(A1, B1, C1)</th>\n",
       "      <th>(A2, B2, )</th>\n",
       "      <th>(X0, Y0)</th>\n",
       "      <th>(X1, Y1)</th>\n",
       "      <th>(A2, B2)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>200</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10</td>\n",
       "      <td>20</td>\n",
       "      <td>30</td>\n",
       "      <td>10</td>\n",
       "      <td>200</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   (A0, B0, C0)  (A1, B1, C1)  (A2, B2, )  (X0, Y0)  (X1, Y1)  (A2, B2)\n",
       "0             1             2           3         1       200       300\n",
       "1            10            20          30        10       200       300"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_merge = pd.merge(df1, df2, left_on=[(\"A0\", \"B0\",\"C0\")], right_on=[(\"X0\", \"Y0\")])\n",
    "df_merge"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>A0</th>\n",
       "      <th>A1</th>\n",
       "      <th>A2_0</th>\n",
       "      <th>X0</th>\n",
       "      <th>X1</th>\n",
       "      <th>A2_1</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>B0</th>\n",
       "      <th>B1</th>\n",
       "      <th>B2</th>\n",
       "      <th>Y0</th>\n",
       "      <th>Y1</th>\n",
       "      <th>B2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>C0</th>\n",
       "      <th>C1</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>200</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10</td>\n",
       "      <td>20</td>\n",
       "      <td>30</td>\n",
       "      <td>10</td>\n",
       "      <td>200</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   A0  A1 A2_0  X0   X1 A2_1\n",
       "   B0  B1   B2  Y0   Y1   B2\n",
       "   C0  C1                   \n",
       "0   1   2    3   1  200  300\n",
       "1  10  20   30  10  200  300"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "columns_as_multiindex(df_merge)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "columns overlap, but the auto-renaming is disabled",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-7-0857603b176a>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mcolumns_as_multiindex\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf_merge\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msuffix_on_overlap\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32m<ipython-input-1-ae2f2a30fb5d>\u001b[0m in \u001b[0;36mcolumns_as_multiindex\u001b[1;34m(df, inplace, suffix_on_overlap, strip_empty_ends)\u001b[0m\n\u001b[0;32m    153\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    154\u001b[0m     columns_new = index_as_multiindex(df.columns, suffix_on_overlap=suffix_on_overlap,\n\u001b[1;32m--> 155\u001b[1;33m                                       strip_empty_ends=strip_empty_ends)\n\u001b[0m\u001b[0;32m    156\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    157\u001b[0m     \u001b[1;32mif\u001b[0m \u001b[0minplace\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m<ipython-input-1-ae2f2a30fb5d>\u001b[0m in \u001b[0;36mindex_as_multiindex\u001b[1;34m(index, suffix_on_overlap, strip_empty_ends)\u001b[0m\n\u001b[0;32m     49\u001b[0m                     \u001b[0mcolumns\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0midx\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtuple\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcol\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     50\u001b[0m         \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 51\u001b[1;33m             \u001b[1;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"columns overlap, but the auto-renaming is disabled\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     52\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     53\u001b[0m     \u001b[1;32mreturn\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mMultiIndex\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfrom_tuples\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mValueError\u001b[0m: columns overlap, but the auto-renaming is disabled"
     ]
    }
   ],
   "source": [
    "columns_as_multiindex(df_merge, suffix_on_overlap=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "See also issue https://github.com/pandas-dev/pandas/issues/26699"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"from collections import Counter\n",
	"\n",
	"import pandas as pd\n",
	"import numpy as np\n",
	"\n",
	"\n",
	"def index_as_multiindex(index, suffix_on_overlap=True, strip_empty_ends=False):\n",
	" \"\"\"Replace flat index (:obj:`pandas.Index`) with hierarchical (:obj:`pandas.MultiIndex`).\n",
	"\n",
	" Parameters\n",
	" ----------\n",
	" index : pandas.Index\n",
	" :obj:`pandas.Index`-object to be converted to :obj:`pandas.MultiIndex`-object\n",
	" suffix_on_overlap : bool (default True)\n",
	" Add suffixes (_0, _1, ...) to overlapping column names. To raise an exception on overlapping columns use False.\n",
	" strip_empty_ends : bool (default False)\n",
	" Strip \"\" in the tuples (empty str).\n",
	"\n",
	" See Also\n",
	" --------\n",
	" columns_as_multiindex\n",
	"\n",
	" Examples\n",
	" --------\n",
	" See :func:`columns_as_multiindex`\n",
	" \"\"\"\n",
	"\n",
	" # TODO: To be reviewed with the pd.Index(index.values.tolist(), tupleize_cols=True).\n",
	" # It is required that the elements of the list index.values.tolist() are reduced to the same shape!\n",
	" columns = []\n",
	" for idx in index.tolist():\n",
	" if isinstance(idx, str) or not np.iterable(idx):\n",
	" idx = (idx, )\n",
	" if strip_empty_ends:\n",
	" idx = list_rstrip(idx)\n",
	" columns.append(idx)\n",
	"\n",
	" max_len = max([len(c) for c in columns])\n",
	" columns = [tuple(c + (\"\", ) * (max_len - len(c))) for c in columns]\n",
	" # If same named columns exists then add suffixes _0, _1, _2, ... to the top level\n",
	" duplicates = [item for item, count in Counter(columns).items() if count > 1]\n",
	" if len(duplicates):\n",
	" if suffix_on_overlap:\n",
	" for dup in duplicates:\n",
	" indexes = [i for i, x in enumerate(columns) if x == dup]\n",
	" for i, idx in enumerate(indexes):\n",
	" col = list(columns[idx])\n",
	" col[0] = col[0] + \"_%i\" % i\n",
	" columns[idx] = tuple(col)\n",
	" else:\n",
	" raise ValueError(\"columns overlap, but the auto-renaming is disabled\")\n",
	"\n",
	" return pd.MultiIndex.from_tuples(columns)\n",
	"\n",
	"\n",
	"def columns_as_multiindex(df, inplace=False, suffix_on_overlap=True, strip_empty_ends=False):\n",
	" \"\"\"Replace flat columns (:obj:`pandas.Index`) with hierarchical (:obj:`pandas.MultiIndex`).\n",
	"\n",
	" Parameters\n",
	" ----------\n",
	" df : pandas.DataFrame\n",
	" Dataframe which contains the columns to be converted to MultiIndex.\n",
	" suffix_on_overlap : bool (default True)\n",
	" Add suffixes (_0, _1, ...) to overlapping column names. To raise an exception on overlapping columns use False.\n",
	" inplace : bool (default False)\n",
	" If ``True``, do operation inplace and return ``None``.\n",
	" strip_empty_ends : bool (default False)\n",
	" Strip \"\" in the tuples (empty str).\n",
	"\n",
	" Returns\n",
	" -------\n",
	" pandas.DataFrame or None\n",
	" Returns dataframe with modifed columns or ``None`` (depends on `inplace` parameter value).\n",
	"\n",
	" Notes\n",
	" -----\n",
	" Reverse operation (hierarchical to flat)::\n",
	"\n",
	" df.columns = [tuple(col) for col in df.columns.values]\n",
	"\n",
	" See for details https://stackoverflow.com/questions/14507794/pandas-how-to-flatten-a-hierarchical-index-in-columns\n",
	"\n",
	" Examples\n",
	" --------\n",
	" >>> df0 = pd.DataFrame([[1,2], [10, 20], [100, 200]], columns=[\"A\", \"B\"])\n",
	" >>> df0\n",
	" ... A B\n",
	" ... 0 1 2\n",
	" ... 1 10 20\n",
	" ... 2 100 200\n",
	"\n",
	" >>> columns = pd.MultiIndex.from_tuples([(\"C\", \"C0\"), (\"D\", \"D0\")])\n",
	" >>> df1 = pd.DataFrame([[1,3], [10, 30], [100, 300]], columns=columns)\n",
	" >>> df1\n",
	" ... C D\n",
	" ... C0 D0\n",
	" ... 0 1 3\n",
	" ... 1 10 30\n",
	" ... 2 100 300\n",
	"\n",
	" Merging ``df0`` with ``df1`` returns the dataframe contains tuples in the column names\n",
	"\n",
	" >>> df = pd.merge(df0, df1, left_on=\"A\", right_on=[(\"C\", \"C0\")])\n",
	" >>> df\n",
	" ... A B (C, C0) (D, D0)\n",
	" ... 0 1 2 1 3\n",
	" ... 1 10 20 10 30\n",
	" ... 2 100 200 100 300\n",
	"\n",
	" It's a bit ugly. Let's beautify the header\n",
	"\n",
	" >>> columns_as_multiindex(df)\n",
	" >>> df\n",
	" ... A B C D\n",
	" ... C0 D0\n",
	" ... 0 1 2 1 3\n",
	" ... 1 10 20 10 30\n",
	" ... 2 100 200 100 300\n",
	"\n",
	" If same named columns exists then add suffixes _0, _1, _2, ... to the top level\n",
	"\n",
	" >>> df1_columns = pd.MultiIndex.from_tuples([(\"A0\", \"B0\", \"C0\"), (\"A1\", \"B1\", \"C1\"), (\"A2\", \"B2\", \"\")])\n",
	" >>> df1 = pd.DataFrame([[1, 2, 3], [10, 20, 30]], columns=df1_columns)\n",
	" >>> df1\n",
	" ... A0 A1 A2\n",
	" ... B0 B1 B2\n",
	" ... C0 C1\n",
	" ... 0 1 2 3\n",
	" ... 1 10 20 30\n",
	" >>> df2_columns = pd.MultiIndex.from_tuples([(\"X0\", \"Y0\"), (\"X1\", \"Y1\"), (\"A2\", \"B2\")])\n",
	" >>> df2 = pd.DataFrame([[1, 200, 300], [10, 200, 300]], columns=df2_columns)\n",
	" >>> df2\n",
	" ... X0 X1 A2\n",
	" ... Y0 Y1 B2\n",
	" ... 0 1 200 300\n",
	" ... 1 10 200 300\n",
	" >>> df_merge = pd.merge(df1, df2, left_on=[(\"A0\", \"B0\",\"C0\")], right_on=[(\"X0\", \"Y0\")])\n",
	" >>> df_merge\n",
	" ... (A0, B0, C0) (A1, B1, C1) (A2, B2, ) (X0, Y0) (X1, Y1) (A2, B2)\n",
	" ... 0 1 2 3 1 200 300\n",
	" ... 1 10 20 30 10 200 300\n",
	" >>> columns_as_multiindex(df_merge)\n",
	" ... A0 A1 A2_0 X0 X1 A2_1\n",
	" ... B0 B1 B2 Y0 Y1 B2\n",
	" ... C0 C1\n",
	" ... 0 1 2 3 1 200 300\n",
	" ... 1 10 20 30 10 200 300\n",
	" >>> columns_as_multiindex(df_merge, suffix_on_overlap=False)\n",
	" ... Traceback (most recent call last)\n",
	" ... ...\n",
	" ... ValueError: columns overlap, but the auto-renaming is disabled\n",
	" \"\"\"\n",
	"\n",
	" columns_new = index_as_multiindex(df.columns, suffix_on_overlap=suffix_on_overlap,\n",
	" strip_empty_ends=strip_empty_ends)\n",
	"\n",
	" if inplace:\n",
	" df.columns = columns_new\n",
	" else:\n",
	" df_new = df.copy()\n",
	" df_new.columns = columns_new\n",
	" return df_new"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"'1.1.3'"
	]
	},
	"execution_count": 2,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"import pandas as pd\n",
	"pd.__version__"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead tr th {\n",
	" text-align: left;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr>\n",
	" <th></th>\n",
	" <th>A0</th>\n",
	" <th>A1</th>\n",
	" <th>A2</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th></th>\n",
	" <th>B0</th>\n",
	" <th>B1</th>\n",
	" <th>B2</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th></th>\n",
	" <th>C0</th>\n",
	" <th>C1</th>\n",
	" <th></th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>1</td>\n",
	" <td>2</td>\n",
	" <td>3</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>10</td>\n",
	" <td>20</td>\n",
	" <td>30</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" A0 A1 A2\n",
	" B0 B1 B2\n",
	" C0 C1 \n",
	"0 1 2 3\n",
	"1 10 20 30"
	]
	},
	"execution_count": 3,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df1_columns = pd.MultiIndex.from_tuples([(\"A0\", \"B0\", \"C0\"), (\"A1\", \"B1\", \"C1\"), (\"A2\", \"B2\", \"\")])\n",
	"df1 = pd.DataFrame([[1, 2, 3], [10, 20, 30]], columns=df1_columns)\n",
	"df1"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead tr th {\n",
	" text-align: left;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr>\n",
	" <th></th>\n",
	" <th>X0</th>\n",
	" <th>X1</th>\n",
	" <th>A2</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th></th>\n",
	" <th>Y0</th>\n",
	" <th>Y1</th>\n",
	" <th>B2</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>1</td>\n",
	" <td>200</td>\n",
	" <td>300</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>10</td>\n",
	" <td>200</td>\n",
	" <td>300</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" X0 X1 A2\n",
	" Y0 Y1 B2\n",
	"0 1 200 300\n",
	"1 10 200 300"
	]
	},
	"execution_count": 4,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df2_columns = pd.MultiIndex.from_tuples([(\"X0\", \"Y0\"), (\"X1\", \"Y1\"), (\"A2\", \"B2\")])\n",
	"df2 = pd.DataFrame([[1, 200, 300], [10, 200, 300]], columns=df2_columns)\n",
	"df2"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"d:\\00data\\opt\\miniconda3\\lib\\site-packages\\pandas\\core\\reshape\\merge.py:643: UserWarning: merging between different levels can give an unintended result (3 levels on the left,2 on the right)\n",
	" warnings.warn(msg, UserWarning)\n"
	]
	},
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>(A0, B0, C0)</th>\n",
	" <th>(A1, B1, C1)</th>\n",
	" <th>(A2, B2, )</th>\n",
	" <th>(X0, Y0)</th>\n",
	" <th>(X1, Y1)</th>\n",
	" <th>(A2, B2)</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>1</td>\n",
	" <td>2</td>\n",
	" <td>3</td>\n",
	" <td>1</td>\n",
	" <td>200</td>\n",
	" <td>300</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>10</td>\n",
	" <td>20</td>\n",
	" <td>30</td>\n",
	" <td>10</td>\n",
	" <td>200</td>\n",
	" <td>300</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" (A0, B0, C0) (A1, B1, C1) (A2, B2, ) (X0, Y0) (X1, Y1) (A2, B2)\n",
	"0 1 2 3 1 200 300\n",
	"1 10 20 30 10 200 300"
	]
	},
	"execution_count": 5,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df_merge = pd.merge(df1, df2, left_on=[(\"A0\", \"B0\",\"C0\")], right_on=[(\"X0\", \"Y0\")])\n",
	"df_merge"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead tr th {\n",
	" text-align: left;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr>\n",
	" <th></th>\n",
	" <th>A0</th>\n",
	" <th>A1</th>\n",
	" <th>A2_0</th>\n",
	" <th>X0</th>\n",
	" <th>X1</th>\n",
	" <th>A2_1</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th></th>\n",
	" <th>B0</th>\n",
	" <th>B1</th>\n",
	" <th>B2</th>\n",
	" <th>Y0</th>\n",
	" <th>Y1</th>\n",
	" <th>B2</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th></th>\n",
	" <th>C0</th>\n",
	" <th>C1</th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>1</td>\n",
	" <td>2</td>\n",
	" <td>3</td>\n",
	" <td>1</td>\n",
	" <td>200</td>\n",
	" <td>300</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>10</td>\n",
	" <td>20</td>\n",
	" <td>30</td>\n",
	" <td>10</td>\n",
	" <td>200</td>\n",
	" <td>300</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" A0 A1 A2_0 X0 X1 A2_1\n",
	" B0 B1 B2 Y0 Y1 B2\n",
	" C0 C1 \n",
	"0 1 2 3 1 200 300\n",
	"1 10 20 30 10 200 300"
	]
	},
	"execution_count": 6,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"columns_as_multiindex(df_merge)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [
	{
	"ename": "ValueError",
	"evalue": "columns overlap, but the auto-renaming is disabled",
	"output_type": "error",
	"traceback": [
	"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
	"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
	"\u001b[1;32m<ipython-input-7-0857603b176a>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mcolumns_as_multiindex\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf_merge\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msuffix_on_overlap\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
	"\u001b[1;32m<ipython-input-1-ae2f2a30fb5d>\u001b[0m in \u001b[0;36mcolumns_as_multiindex\u001b[1;34m(df, inplace, suffix_on_overlap, strip_empty_ends)\u001b[0m\n\u001b[0;32m 153\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 154\u001b[0m columns_new = index_as_multiindex(df.columns, suffix_on_overlap=suffix_on_overlap,\n\u001b[1;32m--> 155\u001b[1;33m strip_empty_ends=strip_empty_ends)\n\u001b[0m\u001b[0;32m 156\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 157\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0minplace\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
	"\u001b[1;32m<ipython-input-1-ae2f2a30fb5d>\u001b[0m in \u001b[0;36mindex_as_multiindex\u001b[1;34m(index, suffix_on_overlap, strip_empty_ends)\u001b[0m\n\u001b[0;32m 49\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0midx\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtuple\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcol\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 50\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 51\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"columns overlap, but the auto-renaming is disabled\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 52\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 53\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mMultiIndex\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfrom_tuples\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
	"\u001b[1;31mValueError\u001b[0m: columns overlap, but the auto-renaming is disabled"
	]
	}
	],
	"source": [
	"columns_as_multiindex(df_merge, suffix_on_overlap=False)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"See also issue https://github.com/pandas-dev/pandas/issues/26699"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.7"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 4
	}
No results found