Last active
December 19, 2024 19:10
-
-
Save petigura/b0c539295689a4b4aacc5d3bb0ce9a5c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 124, | |
| "id": "306c92c8", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th>r</th>\n", | |
| " <th>d</th>\n", | |
| " <th>s</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>r_id</th>\n", | |
| " <th>d_id</th>\n", | |
| " <th>s_id</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"5\" valign=\"top\">0</th>\n", | |
| " <th rowspan=\"5\" valign=\"top\">0</th>\n", | |
| " <th>30</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>30</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>52</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>52</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>94</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>94</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>106</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>106</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>108</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>108</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>...</th>\n", | |
| " <th>...</th>\n", | |
| " <th>...</th>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"5\" valign=\"top\">99</th>\n", | |
| " <th rowspan=\"5\" valign=\"top\">184</th>\n", | |
| " <th>115</th>\n", | |
| " <td>99</td>\n", | |
| " <td>184</td>\n", | |
| " <td>115</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>139</th>\n", | |
| " <td>99</td>\n", | |
| " <td>184</td>\n", | |
| " <td>139</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>163</th>\n", | |
| " <td>99</td>\n", | |
| " <td>184</td>\n", | |
| " <td>163</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>184</th>\n", | |
| " <td>99</td>\n", | |
| " <td>184</td>\n", | |
| " <td>184</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>191</th>\n", | |
| " <td>99</td>\n", | |
| " <td>184</td>\n", | |
| " <td>191</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>200000 rows × 3 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " r d s\n", | |
| "r_id d_id s_id \n", | |
| "0 0 30 0 0 30\n", | |
| " 52 0 0 52\n", | |
| " 94 0 0 94\n", | |
| " 106 0 0 106\n", | |
| " 108 0 0 108\n", | |
| "... .. ... ...\n", | |
| "99 184 115 99 184 115\n", | |
| " 139 99 184 139\n", | |
| " 163 99 184 163\n", | |
| " 184 99 184 184\n", | |
| " 191 99 184 191\n", | |
| "\n", | |
| "[200000 rows x 3 columns]" | |
| ] | |
| }, | |
| "execution_count": 124, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Create a MultiIndex DataFrame\n", | |
| "n_large_rows = 200_000\n", | |
| "df = pd.DataFrame({\n", | |
| " 'r': np.random.randint(0, 100, size=n_large_rows),\n", | |
| " 'd': np.random.randint(0, 185, size=n_large_rows),\n", | |
| " 's': np.random.randint(0, 200, size=n_large_rows)\n", | |
| "})\n", | |
| "df2 = df.set_index(['r','d','s'])\n", | |
| "df2 = df2.rename_axis(index={'r': 'r_id', 'd': 'd_id','s':'s_id'})\n", | |
| "\n", | |
| "df = pd.DataFrame(df.values, columns=df.columns, index=df2.index)\n", | |
| "df = df.sort_values(by=['r','d','s'])\n", | |
| "df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 128, | |
| "id": "d4412080", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[[0, 0],\n", | |
| " [0, 1],\n", | |
| " [0, 2],\n", | |
| " [0, 3],\n", | |
| " [0, 4],\n", | |
| " [0, 5],\n", | |
| " [0, 6],\n", | |
| " [0, 7],\n", | |
| " [0, 8],\n", | |
| " [0, 9],\n", | |
| " [0, 10],\n", | |
| " [0, 11],\n", | |
| " [0, 12],\n", | |
| " [0, 13],\n", | |
| " [0, 14],\n", | |
| " [0, 15],\n", | |
| " [0, 16],\n", | |
| " [0, 17],\n", | |
| " [0, 18],\n", | |
| " [0, 19]]" | |
| ] | |
| }, | |
| "execution_count": 128, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# All unique (r,d) pairs. Note: remove head(20) to get the full list.\n", | |
| "df[['r','d']].drop_duplicates().head(20).values.tolist()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "0e779616", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "df[['r','d']]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 132, | |
| "id": "9ed1fb6d", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[(10, 11, 1),\n", | |
| " (10, 11, 2),\n", | |
| " (10, 11, 39),\n", | |
| " (10, 11, 61),\n", | |
| " (10, 11, 66),\n", | |
| " (10, 11, 113),\n", | |
| " (10, 11, 125),\n", | |
| " (10, 11, 145),\n", | |
| " (10, 11, 158),\n", | |
| " (10, 11, 160),\n", | |
| " (10, 11, 166),\n", | |
| " (10, 11, 168),\n", | |
| " (10, 12, 5),\n", | |
| " (10, 12, 8),\n", | |
| " (10, 12, 29),\n", | |
| " (10, 12, 79),\n", | |
| " (10, 12, 86),\n", | |
| " (10, 12, 100),\n", | |
| " (10, 12, 184),\n", | |
| " (10, 13, 15)]" | |
| ] | |
| }, | |
| "execution_count": 132, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# All (r,d,s) where r = 10 and d = 10 + delta, with delta in {1,...,10}. Note: remove head(20) to get the full list.\n", | |
| "list(df[(df.r == 10) & (df.d.between(10+1,10+10))].head(20).index)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 200, | |
| "id": "f342ea6a", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "[(30, 178, 188), (30, 178, 191), (30, 178, 192), (30, 178, 194)]\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# Let's suppose requests 20 and 30 are the only targets with an intra-night cadence request.\n", | |
| "# We'll need to loop over these requests, days, and slots. For each (r,d,s) we'll need to query the \n", | |
| "# set of (r,d,s) slots that are within s + delta, with delta in {1,...,10}.\n", | |
| "df2 = df[df.r.isin([20,30])]\n", | |
| "for idx, row in df2.iterrows():\n", | |
| " cut = df2[(df2.r==row.r) & (df2.d==row.d) & df2.s.between(row.s+1, row.s+10)]\n", | |
| " if len(cut) > 3:\n", | |
| " cut2 = cut.copy()\n", | |
| " \n", | |
| "# Print an example: cut2 holds the last matching set that had more than 3 slots.\n", | |
| "print(list(cut2.index))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "a731ec15", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3 (ipykernel)", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.9.19" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 5 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment