Skip to content

Instantly share code, notes, and snippets.

@petigura
Last active December 19, 2024 19:10
Show Gist options
  • Select an option

  • Save petigura/b0c539295689a4b4aacc5d3bb0ce9a5c to your computer and use it in GitHub Desktop.

Select an option

Save petigura/b0c539295689a4b4aacc5d3bb0ce9a5c to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 124,
"id": "306c92c8",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th>r</th>\n",
" <th>d</th>\n",
" <th>s</th>\n",
" </tr>\n",
" <tr>\n",
" <th>r_id</th>\n",
" <th>d_id</th>\n",
" <th>s_id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"5\" valign=\"top\">0</th>\n",
" <th rowspan=\"5\" valign=\"top\">0</th>\n",
" <th>30</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>30</td>\n",
" </tr>\n",
" <tr>\n",
" <th>52</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>52</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>94</td>\n",
" </tr>\n",
" <tr>\n",
" <th>106</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>106</td>\n",
" </tr>\n",
" <tr>\n",
" <th>108</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>108</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <th>...</th>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"5\" valign=\"top\">99</th>\n",
" <th rowspan=\"5\" valign=\"top\">184</th>\n",
" <th>115</th>\n",
" <td>99</td>\n",
" <td>184</td>\n",
" <td>115</td>\n",
" </tr>\n",
" <tr>\n",
" <th>139</th>\n",
" <td>99</td>\n",
" <td>184</td>\n",
" <td>139</td>\n",
" </tr>\n",
" <tr>\n",
" <th>163</th>\n",
" <td>99</td>\n",
" <td>184</td>\n",
" <td>163</td>\n",
" </tr>\n",
" <tr>\n",
" <th>184</th>\n",
" <td>99</td>\n",
" <td>184</td>\n",
" <td>184</td>\n",
" </tr>\n",
" <tr>\n",
" <th>191</th>\n",
" <td>99</td>\n",
" <td>184</td>\n",
" <td>191</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>200000 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" r d s\n",
"r_id d_id s_id \n",
"0 0 30 0 0 30\n",
" 52 0 0 52\n",
" 94 0 0 94\n",
" 106 0 0 106\n",
" 108 0 0 108\n",
"... .. ... ...\n",
"99 184 115 99 184 115\n",
" 139 99 184 139\n",
" 163 99 184 163\n",
" 184 99 184 184\n",
" 191 99 184 191\n",
"\n",
"[200000 rows x 3 columns]"
]
},
"execution_count": 124,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Build a demo frame of random (r, d, s) triples whose MultiIndex mirrors the columns.\n",
"# The imports live in this cell so the notebook runs top-to-bottom on a fresh kernel.\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"SEED = 42  # fixed seed: every re-run draws the same triples\n",
"n_large_rows = 200_000\n",
"rng = np.random.default_rng(SEED)\n",
"\n",
"# The three columns share one generator, so the draw order (r, then d, then s)\n",
"# is part of what makes the data reproducible. Upper bounds are exclusive.\n",
"df = pd.DataFrame({\n",
"    'r': rng.integers(0, 100, size=n_large_rows),\n",
"    'd': rng.integers(0, 185, size=n_large_rows),\n",
"    's': rng.integers(0, 200, size=n_large_rows),\n",
"})\n",
"\n",
"# drop=False keeps r/d/s as ordinary columns while also copying them into the\n",
"# index; the index levels are renamed r_id/d_id/s_id so that column names and\n",
"# level names never collide (sort_values and groupby would otherwise be ambiguous).\n",
"df = (\n",
"    df.set_index(['r', 'd', 's'], drop=False)\n",
"    .rename_axis(index=['r_id', 'd_id', 's_id'])\n",
"    .sort_values(by=['r', 'd', 's'])\n",
")\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 128,
"id": "d4412080",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[[0, 0],\n",
" [0, 1],\n",
" [0, 2],\n",
" [0, 3],\n",
" [0, 4],\n",
" [0, 5],\n",
" [0, 6],\n",
" [0, 7],\n",
" [0, 8],\n",
" [0, 9],\n",
" [0, 10],\n",
" [0, 11],\n",
" [0, 12],\n",
" [0, 13],\n",
" [0, 14],\n",
" [0, 15],\n",
" [0, 16],\n",
" [0, 17],\n",
" [0, 18],\n",
" [0, 19]]"
]
},
"execution_count": 128,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Distinct (r, d) combinations, each returned as an [r, d] list.\n",
"# Only the first 20 are shown; drop the .head(20) step to get the full list.\n",
"(\n",
"    df.loc[:, ['r', 'd']]\n",
"    .drop_duplicates()\n",
"    .head(20)\n",
"    .to_numpy()\n",
"    .tolist()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0e779616",
"metadata": {},
"outputs": [],
"source": [
"# Quick look at the (r, d) columns. .head() keeps the rendered output to a few\n",
"# rows instead of asking the notebook to display the whole frame.\n",
"df[['r', 'd']].head()"
]
},
{
"cell_type": "code",
"execution_count": 132,
"id": "9ed1fb6d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[(10, 11, 1),\n",
" (10, 11, 2),\n",
" (10, 11, 39),\n",
" (10, 11, 61),\n",
" (10, 11, 66),\n",
" (10, 11, 113),\n",
" (10, 11, 125),\n",
" (10, 11, 145),\n",
" (10, 11, 158),\n",
" (10, 11, 160),\n",
" (10, 11, 166),\n",
" (10, 11, 168),\n",
" (10, 12, 5),\n",
" (10, 12, 8),\n",
" (10, 12, 29),\n",
" (10, 12, 79),\n",
" (10, 12, 86),\n",
" (10, 12, 100),\n",
" (10, 12, 184),\n",
" (10, 13, 15)]"
]
},
"execution_count": 132,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Index tuples of every row with r == 10 and d == 10 + delta, delta = {1,...,10}\n",
"# (both ends of the d range are included).\n",
"# Only the first 20 matches are listed; drop the .head(20) step to get them all.\n",
"list(\n",
"    df.loc[df['r'].eq(10) & df['d'].between(10 + 1, 10 + 10)]\n",
"    .head(20)\n",
"    .index\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 200,
"id": "f342ea6a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[(30, 178, 188), (30, 178, 191), (30, 178, 192), (30, 178, 194)]\n"
]
}
],
"source": [
"# Suppose requests 20 and 30 are the only targets with an intra-night cadence request.\n",
"# We loop over these requests, days, and slots: for each (r, d, s) we query the set of\n",
"# (r, d, s) rows of the same request and day whose slot lies within s + delta,\n",
"# delta = {1,...,10}.\n",
"TARGET_REQUESTS = [20, 30]\n",
"MAX_SLOT_DELTA = 10  # window covers slots s+1 .. s+MAX_SLOT_DELTA (inclusive)\n",
"MIN_EXAMPLE_ROWS = 3  # a window becomes the example only if it has more rows than this\n",
"\n",
"df2 = df[df.r.isin(TARGET_REQUESTS)]\n",
"\n",
"# Start unset so the print at the end cannot raise NameError when no window qualifies.\n",
"cut2 = None\n",
"\n",
"# Group by (r, d) once so each window query scans only that request/day's rows\n",
"# instead of re-filtering all of df2 for every row. sort=False keeps the groups in\n",
"# the frame's row order, so the rows are visited in the same order as a plain\n",
"# row-by-row loop over the (r, d, s)-sorted frame.\n",
"for _, day_rows in df2.groupby(['r', 'd'], sort=False):\n",
"    for s in day_rows.s:\n",
"        cut = day_rows[day_rows.s.between(s + 1, s + MAX_SLOT_DELTA)]\n",
"        if len(cut) > MIN_EXAMPLE_ROWS:\n",
"            cut2 = cut.copy()  # overwritten each time: the last qualifying window wins\n",
"\n",
"# print an example\n",
"if cut2 is None:\n",
"    print('no window with more than', MIN_EXAMPLE_ROWS, 'rows found')\n",
"else:\n",
"    print(list(cut2.index))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a731ec15",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.19"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment