{ "cells": [
{ "cell_type": "markdown", "metadata": {}, "source": [ "# Benchmark CopyFromBuffer\n", "\n", "Builds a flat coordinate array plus per-ring offsets from polygon data, then times `pygeos.linearrings(coords, indices=...)` on pygeos master versus the PR that uses CopyFromBuffer." ] },
{ "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "import sys\n", "\n", "import numpy as np\n", "import geopandas\n", "import pygeos\n", "import pyogrio\n", "\n", "import pyproj\n", "\n", "# Point pyproj at the PROJ data of the active environment (conda layout on\n", "# Linux/macOS) instead of a machine-specific absolute path. If that directory\n", "# does not exist, pyproj's own data-dir lookup is left in charge.\n", "proj_data_dir = os.path.join(sys.prefix, \"share\", \"proj\")\n", "if os.path.isdir(proj_data_dir):\n", "    pyproj.datadir.set_data_dir(proj_data_dir)" ] },
{ "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "df = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))" ] },
{ "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Presumably geopandas runs with the pygeos backend here, so `.data` already holds\n", "# pygeos geometries; otherwise convert with pygeos.from_shapely(df.geometry.array.data).\n", "arr = df.geometry.array.data" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "Get flat array of rings:" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "arr_rings = pygeos.get_rings(pygeos.get_parts(arr))" ] },
{ "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "coords = pygeos.get_coordinates(arr_rings)\n", "# offsets are counted in flat values (2 per x/y coordinate), with a leading 0\n", "offsets = np.insert(np.cumsum(pygeos.get_num_coordinates(arr_rings)*2), 0, 0)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "Recreate the linearrings:" ] },
{ "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# integer-divide the flat-value offsets to get the number of coordinates per ring\n", "ring_lengths = np.diff(offsets // 2)\n", "ring_indices = np.repeat(np.arange(len(ring_lengths)), ring_lengths)\n", "rings = pygeos.linearrings(coords, indices=ring_indices)" ] },
{ "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pygeos.equals(arr_rings, rings).all()" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Test performance on larger dataset" ] },
{ "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": {
"text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GEOID10geometry
043451POLYGON ((-83.70873 41.32733, -83.70815 41.327...
143452POLYGON ((-83.08698 41.53780, -83.08256 41.537...
243456MULTIPOLYGON (((-82.83558 41.71082, -82.83515 ...
343457POLYGON ((-83.49650 41.25371, -83.48382 41.253...
443458POLYGON ((-83.22229 41.53102, -83.22228 41.532...
.........
3313984044POLYGON ((-112.26022 40.76909, -112.25333 40.7...
3314084045MULTIPOLYGON (((-111.92421 40.17034, -111.9240...
3314184046POLYGON ((-110.00072 40.99745, -110.00036 40.9...
3314284047POLYGON ((-111.92141 40.62772, -111.92134 40.6...
3314384049POLYGON ((-111.59394 40.57707, -111.59386 40.5...
\n", "

33144 rows × 2 columns

\n", "
" ], "text/plain": [ " GEOID10 geometry\n", "0 43451 POLYGON ((-83.70873 41.32733, -83.70815 41.327...\n", "1 43452 POLYGON ((-83.08698 41.53780, -83.08256 41.537...\n", "2 43456 MULTIPOLYGON (((-82.83558 41.71082, -82.83515 ...\n", "3 43457 POLYGON ((-83.49650 41.25371, -83.48382 41.253...\n", "4 43458 POLYGON ((-83.22229 41.53102, -83.22228 41.532...\n", "... ... ...\n", "33139 84044 POLYGON ((-112.26022 40.76909, -112.25333 40.7...\n", "33140 84045 MULTIPOLYGON (((-111.92421 40.17034, -111.9240...\n", "33141 84046 POLYGON ((-110.00072 40.99745, -110.00036 40.9...\n", "33142 84047 POLYGON ((-111.92141 40.62772, -111.92134 40.6...\n", "33143 84049 POLYGON ((-111.59394 40.57707, -111.59386 40.5...\n", "\n", "[33144 rows x 2 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_tl = pyogrio.read_dataframe(\"benchmark-data/tl_2019_us_zcta510/tl_2019_us_zcta510.shp\", columns=[\"GEOID10\"])\n", "df_tl" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "arr_tl = df_tl.geometry.array.data" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "33144" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(arr_tl)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Get flat array of rings:" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "arr_rings = pygeos.get_rings(pygeos.get_parts(arr_tl))" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "coords = pygeos.get_coordinates(arr_rings)\n", "offsets = np.insert(np.cumsum(pygeos.get_num_coordinates(arr_rings)*2), 0, 0)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Recreate the linearrings:" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "ring_lengths = np.diff((offsets / 
2).astype(int))\n", "ring_indices = np.repeat(np.arange(len(ring_lengths)), ring_lengths)\n", "rings = pygeos.linearrings(coords, indices=ring_indices)" ] },
{ "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pygeos.equals_exact(arr_rings, rings).all()" ] },
{ "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "np.savez(\"pygeos-benchmark-linearrings.npz\", offsets=offsets, coords=coords)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "**Only testing performance**:\n", "\n", "The cells below only need the saved `.npz` file, so they can be run from a fresh kernel in each pygeos build being compared (which is why the execution counts restart at 1)." ] },
{ "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pygeos" ] },
{ "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "with np.load(\"pygeos-benchmark-linearrings.npz\") as data:\n", "    offsets = data[\"offsets\"]\n", "    coords = data[\"coords\"]" ] },
{ "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# offsets count 2 flat values per coordinate; integer-divide to get per-ring coordinate counts\n", "ring_lengths = np.diff(offsets // 2)\n", "ring_indices = np.repeat(np.arange(len(ring_lengths)), ring_lengths)\n", "rings = pygeos.linearrings(coords, indices=ring_indices)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "Using pygeos master:" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1.13 s ± 6.86 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" ] } ], "source": [ "%timeit pygeos.linearrings(coords, indices=ring_indices)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "Using PR with CopyFromBuffer (about 2x faster than master in this run):" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "558 ms ± 2.35 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" ] } ], "source": [ "%timeit pygeos.linearrings(coords, indices=ring_indices)" ] }
], "metadata": { "kernelspec": { "display_name": "Python (geo-dev)", "language": "python", "name": "geo-dev" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" } }, "nbformat": 4, "nbformat_minor": 4 }