Created
November 3, 2025 08:34
-
-
Save Lkruitwagen/4391dd25e3991099fafb73014817b55e to your computer and use it in GitHub Desktop.
pure python vs pandas
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| A comparison between a pure Python implementation and a Pandas-based implementation | |
| to find the max profit from buying and selling at different times. | |
| >>> n_prices= 100 | pure python: 0.0006s | pandas: 0.0088s | |
| >>> n_prices= 1000 | pure python: 0.0623s | pandas: 0.3232s | |
| >>> n_prices= 10000 | pure python: 6.5082s | pandas: 40.0255s | |
| Constructing our pd.DataFrames and using the vectorised C bindings does not improve iteration time! | |
| """ | |
| import timeit | |
| import pandas as pd | |
| import numpy as np | |
def max_profit_python(prices: list[float]) -> tuple[int,int,float]:
    """Brute-force search for the most profitable charge/discharge pair.

    Every pair ``(charge, discharge)`` with ``discharge >= charge`` is
    examined, so the running time is quadratic in ``len(prices)``.

    Args:
        prices: Price at each time step.

    Returns:
        ``(best_charge_time, best_discharge_time, max_profit)``. When no pair
        yields a strictly positive profit (including empty input) the result
        is ``(0, 0, 0.0)``. Among equally profitable pairs, the first one
        encountered in (charge, discharge) order is kept.
    """
    # Best (charge index, discharge index, profit) seen so far.
    best = (0, 0, 0.0)
    n_steps = len(prices)
    for charge_idx, charge_price in enumerate(prices):
        for discharge_idx in range(charge_idx, n_steps):
            gain = prices[discharge_idx] - charge_price
            # Strict comparison: ties never displace an earlier pair.
            if gain > best[2]:
                best = (charge_idx, discharge_idx, gain)
    return best
def max_profit_pd(prices: list[float]) -> tuple[int,int,float]:
    """Find the most profitable charge/discharge pair via a pandas profit matrix.

    Builds an ``n x n`` DataFrame whose entry at
    ``(charge_time, discharge_time)`` is
    ``prices[discharge_time] - prices[charge_time]``, masks out the pairs in
    which the discharge would precede the charge, and selects the largest
    remaining entry.

    Args:
        prices: Price at each time step.

    Returns:
        ``(charge_time, discharge_time, profit)`` as plain Python
        ``int``/``int``/``float`` values. Empty input returns
        ``(0, 0, 0.0)``, the same result ``max_profit_python`` gives.
    """
    n_prices = len(prices)
    if n_prices == 0:
        # idxmax() on an empty Series raises; mirror the pure-Python result.
        return 0, 0, 0.0

    # NOTE: the frames are still built from nested lists on purpose -- the
    # cost of constructing them is part of what this module benchmarks.
    rows = [prices] * n_prices
    time_steps = range(n_prices)

    # discharge_price.at[i, j] == prices[j]
    discharge_price = pd.DataFrame(
        data=rows,
        index=pd.Index(time_steps, name="charge_time"),
        columns=pd.Index(time_steps, name="discharge_time"),
    )
    # After the transpose, charge_price.at[i, j] == prices[i]
    charge_price = pd.DataFrame(
        data=rows,
        index=pd.Index(time_steps, name="discharge_time"),
        columns=pd.Index(time_steps, name="charge_time"),
    ).T
    profit = discharge_price - charge_price

    # Keep only the upper triangle (discharge_time >= charge_time); every
    # other cell becomes NaN and is therefore ignored by idxmax().
    valid_times = np.triu(np.ones((n_prices, n_prices), dtype=bool), k=0)
    profit = profit.where(valid_times)

    charge_time, discharge_time = profit.stack().idxmax()
    # Convert NumPy scalars to the built-in types promised by the annotation.
    return (
        int(charge_time),
        int(discharge_time),
        float(profit.at[charge_time, discharge_time]),
    )
if __name__ == "__main__":
    # Benchmark both implementations at increasing input sizes.
    for n_prices in [100, 1000, 10000]:
        # Generate the input once, outside the timed callables, so that the
        # measurement covers only the search itself and both implementations
        # are timed on exactly the same prices.
        prices = np.random.rand(n_prices).tolist()
        python_time = timeit.timeit(lambda: max_profit_python(prices), number=5)
        pd_time = timeit.timeit(lambda: max_profit_pd(prices), number=5)
        print(f"{n_prices=:6d} | pure python: {python_time:.4f}s | pandas: {pd_time:.4f}s")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment