jim-my · March 1, 2025 02:43
diff --git a/pandas-discrete_stats.py b/pandas-discrete_stats.py
 import pandas as pd
 from typing import Any, Optional

 def discrete_median(series: pd.Series) -> Optional[Any]:
     """Return the discrete (non-interpolated) median of a sortable Series.

     Missing values are dropped, the remaining values are sorted with
     ``Series.sort_values`` and the element at position ``n // 2`` is
     returned, so the result is always a value that occurs in the data.
     For an even number of values this is the upper of the two middle
     elements.

     - For numbers, returns the middle value (without interpolation).
     - For strings, returns the lexicographically middle element.
     - For datetime, returns the middle timestamp.
     - For ordered categoricals, the category order drives the sort.

     Args:
         series: The input pandas Series.

     Returns:
         The middle element of the sorted non-null values, or None when the
         Series is empty or contains only missing values.
     """
     values = series.dropna()
     if values.empty:
         # Checked after dropna so an all-missing Series yields None instead
         # of an out-of-bounds IndexError from iloc.
         return None

     ordered = values.sort_values()
     # iloc is positional, so the index left over from sorting is irrelevant.
     return ordered.iloc[len(ordered) // 2]

 def discrete_quantile(series: pd.Series, q: float = 0.5) -> Optional[Any]:
     """Return a discrete (non-interpolated) quantile of a sortable Series.

     Missing values are dropped, the remaining ``n`` values are sorted with
     ``Series.sort_values`` and the element at position ``int(q * (n - 1))``
     is returned. The fractional position is truncated, so when it falls
     between two ranks the lower one is chosen (not the nearest one). The
     result is always a value that occurs in the data, which makes this
     usable for strings, datetimes and ordered categoricals as well as
     numbers.

     Args:
         series: The input pandas Series.
         q: The quantile to compute (between 0 and 1 inclusive).

     Returns:
         The selected element, or None when ``q`` is outside ``[0, 1]`` or
         the Series is empty or contains only missing values.
     """
     values = series.dropna()
     # Emptiness is checked after dropna so an all-missing Series yields None
     # instead of an out-of-bounds IndexError from iloc.
     if values.empty or not (0 <= q <= 1):
         return None

     ordered = values.sort_values()
     # int() truncates toward zero; q >= 0 here, so this is a floor.
     position = int(q * (len(ordered) - 1))
     return ordered.iloc[position]

 # Demo frame holding one column per sortable dtype
 df = pd.DataFrame(
     {
         "numbers": [10, 20, 30, 40, 50],
         "strings": ["apple", "banana", "cherry", "date", "elderberry"],
         "dates": pd.to_datetime(
             ["2023-01-01", "2023-02-01", "2023-03-01", "2023-04-01", "2023-05-01"]
         ),
         "categories": pd.Categorical(
             ["low", "medium", "high", "medium", "low"],
             categories=["low", "medium", "high"],
             ordered=True,
         ),
     }
 )

 # Medians, one per line: 30, cherry, 2023-03-01 00:00:00, medium
 print(
     *(
         discrete_median(df[name])
         for name in ("numbers", "strings", "dates", "categories")
     ),
     sep="\n",
 )

 # Quantiles, one per line: 20, date, 2023-03-01 00:00:00, low
 print(
     *(
         discrete_quantile(df[name], level)
         for name, level in (
             ("numbers", 0.25),
             ("strings", 0.75),
             ("dates", 0.5),
             ("categories", 0.25),
         )
     ),
     sep="\n",
 )
	import pandas as pd
	from typing import Any, Optional

	def discrete_median(series: pd.Series) -> Optional[Any]:
	    """Return the discrete (non-interpolated) median of a sortable Series.

	    Missing values are dropped, the remaining values are sorted with
	    ``Series.sort_values`` and the element at position ``n // 2`` is
	    returned, so the result is always a value that occurs in the data.
	    For an even number of values this is the upper of the two middle
	    elements.

	    - For numbers, returns the middle value (without interpolation).
	    - For strings, returns the lexicographically middle element.
	    - For datetime, returns the middle timestamp.
	    - For ordered categoricals, the category order drives the sort.

	    Args:
	        series: The input pandas Series.

	    Returns:
	        The middle element of the sorted non-null values, or None when the
	        Series is empty or contains only missing values.
	    """
	    values = series.dropna()
	    if values.empty:
	        # Checked after dropna so an all-missing Series yields None instead
	        # of an out-of-bounds IndexError from iloc.
	        return None

	    ordered = values.sort_values()
	    # iloc is positional, so the index left over from sorting is irrelevant.
	    return ordered.iloc[len(ordered) // 2]

	def discrete_quantile(series: pd.Series, q: float = 0.5) -> Optional[Any]:
	    """Return a discrete (non-interpolated) quantile of a sortable Series.

	    Missing values are dropped, the remaining ``n`` values are sorted with
	    ``Series.sort_values`` and the element at position ``int(q * (n - 1))``
	    is returned. The fractional position is truncated, so when it falls
	    between two ranks the lower one is chosen (not the nearest one). The
	    result is always a value that occurs in the data, which makes this
	    usable for strings, datetimes and ordered categoricals as well as
	    numbers.

	    Args:
	        series: The input pandas Series.
	        q: The quantile to compute (between 0 and 1 inclusive).

	    Returns:
	        The selected element, or None when ``q`` is outside ``[0, 1]`` or
	        the Series is empty or contains only missing values.
	    """
	    values = series.dropna()
	    # Emptiness is checked after dropna so an all-missing Series yields None
	    # instead of an out-of-bounds IndexError from iloc.
	    if values.empty or not (0 <= q <= 1):
	        return None

	    ordered = values.sort_values()
	    # int() truncates toward zero; q >= 0 here, so this is a floor.
	    position = int(q * (len(ordered) - 1))
	    return ordered.iloc[position]

	# Demo frame holding one column per sortable dtype
	df = pd.DataFrame(
	    {
	        "numbers": [10, 20, 30, 40, 50],
	        "strings": ["apple", "banana", "cherry", "date", "elderberry"],
	        "dates": pd.to_datetime(
	            ["2023-01-01", "2023-02-01", "2023-03-01", "2023-04-01", "2023-05-01"]
	        ),
	        "categories": pd.Categorical(
	            ["low", "medium", "high", "medium", "low"],
	            categories=["low", "medium", "high"],
	            ordered=True,
	        ),
	    }
	)

	# Medians, one per line: 30, cherry, 2023-03-01 00:00:00, medium
	print(
	    *(
	        discrete_median(df[name])
	        for name in ("numbers", "strings", "dates", "categories")
	    ),
	    sep="\n",
	)

	# Quantiles, one per line: 20, date, 2023-03-01 00:00:00, low
	print(
	    *(
	        discrete_quantile(df[name], level)
	        for name, level in (
	            ("numbers", 0.25),
	            ("strings", 0.75),
	            ("dates", 0.5),
	            ("categories", 0.25),
	        )
	    ),
	    sep="\n",
	)
No results found