Skip to content

Instantly share code, notes, and snippets.

@wwyqianqian
Created February 18, 2025 10:01
Show Gist options
  • Select an option

  • Save wwyqianqian/c3c986ed0f909a38e670a5f1ee218a36 to your computer and use it in GitHub Desktop.

Select an option

Save wwyqianqian/c3c986ed0f909a38e670a5f1ee218a36 to your computer and use it in GitHub Desktop.
import pandas as pd
import re
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
import matplotlib.dates as mdates
# Read Excel file
file_path = "Functional Mailbox_20250212.xlsx"
df = pd.read_excel(file_path, sheet_name='Functional Email')
# Regex pattern for DD/MM/YYYY or D/M/YYYY format
date_pattern = r"(\d{1,2})/(\d{1,2})/(\d{4})"
# Dictionary to store completed cases per day
completed_cases_per_day = {}
# Iterate through each row
for index, row in df.iterrows():
case_status = str(row['Status']).strip().lower()
# Only process "completed" cases
if not case_status.startswith("completed"):
continue
# Handle date column
if pd.isna(row['V2 Date']):
continue
date_str = str(row['V2 Date']).strip()
case_date = None
try:
# Try parsing YYYY-MM-DD HH:MM:SS format
case_date = datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S").date()
except ValueError:
match = re.search(date_pattern, date_str)
if match:
day, month, year = match.groups()
try:
case_date = datetime.strptime(f"{day}/{month}/{year}", "%d/%m/%Y").date()
except ValueError:
print(f"Skipping invalid date at row {index}: {date_str}")
else:
print(f"⚠️ No valid date found at row {index}: {date_str}")
# Count completed cases per day
if case_date:
completed_cases_per_day[case_date] = completed_cases_per_day.get(case_date, 0) + 1
# ** Compute cumulative completed cases**
sorted_dates = sorted(completed_cases_per_day.keys())
cumulative_counts = np.cumsum([completed_cases_per_day[date] for date in sorted_dates])
# ** Plot the cumulative bar chart**
plt.figure(figsize=(14, 7))
# ** Use gradient green colors**
colors = plt.cm.Greens(np.linspace(0.4, 1, len(sorted_dates)))
plt.bar(sorted_dates, cumulative_counts, color=colors, label="Cumulative Completed Cases")
# ** Style the X and Y axes**
plt.gca().spines['bottom'].set_color('#444') # Darker bottom axis
plt.gca().spines['left'].set_color('#444') # Darker left axis
plt.gca().xaxis.label.set_color('#333') # Darker x-axis label
plt.gca().yaxis.label.set_color('#333') # Darker y-axis label
plt.xticks(fontsize=10, color='#222') # Darker x-axis ticks
plt.yticks(fontsize=10, color='#222') # Darker y-axis ticks
# ** Adjust X-axis to show only the 1st and 15th of each month**
plt.gca().xaxis.set_major_locator(mdates.DayLocator(bymonthday=[1, 15]))
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
plt.xticks(rotation=45) # Rotate labels for better visibility
# ** Add total completed cases inside the chart**
total_completed_cases = cumulative_counts[-1] if cumulative_counts.size > 0 else 0
plt.text(sorted_dates[-1], cumulative_counts[-1] + 20, f"Total: {total_completed_cases}",
ha='right', va='bottom', fontsize=12, fontweight="bold", color="black")
# ** Label every 10th bar**
for i in range(0, len(sorted_dates), 10):
plt.text(sorted_dates[i], cumulative_counts[i], f"{cumulative_counts[i]}",
ha='center', va='bottom', fontsize=9, fontweight="bold", color="black")
# ** Add title and labels**
plt.xlabel("Date", fontsize=12, fontweight="bold")
plt.ylabel("Cumulative Completed Cases", fontsize=12, fontweight="bold")
plt.title("Cumulative Completed Cases Over Time", fontsize=14, fontweight="bold")
plt.grid(True, linestyle="--", alpha=0.5) # Improve readability
plt.legend()
# ** Show the chart**
plt.show()
# ** Debugging output remains unchanged**
print(f"✅ Total completed cases: {total_completed_cases}")
print(f"📅 Number of unique days with completed cases: {len(completed_cases_per_day)}")
@wwyqianqian
Copy link
Author

wwyqianqian commented Feb 18, 2025

import pandas as pd
import re
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
import matplotlib.dates as mdates

# Read Excel file
file_path = "Functional Mailbox_20250212.xlsx"
df = pd.read_excel(file_path, sheet_name='Functional Email')

# Regex pattern for DD/MM/YYYY or D/M/YYYY format
date_pattern = r"(\d{1,2})/(\d{1,2})/(\d{4})"

# Dictionary to store completed cases per day
completed_cases_per_day = {}

# Iterate through each row
for index, row in df.iterrows():
    case_status = str(row['Status']).strip().lower()


    # Only process "completed" cases
    if not case_status.startswith("completed"):
        continue
 

    # Handle date column
    if pd.isna(row['V2 Date']):
        continue

    date_str = str(row['V2 Date']).strip()
    case_date = None

    try:
        # Try parsing YYYY-MM-DD HH:MM:SS format
        case_date = datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S").date()
        
        
    except ValueError:
        match = re.search(date_pattern, date_str)
        if match:
            day, month, year = match.groups()
            try:
                case_date = datetime.strptime(f"{day}/{month}/{year}", "%d/%m/%Y").date()
            except ValueError:
                print(f"Skipping invalid date at row {index}: {date_str}")
        else:
            print(f"⚠️ No valid date found at row {index}: {date_str}")

    # Count completed cases per day
    if case_date:
        completed_cases_per_day[case_date] = completed_cases_per_day.get(case_date, 0) + 1




# ** 計算累積案件數,但 X 軸僅顯示 2025-01-01 之後的日期 **
start_date = datetime(2025, 1, 1).date()

# ** 計算完整累積數據(不篩選日期)**
sorted_dates = sorted(completed_cases_per_day.keys())
cumulative_counts = np.cumsum([completed_cases_per_day[date] for date in sorted_dates])


# ** 確定 2025-01-01 之前的累積案件數 **
index = np.searchsorted(sorted_dates, start_date)
pre_2025_total = cumulative_counts[index - 1] if index > 0 else 0
print(f"📊 Cases completed before {start_date}: {pre_2025_total}")


# ** 過濾出 2025-01-01 及之後的數據 **
filtered_dates = [date for date in sorted_dates if date >= start_date]
print(filtered_dates)
filtered_cumulative_counts = cumulative_counts[len(sorted_dates) - len(filtered_dates):] 
print(filtered_cumulative_counts)

# ** 重新繪製累積條形圖 **
plt.figure(figsize=(14, 7))
colors = plt.cm.Greens(np.linspace(0.4, 1, len(filtered_dates)))
plt.bar(filtered_dates, filtered_cumulative_counts, color=colors, label="Cumulative Completed Cases")


# ** 設置 X 軸只顯示 2025-01-01 之後的日期,並且每天顯示一個標籤 **
plt.gca().xaxis.set_major_locator(mdates.DayLocator(interval=1))  # 每一天顯示
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))  # 格式化為 YYYY-MM-DD
plt.xlim(start_date, filtered_dates[-1])  # 限制 X 軸範圍


# ** 旋轉 X 軸標籤以避免擁擠 **
plt.xticks(rotation=45, fontsize=8, color='#222')

# ** 其他 UI 設置保持不變 **
plt.gca().spines['bottom'].set_color('#444')  
plt.gca().spines['left'].set_color('#444')  
plt.gca().xaxis.label.set_color('#333')  
plt.gca().yaxis.label.set_color('#333')  
plt.yticks(fontsize=10, color='#222')

# ** 顯示總計完成案件數 **
total_completed_cases = filtered_cumulative_counts[-1] if filtered_cumulative_counts.size > 0 else 0
plt.text(filtered_dates[-1], filtered_cumulative_counts[-1] + 20, f"Total: {total_completed_cases}", 
         ha='right', va='bottom', fontsize=12, fontweight="bold", color="black")

# ** 添加標籤 (每 1 天標記一次數值)**
for i in range(0, len(filtered_dates), 1):
    plt.text(filtered_dates[i], filtered_cumulative_counts[i], f"{filtered_cumulative_counts[i]}", 
             ha='center', va='bottom', fontsize=9, fontweight="bold", color="black")

# ** 添加標題與標籤 **
plt.xlabel("Date", fontsize=12, fontweight="bold")
plt.ylabel("Cumulative Completed Cases", fontsize=12, fontweight="bold")
plt.title("Cumulative Completed Cases Over Time (From 2025-01-01)", fontsize=14, fontweight="bold")
plt.grid(True, linestyle="--", alpha=0.5)
plt.legend()

# ** 顯示圖表 **
plt.show()



Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment