Created
February 18, 2025 10:01
-
-
Save wwyqianqian/c3c986ed0f909a38e670a5f1ee218a36 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| import re | |
| import matplotlib.pyplot as plt | |
| import numpy as np | |
| from datetime import datetime | |
| import matplotlib.dates as mdates | |
| # Read Excel file | |
| file_path = "Functional Mailbox_20250212.xlsx" | |
| df = pd.read_excel(file_path, sheet_name='Functional Email') | |
| # Regex pattern for DD/MM/YYYY or D/M/YYYY format | |
| date_pattern = r"(\d{1,2})/(\d{1,2})/(\d{4})" | |
| # Dictionary to store completed cases per day | |
| completed_cases_per_day = {} | |
| # Iterate through each row | |
| for index, row in df.iterrows(): | |
| case_status = str(row['Status']).strip().lower() | |
| # Only process "completed" cases | |
| if not case_status.startswith("completed"): | |
| continue | |
| # Handle date column | |
| if pd.isna(row['V2 Date']): | |
| continue | |
| date_str = str(row['V2 Date']).strip() | |
| case_date = None | |
| try: | |
| # Try parsing YYYY-MM-DD HH:MM:SS format | |
| case_date = datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S").date() | |
| except ValueError: | |
| match = re.search(date_pattern, date_str) | |
| if match: | |
| day, month, year = match.groups() | |
| try: | |
| case_date = datetime.strptime(f"{day}/{month}/{year}", "%d/%m/%Y").date() | |
| except ValueError: | |
| print(f"Skipping invalid date at row {index}: {date_str}") | |
| else: | |
| print(f"⚠️ No valid date found at row {index}: {date_str}") | |
| # Count completed cases per day | |
| if case_date: | |
| completed_cases_per_day[case_date] = completed_cases_per_day.get(case_date, 0) + 1 | |
| # ** Compute cumulative completed cases** | |
| sorted_dates = sorted(completed_cases_per_day.keys()) | |
| cumulative_counts = np.cumsum([completed_cases_per_day[date] for date in sorted_dates]) | |
| # ** Plot the cumulative bar chart** | |
| plt.figure(figsize=(14, 7)) | |
| # ** Use gradient green colors** | |
| colors = plt.cm.Greens(np.linspace(0.4, 1, len(sorted_dates))) | |
| plt.bar(sorted_dates, cumulative_counts, color=colors, label="Cumulative Completed Cases") | |
| # ** Style the X and Y axes** | |
| plt.gca().spines['bottom'].set_color('#444') # Darker bottom axis | |
| plt.gca().spines['left'].set_color('#444') # Darker left axis | |
| plt.gca().xaxis.label.set_color('#333') # Darker x-axis label | |
| plt.gca().yaxis.label.set_color('#333') # Darker y-axis label | |
| plt.xticks(fontsize=10, color='#222') # Darker x-axis ticks | |
| plt.yticks(fontsize=10, color='#222') # Darker y-axis ticks | |
| # ** Adjust X-axis to show only the 1st and 15th of each month** | |
| plt.gca().xaxis.set_major_locator(mdates.DayLocator(bymonthday=[1, 15])) | |
| plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d')) | |
| plt.xticks(rotation=45) # Rotate labels for better visibility | |
| # ** Add total completed cases inside the chart** | |
| total_completed_cases = cumulative_counts[-1] if cumulative_counts.size > 0 else 0 | |
| plt.text(sorted_dates[-1], cumulative_counts[-1] + 20, f"Total: {total_completed_cases}", | |
| ha='right', va='bottom', fontsize=12, fontweight="bold", color="black") | |
| # ** Label every 10th bar** | |
| for i in range(0, len(sorted_dates), 10): | |
| plt.text(sorted_dates[i], cumulative_counts[i], f"{cumulative_counts[i]}", | |
| ha='center', va='bottom', fontsize=9, fontweight="bold", color="black") | |
| # ** Add title and labels** | |
| plt.xlabel("Date", fontsize=12, fontweight="bold") | |
| plt.ylabel("Cumulative Completed Cases", fontsize=12, fontweight="bold") | |
| plt.title("Cumulative Completed Cases Over Time", fontsize=14, fontweight="bold") | |
| plt.grid(True, linestyle="--", alpha=0.5) # Improve readability | |
| plt.legend() | |
| # ** Show the chart** | |
| plt.show() | |
| # ** Debugging output remains unchanged** | |
| print(f"✅ Total completed cases: {total_completed_cases}") | |
| print(f"📅 Number of unique days with completed cases: {len(completed_cases_per_day)}") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Uh oh!
There was an error while loading. Please reload this page.