wwyqianqian · February 18, 2025 10:01 · wwyqianqian · Feb 18, 2025
diff --git a/gistfile1.txt b/gistfile1.txt
 import pandas as pd
 import re
 import matplotlib.pyplot as plt
 import numpy as np
 from datetime import datetime
 import matplotlib.dates as mdates

 # Read Excel file
 file_path = "Functional Mailbox_20250212.xlsx"
 df = pd.read_excel(file_path, sheet_name='Functional Email')

 # Regex pattern for DD/MM/YYYY or D/M/YYYY format
 date_pattern = r"(\d{1,2})/(\d{1,2})/(\d{4})"

 # Dictionary to store completed cases per day
 completed_cases_per_day = {}

 # Iterate through each row
 for index, row in df.iterrows():
    case_status = str(row['Status']).strip().lower()


    # Only process "completed" cases
    if not case_status.startswith("completed"):
        continue
 

    # Handle date column
    if pd.isna(row['V2 Date']):
        continue

    date_str = str(row['V2 Date']).strip()
    case_date = None

    try:
        # Try parsing YYYY-MM-DD HH:MM:SS format
        case_date = datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S").date()
    except ValueError:
        match = re.search(date_pattern, date_str)
        if match:
            day, month, year = match.groups()
            try:
                case_date = datetime.strptime(f"{day}/{month}/{year}", "%d/%m/%Y").date()
            except ValueError:
                print(f"Skipping invalid date at row {index}: {date_str}")
        else:
            print(f"⚠️ No valid date found at row {index}: {date_str}")

    # Count completed cases per day
    if case_date:
        completed_cases_per_day[case_date] = completed_cases_per_day.get(case_date, 0) + 1

 # ** Compute cumulative completed cases**
 sorted_dates = sorted(completed_cases_per_day.keys())
 cumulative_counts = np.cumsum([completed_cases_per_day[date] for date in sorted_dates])

 # ** Plot the cumulative bar chart**
 plt.figure(figsize=(14, 7))

 # ** Use gradient green colors**
 colors = plt.cm.Greens(np.linspace(0.4, 1, len(sorted_dates)))

 plt.bar(sorted_dates, cumulative_counts, color=colors, label="Cumulative Completed Cases")

 # ** Style the X and Y axes**
 plt.gca().spines['bottom'].set_color('#444')  # Darker bottom axis
 plt.gca().spines['left'].set_color('#444')  # Darker left axis
 plt.gca().xaxis.label.set_color('#333')  # Darker x-axis label
 plt.gca().yaxis.label.set_color('#333')  # Darker y-axis label
 plt.xticks(fontsize=10, color='#222')  # Darker x-axis ticks
 plt.yticks(fontsize=10, color='#222')  # Darker y-axis ticks

 # ** Adjust X-axis to show only the 1st and 15th of each month**
 plt.gca().xaxis.set_major_locator(mdates.DayLocator(bymonthday=[1, 15]))
 plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))

 plt.xticks(rotation=45)  # Rotate labels for better visibility

 # ** Add total completed cases inside the chart**
 total_completed_cases = cumulative_counts[-1] if cumulative_counts.size > 0 else 0
 plt.text(sorted_dates[-1], cumulative_counts[-1] + 20, f"Total: {total_completed_cases}", 
         ha='right', va='bottom', fontsize=12, fontweight="bold", color="black")

 # ** Label every 10th bar**
 for i in range(0, len(sorted_dates), 10):
    plt.text(sorted_dates[i], cumulative_counts[i], f"{cumulative_counts[i]}", 
             ha='center', va='bottom', fontsize=9, fontweight="bold", color="black")

 # ** Add title and labels**
 plt.xlabel("Date", fontsize=12, fontweight="bold")
 plt.ylabel("Cumulative Completed Cases", fontsize=12, fontweight="bold")
 plt.title("Cumulative Completed Cases Over Time", fontsize=14, fontweight="bold")
 plt.grid(True, linestyle="--", alpha=0.5)  # Improve readability
 plt.legend()

 # ** Show the chart**
 plt.show()

 # ** Debugging output remains unchanged**
 print(f"✅ Total completed cases: {total_completed_cases}")
 print(f"📅 Number of unique days with completed cases: {len(completed_cases_per_day)}")
	import pandas as pd
	import re
	import matplotlib.pyplot as plt
	import numpy as np
	from datetime import datetime
	import matplotlib.dates as mdates

	# Read Excel file
	file_path = "Functional Mailbox_20250212.xlsx"
	df = pd.read_excel(file_path, sheet_name='Functional Email')

	# Regex pattern for DD/MM/YYYY or D/M/YYYY format
	date_pattern = r"(\d{1,2})/(\d{1,2})/(\d{4})"

	# Dictionary to store completed cases per day
	completed_cases_per_day = {}

	# Iterate through each row
	for index, row in df.iterrows():
	case_status = str(row['Status']).strip().lower()


	# Only process "completed" cases
	if not case_status.startswith("completed"):
	continue


	# Handle date column
	if pd.isna(row['V2 Date']):
	continue

	date_str = str(row['V2 Date']).strip()
	case_date = None

	try:
	# Try parsing YYYY-MM-DD HH:MM:SS format
	case_date = datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S").date()
	except ValueError:
	match = re.search(date_pattern, date_str)
	if match:
	day, month, year = match.groups()
	try:
	case_date = datetime.strptime(f"{day}/{month}/{year}", "%d/%m/%Y").date()
	except ValueError:
	print(f"Skipping invalid date at row {index}: {date_str}")
	else:
	print(f"⚠️ No valid date found at row {index}: {date_str}")

	# Count completed cases per day
	if case_date:
	completed_cases_per_day[case_date] = completed_cases_per_day.get(case_date, 0) + 1

	# Compute cumulative completed cases
	sorted_dates = sorted(completed_cases_per_day.keys())
	cumulative_counts = np.cumsum([completed_cases_per_day[date] for date in sorted_dates])

	# Plot the cumulative bar chart
	plt.figure(figsize=(14, 7))

	# Use gradient green colors
	colors = plt.cm.Greens(np.linspace(0.4, 1, len(sorted_dates)))

	plt.bar(sorted_dates, cumulative_counts, color=colors, label="Cumulative Completed Cases")

	# Style the X and Y axes
	plt.gca().spines['bottom'].set_color('#444') # Darker bottom axis
	plt.gca().spines['left'].set_color('#444') # Darker left axis
	plt.gca().xaxis.label.set_color('#333') # Darker x-axis label
	plt.gca().yaxis.label.set_color('#333') # Darker y-axis label
	plt.xticks(fontsize=10, color='#222') # Darker x-axis ticks
	plt.yticks(fontsize=10, color='#222') # Darker y-axis ticks

	# Adjust X-axis to show only the 1st and 15th of each month
	plt.gca().xaxis.set_major_locator(mdates.DayLocator(bymonthday=[1, 15]))
	plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))

	plt.xticks(rotation=45) # Rotate labels for better visibility

	# Add total completed cases inside the chart
	total_completed_cases = cumulative_counts[-1] if cumulative_counts.size > 0 else 0
	plt.text(sorted_dates[-1], cumulative_counts[-1] + 20, f"Total: {total_completed_cases}",
	ha='right', va='bottom', fontsize=12, fontweight="bold", color="black")

	# Label every 10th bar
	for i in range(0, len(sorted_dates), 10):
	plt.text(sorted_dates[i], cumulative_counts[i], f"{cumulative_counts[i]}",
	ha='center', va='bottom', fontsize=9, fontweight="bold", color="black")

	# Add title and labels
	plt.xlabel("Date", fontsize=12, fontweight="bold")
	plt.ylabel("Cumulative Completed Cases", fontsize=12, fontweight="bold")
	plt.title("Cumulative Completed Cases Over Time", fontsize=14, fontweight="bold")
	plt.grid(True, linestyle="--", alpha=0.5) # Improve readability
	plt.legend()

	# Show the chart
	plt.show()

	# Debugging output remains unchanged
	print(f"✅ Total completed cases: {total_completed_cases}")
	print(f"📅 Number of unique days with completed cases: {len(completed_cases_per_day)}")
No results found