Created
December 18, 2024 21:19
-
-
Save mshalaby/37d64fa8e6754a51f35bd08d4f13dcd3 to your computer and use it in GitHub Desktop.
A script to collect commit count for all organization repos within a certain time range
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/bash | |
| # Date helper functions | |
# Print the inclusive ISO-8601 bounds of a calendar year on stdout as a
# single "<start> <end>" line.  Defaults to the current year when no
# argument is given.
get_year_dates() {
  local yr
  yr=${1:-$(date +%Y)}
  printf '%s-01-01T00:00:00Z %s-12-31T23:59:59Z\n' "$yr" "$yr"
}
# Normalize a date for the GitHub API.  A bare YYYY-MM-DD is expanded to a
# full ISO-8601 timestamp — end of day when $2 is "true", start of day
# otherwise.  Any other input (e.g. an already-full timestamp) is passed
# through unchanged.
format_date() {
  local value=$1
  local at_day_end=$2
  if [[ ! "$value" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}$ ]]; then
    # Not a bare date — assume the caller supplied a full timestamp.
    printf '%s\n' "$value"
  elif [[ "$at_day_end" == "true" ]]; then
    printf '%sT23:59:59Z\n' "$value"
  else
    printf '%sT00:00:00Z\n' "$value"
  fi
}
# Set defaults: current calendar year, the common long-lived branch names,
# and caching enabled.  -r keeps read from interpreting backslashes in the
# date string (read without -r mangles backslash sequences).
read -r START_DATE END_DATE <<< "$(get_year_dates)"
BRANCHES=("main" "master" "develop" "staging" "stage" "production" "live")
SKIP_CACHE=false
# Print usage/help text to stdout (with the current default values
# interpolated) and terminate the script.  Note: exits 1 even when invoked
# via -h, so every path through usage() yields a non-success exit status.
usage() {
cat << EOF
Usage: $(basename "$0") -o ORG_NAME [-s START_DATE] [-e END_DATE] [-b "branch1 branch2 ..."]
Options:
-o GitHub organization name (required)
-s Start date (default: ${START_DATE})
-e End date (default: ${END_DATE})
-b Space-separated list of branches (default: ${BRANCHES[*]})
-f Force skip cache
-h Show this help message
Dates can be specified in YYYY-MM-DD format or full ISO-8601 format
EOF
exit 1
}
# Parse command-line flags.  Unknown options (and -h) fall through to
# usage(), which terminates the script.
while getopts "o:s:e:b:fh" opt; do
  case "$opt" in
    o)
      ORG_NAME=$OPTARG
      ;;
    s)
      START_DATE=$(format_date "$OPTARG" false)
      ;;
    e)
      END_DATE=$(format_date "$OPTARG" true)
      ;;
    b)
      # Split the space-separated branch list into the BRANCHES array.
      IFS=' ' read -ra BRANCHES <<< "$OPTARG"
      ;;
    f)
      SKIP_CACHE=true
      ;;
    h | ?)
      usage
      ;;
  esac
done
# Validate required parameters.
if [ -z "$ORG_NAME" ]; then
  # BUG FIX: the original called log() here, but log() is defined further
  # down the file, so this path failed with "log: command not found".
  # Write the diagnostic directly to stderr instead.
  echo "Error: Organization name (-o) is required" >&2
  usage
fi
# Setup logging: mirror everything written to stdout into a timestamped log
# file via tee, then fold stderr into stdout so that log() output (which
# writes to fd 2) is captured in the log file as well.
mkdir -p logs
exec 1> >(tee "logs/commit_count_$(date +%Y%m%d_%H%M%S).log")
exec 2>&1
TOTAL_COMMITS=0   # grand total across all batches; set by fetch_and_process_repos
BATCH_COMMITS=0   # per-batch result; acts as the out-parameter of process_batch
# Cache settings
CACHE_DIR="./cache"
CACHE_TTL=1800 # 30 minutes in seconds
# Maps repository name -> "count:branch" for the branch with the most commits.
declare -A REPO_COMMITS
# Emit a prefixed diagnostic line on stderr.  Because stderr is redirected
# into stdout (which is tee'd to a file), messages reach the log file too.
log() {
  printf 'logs: %s\n' "$1" >&2
  # printf '[%s] %s\n' "$(date '+%F %T')" "$1" >&2
}
# --- cache helpers ---------------------------------------------------------

# Ensure the on-disk cache directory exists.
init_cache() {
  mkdir -p "$CACHE_DIR"
}

# Derive a stable cache filename for a query: the md5 hex digest of its text.
get_cache_key() {
  local text=$1
  echo "$text" | md5sum | cut -d' ' -f1
}
# Persist API data under $CACHE_DIR/<key>, wrapped in an envelope with the
# current epoch time so read_cache can expire it.  $2 is assumed to be
# valid JSON (it is the raw GraphQL response).
write_cache() {
  local key=$1
  local data=$2
  local now
  now=$(date +%s)
  printf '{"timestamp":%s,"data":%s}\n' "$now" "$data" > "$CACHE_DIR/$key"
}
# Print the cached payload for key $1 and return 0 iff a cache entry exists
# and is younger than CACHE_TTL seconds; otherwise return 1 (cache miss).
read_cache() {
  local key=$1
  local entry="$CACHE_DIR/$key"
  [ -f "$entry" ] || return 1
  local now blob stamp
  now=$(date +%s)
  blob=$(cat "$entry")
  stamp=$(echo "$blob" | jq -r '.timestamp')
  if (( now - stamp < CACHE_TTL )); then
    echo "$blob" | jq -r '.data'
    return 0
  fi
  return 1
}
# Run a GraphQL query through the gh CLI with a transparent on-disk cache.
# Prints the JSON response on stdout.
#
# BUG FIX: the original assigned the gh output with `local response=$(...)`,
# which masks the command's exit status, and then unconditionally cached the
# result — so a failed or empty API response was written to the cache and
# poisoned every run for the next CACHE_TTL seconds.  Failures are now
# detected, not cached, and propagated as a non-zero return.
execute_query() {
  local query="$1"
  local cache_key
  cache_key=$(get_cache_key "$query")
  local cached_data
  if [ "$SKIP_CACHE" = true ]; then
    log "Forced skip cache, fetching from API..."
  else
    if cached_data=$(read_cache "$cache_key"); then
      log "Using cached data for query"
      echo "$cached_data"
      return 0
    fi
    log "Cache miss, fetching from API..."
  fi
  sleep 1 # crude pacing to stay clear of API rate limits
  local response
  if ! response=$(gh api -X POST -H "Accept: application/vnd.github+json" \
      -H "X-GitHub-Api-Version: 2022-11-28" \
      graphql -f query="$query"); then
    log "API request failed; response not cached"
    return 1
  fi
  write_cache "$cache_key" "$response"
  echo "$response"
}
# Page through all repositories of $ORG_NAME 50 at a time, hand each page to
# process_batch, and accumulate the per-batch commit counts.
# Globals read:    ORG_NAME, BRANCHES, START_DATE, END_DATE
# Globals written: TOTAL_COMMITS (final sum), BATCH_COMMITS (via process_batch)
fetch_and_process_repos() {
  local all_commits=0
  local has_next_page="true"
  local cursor=""
  local batch_number=1
  init_cache
  while [ "$has_next_page" = "true" ]; do
    log "Processing batch #${batch_number}..."
    # Build the pagination cursor argument (empty on the first page).
    local cursor_arg=""
    if [ -n "$cursor" ]; then
      cursor_arg=", after: \"$cursor\""
    fi
    # Build one aliased ref fragment per candidate branch so a single query
    # returns the commit count of every branch for every repo in the page.
    local branch_queries=""
    local branch
    for branch in "${BRANCHES[@]}"; do
      branch_queries+="
      branch_${branch}: ref(qualifiedName: \"${branch}\") {
        target {
          ... on Commit {
            history(since: \"$START_DATE\", until: \"$END_DATE\") {
              totalCount
            }
          }
        }
      }"
    done
    local query
    query=$(cat <<EOF
{
  organization(login: "$ORG_NAME") {
    repositories(first: 50${cursor_arg}) {
      pageInfo {
        hasNextPage
        endCursor
      }
      nodes {
        name
        $branch_queries
      }
    }
  }
}
EOF
)
    sleep 1 # pace requests between pages
    log "Fetching batch of repositories..."
    local batch_data
    batch_data=$(execute_query "$query")
    # Process this batch immediately; process_batch reports its result
    # through the global BATCH_COMMITS.
    BATCH_COMMITS=0
    process_batch "$batch_data"
    # BUG FIX: the original read `batch_commits=BATCH_COMMITS` (missing $),
    # which only produced the right number by accident through bash's
    # recursive arithmetic variable expansion.  Use the value explicitly.
    all_commits=$((all_commits + BATCH_COMMITS))
    # Advance the cursor for the next iteration.
    has_next_page=$(echo "$batch_data" | jq -r '.data.organization.repositories.pageInfo.hasNextPage')
    cursor=$(echo "$batch_data" | jq -r '.data.organization.repositories.pageInfo.endCursor')
    log "Batch #${batch_number} complete. Running total commits: $all_commits"
    batch_number=$((batch_number + 1))
  done
  TOTAL_COMMITS=$all_commits
}
# Process one page of GraphQL results ($1 = raw JSON).  For each repository
# in the page, pick the single branch with the most commits in the window
# (avoids double-counting commits that exist on several branches) and record
# it in REPO_COMMITS.  The batch total is reported via the global
# BATCH_COMMITS because callers cannot read locals from this function.
# NOTE(review): this runs jq once per repo x branch; fine for small orgs,
# but a single jq pass would be faster on large ones.
process_batch() {
  local batch_data="$1"
  local batch_commits=0
  # Extract repository names from this batch (newline-separated).
  local repos=$(echo "$batch_data" | jq -r '.data.organization.repositories.nodes[].name')
  local batch_size=$(echo "$repos" | wc -w)
  log "Processing $batch_size repositories in this batch"
  for repo in $repos; do
    local max_commits=0
    local max_branch=""
    # log "Checking commits across branches for $repo:"
    # Check each branch and find the one with maximum commits.  The alias
    # "branch_<name>" matches the fragment built by fetch_and_process_repos;
    # // 0 maps missing branches (null target) to zero.
    for branch in "${BRANCHES[@]}"; do
      local commits=$(echo "$batch_data" | jq -r --arg repo "$repo" --arg branch "$branch" \
        '.data.organization.repositories.nodes[] |
        select(.name == $repo) |
        .["branch_" + $branch].target.history.totalCount // 0')
      # Guard: only trust purely numeric jq output.
      if [[ "$commits" =~ ^[0-9]+$ ]]; then
        # log " - $branch: $commits commits"
        if ((commits > max_commits)); then
          max_commits=$commits
          max_branch=$branch
        fi
      fi
    done
    if [ $max_commits -gt 0 ]; then
      batch_commits=$((batch_commits + max_commits))
      REPO_COMMITS["${repo}"]="${max_commits}:${max_branch}"
      log "→ Selected '$max_branch' with $max_commits commits from [$repo]"
    else
      log "→ No commits found in any branch for [$repo]"
    fi
  done
  # Hand the batch total back to the caller through the global.
  BATCH_COMMITS=$batch_commits
}
# Main execution: fetch + count, then print a summary and write a CSV report.
log "Script started - fetching repositories for $ORG_NAME"
fetch_and_process_repos
# Debug logging — enable when investigating the REPO_COMMITS map.
# log "Debug: Number of repos with commits: ${#REPO_COMMITS[@]}"
# log "Debug: Repo array contents:"
# for key in "${!REPO_COMMITS[@]}"; do
# log "Debug: $key -> ${REPO_COMMITS[$key]}"
# done
# Final summary
REPORTS_DIR="reports"
CSV_FILE="${REPORTS_DIR}/commit_summary_$(date +%Y%m%d_%H%M%S).csv"
mkdir -p "$REPORTS_DIR"
echo "Repository,Commits,Branch" > "$CSV_FILE"
log "----------------------------------------"
log "Summary:"
log "Organization: $ORG_NAME"
log "Date Range: $START_DATE to $END_DATE"
log "Branches checked: ${BRANCHES[*]}"
log "Total Commits: $TOTAL_COMMITS"
log ""
log "Repositories with commits:"
# Sort repos by commit count (highest first) and display
if [ ${#REPO_COMMITS[@]} -eq 0 ]; then
  log " No repositories with commits found"
else
  # Emit "count repo branch" lines, sort numerically descending, then log
  # each line and append it to the CSV.  NOTE: the while loop runs in a
  # pipeline subshell, so results leave it only via the >> redirection to
  # $CSV_FILE — shell variables set inside would be lost.
  (
    for repo in "${!REPO_COMMITS[@]}"; do
      # REPO_COMMITS values are "count:branch" — split on the colon.
      IFS=: read -r count branch <<< "${REPO_COMMITS[$repo]}"
      echo "$count $repo $branch"
    done
  ) | sort -rn | while read -r count repo branch; do
    log " - $(printf "%-40s %10d commits (branch: %s)" "$repo" "$count" "$branch")"
    echo "\"${repo}\",${count},\"${branch}\"" >> "$CSV_FILE"
  done
  log "CSV report saved to: $CSV_FILE"
fi
log "----------------------------------------"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment