Skip to content

Instantly share code, notes, and snippets.

@mshalaby
Created December 18, 2024 21:19
Show Gist options
  • Select an option

  • Save mshalaby/37d64fa8e6754a51f35bd08d4f13dcd3 to your computer and use it in GitHub Desktop.

Select an option

Save mshalaby/37d64fa8e6754a51f35bd08d4f13dcd3 to your computer and use it in GitHub Desktop.
A script to collect commit counts for all of an organization's repositories within a given time range
#!/bin/bash
# Date helper functions
# Emit the inclusive ISO-8601 start/end timestamps for a calendar year.
# $1 - year (defaults to the current year); prints "START END" on stdout.
get_year_dates() {
  local yr
  yr=${1:-$(date +%Y)}
  printf '%s-01-01T00:00:00Z %s-12-31T23:59:59Z\n' "$yr" "$yr"
}
# Normalize a date string to full ISO-8601.
# $1 - date; $2 - "true" when this is a range end (append 23:59:59).
# A bare YYYY-MM-DD gains a time component; anything else passes through.
format_date() {
  local raw="$1" as_end="$2"
  if [[ ! "$raw" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}$ ]]; then
    printf '%s\n' "$raw"
  elif [[ "$as_end" == "true" ]]; then
    printf '%sT23:59:59Z\n' "$raw"
  else
    printf '%sT00:00:00Z\n' "$raw"
  fi
}
# Defaults: cache enabled, common mainline branch names, current calendar year.
SKIP_CACHE=false
BRANCHES=(main master develop staging stage production live)
read START_DATE END_DATE <<< "$(get_year_dates)"
# Print help text (interpolating the current defaults) and exit non-zero.
# Reached via -h, on unknown options, and when required arguments are missing.
usage() {
# NOTE: the heredoc below is runtime output; do not indent or comment inside it.
cat << EOF
Usage: $(basename "$0") -o ORG_NAME [-s START_DATE] [-e END_DATE] [-b "branch1 branch2 ..."]
Options:
-o GitHub organization name (required)
-s Start date (default: ${START_DATE})
-e End date (default: ${END_DATE})
-b Space-separated list of branches (default: ${BRANCHES[*]})
-f Force skip cache
-h Show this help message
Dates can be specified in YYYY-MM-DD format or full ISO-8601 format
EOF
# Exit 1 so misuse is distinguishable from a successful run.
exit 1
}
# Parse arguments
while getopts "o:s:e:b:fh" opt; do
case $opt in
o) ORG_NAME="$OPTARG" ;; # target organization (required)
s) START_DATE=$(format_date "$OPTARG" false) ;; # normalize to ...T00:00:00Z
e) END_DATE=$(format_date "$OPTARG" true) ;; # normalize to ...T23:59:59Z
b) IFS=' ' read -ra BRANCHES <<< "$OPTARG" ;; # split "b1 b2 ..." into an array
f) SKIP_CACHE=true ;; # bypass the response cache
h) usage ;;
?) usage ;; # getopts reports unknown options as '?'
esac
done
# Validate required parameters.
# BUG FIX: this code runs before log() is defined further down the file, so the
# original `log "Error: ..."` call failed with "command not found" at runtime;
# write the diagnostic straight to stderr instead.
if [ -z "$ORG_NAME" ]; then
printf '%s\n' "Error: Organization name (-o) is required" >&2
usage
fi
# Setup logging
# Mirror stdout to a timestamped log file via process substitution, then fold
# stderr into the same stream (order matters: 2>&1 must come after the tee so
# stderr — where log() writes — is captured in the file too).
mkdir -p logs
exec 1> >(tee "logs/commit_count_$(date +%Y%m%d_%H%M%S).log")
exec 2>&1
# Running totals: TOTAL_COMMITS is the grand total; BATCH_COMMITS is how
# process_batch hands its per-page result back to the caller (shell functions
# cannot return values directly, so a global is used).
TOTAL_COMMITS=0
BATCH_COMMITS=0
# Cache settings
CACHE_DIR="./cache"
CACHE_TTL=1800 # 30 minutes in seconds
# Map: repo name -> "count:branch" for the branch with the most commits.
declare -A REPO_COMMITS
# Emit one diagnostic line (prefixed "logs: ") on stderr.
log() {
  printf 'logs: %s\n' "$1" >&2
  # Alternative timestamped format, kept for reference:
  # printf '[%s] %s\n' "$(date '+%F %T')" "$1" >&2
}
# Ensure the on-disk cache directory exists (idempotent).
init_cache() {
  [ -d "$CACHE_DIR" ] || mkdir -p "$CACHE_DIR"
}
# Derive a stable cache filename from a query string
# (md5 of the query plus a trailing newline).
get_cache_key() {
  local digest
  digest=$(printf '%s\n' "$1" | md5sum)
  printf '%s\n' "${digest%% *}"
}
# Persist a cache entry as {"timestamp":<epoch seconds>,"data":<payload>}.
# NOTE: $2 is embedded verbatim, so it must already be valid JSON.
write_cache() {
  local entry_key="$1" payload="$2" now
  now=$(date +%s)
  printf '{"timestamp":%s,"data":%s}\n' "$now" "$payload" > "$CACHE_DIR/$entry_key"
}
# Read a cached entry by key.
# Prints the cached payload and returns 0 when a fresh entry (younger than
# CACHE_TTL seconds) exists; returns 1 on miss, expiry, or corruption.
read_cache() {
  local key="$1"
  local cache_file="$CACHE_DIR/$key"
  local now timestamp age cache_data
  now=$(date +%s)
  [ -f "$cache_file" ] || return 1
  cache_data=$(cat "$cache_file") || return 1
  # Guard against corrupt/partial cache files: the timestamp must be numeric
  # before it is used in arithmetic, otherwise bash aborts the expression
  # with a noisy error (the original code had no such check).
  timestamp=$(echo "$cache_data" | jq -r '.timestamp' 2>/dev/null)
  [[ "$timestamp" =~ ^[0-9]+$ ]] || return 1
  age=$((now - timestamp))
  if [ "$age" -lt "$CACHE_TTL" ]; then
    echo "$cache_data" | jq -r '.data'
    return 0
  fi
  return 1
}
# Run a GraphQL query through the gh CLI with a TTL-based file cache.
# $1 - query text; prints the raw JSON response on stdout.
# Returns non-zero (without caching) when the API call fails.
execute_query() {
  local query="$1"
  local cache_key
  cache_key=$(get_cache_key "$query")
  local cached_data
  if [ "$SKIP_CACHE" = true ]; then
    log "Forced skip cache, fetching from API..."
  else
    if cached_data=$(read_cache "$cache_key"); then
      log "Using cached data for query"
      echo "$cached_data"
      return 0
    fi
    log "Cache miss, fetching from API..."
  fi
  sleep 1 # crude rate limiting against the GitHub API
  local response
  # BUG FIX: the response was previously cached unconditionally, so a failed
  # gh call poisoned the cache with an empty/error payload for CACHE_TTL
  # seconds. Only cache when gh succeeds.
  if ! response=$(gh api -X POST -H "Accept: application/vnd.github+json" \
    -H "X-GitHub-Api-Version: 2022-11-28" \
    graphql -f query="$query"); then
    log "API request failed; response not cached"
    echo "$response"
    return 1
  fi
  write_cache "$cache_key" "$response"
  echo "$response"
}
# Page through every repository in $ORG_NAME (50 per GraphQL request), query
# the commit count of each candidate branch in a single aliased request, and
# accumulate the grand total into the TOTAL_COMMITS global.
fetch_and_process_repos() {
local all_commits=0
local has_next_page="true"
local cursor=""
local batch_number=1
init_cache
while [ "$has_next_page" = "true" ]; do
log "Processing batch #${batch_number}..."
# Build cursor argument for pagination (empty on the first page)
local cursor_arg=""
if [ -n "$cursor" ]; then
cursor_arg=", after: \"$cursor\""
fi
# Build one aliased ref fragment per branch so a single request covers every
# candidate branch for every repo in the page. NOTE(review): aliases like
# branch_${branch} assume branch names are valid GraphQL identifiers; names
# with '-' or '/' would produce an invalid query — confirm against usage.
local branch_queries=""
for branch in "${BRANCHES[@]}"; do
branch_queries+="$(echo -n "
branch_${branch}: ref(qualifiedName: \"${branch}\") {
target {
... on Commit {
history(since: \"$START_DATE\", until: \"$END_DATE\") {
totalCount
}
}
}
}")"
done
local query=$(cat <<EOF
{
organization(login: "$ORG_NAME") {
repositories(first: 50${cursor_arg}) {
pageInfo {
hasNextPage
endCursor
}
nodes {
name
$branch_queries
}
}
}
}
EOF
)
# Execute query
sleep 1
log "Fetching batch of repositories..."
local batch_data=$(execute_query "$query")
# Process this batch immediately; process_batch reports via BATCH_COMMITS.
BATCH_COMMITS=0
process_batch "$batch_data"
# BUG FIX: was 'batch_commits=BATCH_COMMITS' (missing $). It only worked by
# accident, via arithmetic variable indirection in the addition below.
local batch_commits=$BATCH_COMMITS
all_commits=$((all_commits + batch_commits))
# Update cursor and has_next_page for next iteration
has_next_page=$(echo "$batch_data" | jq -r '.data.organization.repositories.pageInfo.hasNextPage')
cursor=$(echo "$batch_data" | jq -r '.data.organization.repositories.pageInfo.endCursor')
log "Batch #${batch_number} complete. Running total commits: $all_commits"
batch_number=$((batch_number + 1))
done
TOTAL_COMMITS=$all_commits
}
# Count commits for every repository in one GraphQL response page.
# $1 - raw JSON batch response produced by execute_query.
# For each repo this picks the single branch with the highest commit count
# (presumably to avoid double-counting history shared between branches),
# adds it to the batch total, and records "count:branch" in REPO_COMMITS.
# The total is handed back through the BATCH_COMMITS global.
process_batch() {
local batch_data="$1"
local batch_commits=0
# Extract repositories from this batch
local repos=$(echo "$batch_data" | jq -r '.data.organization.repositories.nodes[].name')
# wc -w is safe here because GitHub repo names cannot contain whitespace
local batch_size=$(echo "$repos" | wc -w)
log "Processing $batch_size repositories in this batch"
for repo in $repos; do
local max_commits=0
local max_branch=""
# log "Checking commits across branches for $repo:"
# Check each branch and find the one with maximum commits
for branch in "${BRANCHES[@]}"; do
# Aliases are named branch_<name> in the query built by the caller;
# '// 0' maps missing branches (null ref/target) to zero.
local commits=$(echo "$batch_data" | jq -r --arg repo "$repo" --arg branch "$branch" \
'.data.organization.repositories.nodes[] |
select(.name == $repo) |
.["branch_" + $branch].target.history.totalCount // 0')
# Skip anything non-numeric (e.g. jq errors on malformed responses)
if [[ "$commits" =~ ^[0-9]+$ ]]; then
# log " - $branch: $commits commits"
if ((commits > max_commits)); then
max_commits=$commits
max_branch=$branch
fi
fi
done
if [ $max_commits -gt 0 ]; then
batch_commits=$((batch_commits + max_commits))
REPO_COMMITS["${repo}"]="${max_commits}:${max_branch}"
log "→ Selected '$max_branch' with $max_commits commits from [$repo]"
else
log "→ No commits found in any branch for [$repo]"
fi
done
BATCH_COMMITS=$batch_commits
}
# Main execution
log "Script started - fetching repositories for $ORG_NAME"
fetch_and_process_repos
# Add debug logging before summary section
# log "Debug: Number of repos with commits: ${#REPO_COMMITS[@]}"
# log "Debug: Repo array contents:"
# for key in "${!REPO_COMMITS[@]}"; do
# log "Debug: $key -> ${REPO_COMMITS[$key]}"
# done
# Final summary
REPORTS_DIR="reports"
CSV_FILE="${REPORTS_DIR}/commit_summary_$(date +%Y%m%d_%H%M%S).csv"
mkdir -p "$REPORTS_DIR"
echo "Repository,Commits,Branch" > "$CSV_FILE"
log "----------------------------------------"
log "Summary:"
log "Organization: $ORG_NAME"
log "Date Range: $START_DATE to $END_DATE"
log "Branches checked: ${BRANCHES[*]}"
log "Total Commits: $TOTAL_COMMITS"
log ""
log "Repositories with commits:"
# Sort repos by commit count (highest first) and display
if [ ${#REPO_COMMITS[@]} -eq 0 ]; then
log " No repositories with commits found"
else
# The subshell emits "count repo branch" lines so sort -rn can order them
# numerically by count. The while stage also runs in a subshell — that is
# fine because it only appends to the CSV file (appends survive; any
# variable assignments there would not).
(
for repo in "${!REPO_COMMITS[@]}"; do
IFS=: read -r count branch <<< "${REPO_COMMITS[$repo]}"
echo "$count $repo $branch"
done
) | sort -rn | while read -r count repo branch; do
log " - $(printf "%-40s %10d commits (branch: %s)" "$repo" "$count" "$branch")"
echo "\"${repo}\",${count},\"${branch}\"" >> "$CSV_FILE"
done
log "CSV report saved to: $CSV_FILE"
fi
log "----------------------------------------"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment