Created
April 5, 2026 19:04
-
-
Save ifthenelse/de25e0b82eebf9540d697bafb4000dfa to your computer and use it in GitHub Desktop.
Brew cleanup advisor
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env zsh
set -euo pipefail

# =============================================================================
# Homebrew cleanup advisor -- strictly read-only
#
# Signals gathered (numbered 1..5 throughout the script):
#   1) "Leaves" installed on request (explicit installs nothing depends on)
#   2) Staleness, from the filesystem access time of the Cellar formula dir
#   3) Shell history frequency (the commands you actually run)
#   4) brew autoremove dry-run (unused dependencies; nothing is removed)
#   5) Disk usage per formula (heavy items are prioritized)
#
# What gets printed:
#   - a ranked report that lists the "reasons" behind each score
#   - a final list of *suggested removable formulas* (heuristic, not perfect)
#
# Caveats:
#   - Homebrew keeps no usage counters, so usage is inferred from shell
#     history plus atime.
#   - Access times can be noisy depending on filesystem and tools; they are
#     still a useful hint.
#   - Nothing is ever removed by this script.
# =============================================================================
# -----------------------------
# Configurable knobs
#
# Every knob keeps its built-in default unless an environment variable of the
# same name is already set, so a single run can be tuned without editing the
# script, e.g.:  STALE_DAYS=30 MIN_SIZE_MB=10 <this-script>
# -----------------------------
HISTORY_TOP_N="${HISTORY_TOP_N:-200}"              # consider a formula "used" if its binaries appear among these top commands
STALE_DAYS="${STALE_DAYS:-90}"                     # consider a formula "stale" if Cellar dir last access is older than this
MIN_SIZE_MB="${MIN_SIZE_MB:-50}"                   # only prioritize formulas larger than this if other signals are weak
SUGGESTION_SCORE_MIN="${SUGGESTION_SCORE_MIN:-3}"  # minimum score to include in the final "suggested removal" list

# The knobs feed shell arithmetic later on, so reject non-numeric overrides
# here with a clear message instead of failing mid-run.
for knob_value in "$HISTORY_TOP_N" "$STALE_DAYS" "$MIN_SIZE_MB"; do
  case "$knob_value" in
    '' | *[!0-9]*)
      echo "Error: HISTORY_TOP_N, STALE_DAYS and MIN_SIZE_MB must be non-negative integers (got: '$knob_value')" >&2
      exit 1
      ;;
  esac
done
# The score threshold may legitimately be negative (scores can drop below zero).
case "${SUGGESTION_SCORE_MIN#-}" in
  '' | *[!0-9]*)
    echo "Error: SUGGESTION_SCORE_MIN must be an integer (got: '$SUGGESTION_SCORE_MIN')" >&2
    exit 1
    ;;
esac
unset knob_value
# -----------------------------
# Dependency checks
# Stop at the first external tool that cannot be found in PATH.
# -----------------------------
need_cmds=(brew awk sed grep sort uniq stat du date tr cut head tail xargs)
for c in "${need_cmds[@]}"; do
  if ! command -v "$c" >/dev/null 2>&1; then
    echo "Error: missing required command in PATH: $c" >&2
    exit 1
  fi
done
# -----------------------------
# Homebrew paths
# Everything below is derived from the prefix reported by brew itself.
# -----------------------------
HB_PREFIX="$(brew --prefix)"
CELLAR="${HB_PREFIX}/Cellar"
BIN_DIR="${HB_PREFIX}/bin"

# Without a Cellar directory there is nothing to analyse.
[[ -d "$CELLAR" ]] || {
  echo "Error: Homebrew Cellar not found at: $CELLAR" >&2
  exit 1
}
# -----------------------------
# Helper: parse zsh history into "command" tokens
# Supports both:
#   - plain history lines
#   - extended history format: ': 1700000000:0;command args...'
# -----------------------------
HISTFILE_PATH="${HISTFILE:-$HOME/.zsh_history}"
if [[ ! -f "$HISTFILE_PATH" ]]; then
  echo "Warning: zsh history file not found at $HISTFILE_PATH; usage check will be degraded." >&2
fi

# Print the HISTORY_TOP_N most frequent command names, one per line, most
# frequent first.
#
# Globals:  HISTFILE_PATH (read), HISTORY_TOP_N (read)
# Outputs:  command names on stdout; nothing when the history file is missing
# Returns:  0 when the history file is missing; otherwise the pipeline status
#
# The command name is the first whitespace-separated token of each line after
# the optional extended-history prefix has been stripped:
#   ": 1700000000:0;git status" -> git
#   "docker ps"                 -> docker
function get_top_commands_from_history() {
  [[ -f "$HISTFILE_PATH" ]] || return 0

  # LC_ALL=C: zsh stores non-ASCII history bytes in an encoded form that is
  # not valid UTF-8, and BSD text tools abort with "illegal byte sequence" on
  # such input in a UTF-8 locale. Byte-wise processing sidesteps that.
  #
  # The final awk reads its whole input instead of using `head`, so no
  # upstream stage is killed by SIGPIPE (which would fail the pipeline under
  # `set -o pipefail`).
  LC_ALL=C awk '
    {
      sub(/^: [0-9]+:[0-9]+;/, "")
      if ($1 != "") count[$1]++
    }
    END {
      for (cmd in count) printf "%d %s\n", count[cmd], cmd
    }
  ' "$HISTFILE_PATH" \
    | LC_ALL=C sort -nr \
    | awk -v n="$HISTORY_TOP_N" 'NR <= n { print $2 }'
}
# -----------------------------
# Helper: print the size of a formula's Cellar subtree in whole MiB
#
# Globals:   CELLAR (read)
# Arguments: $1 - formula name (directory name under $CELLAR)
# Outputs:   integer MiB, rounded up; 0 when the directory is missing or the
#            size cannot be determined
# -----------------------------
function formula_size_mb() {
  local f="$1"
  local p="$CELLAR/$f"
  local du_out kib

  if [[ ! -d "$p" ]]; then
    echo 0
    return 0
  fi

  # du can print a total and still exit non-zero (e.g. an unreadable
  # subdirectory). Capture whatever it printed and ignore the status, so a
  # fallback value is never appended to a real one.
  du_out="$(du -sk "$p" 2>/dev/null || true)"

  # First whitespace-delimited token is the KiB count; anything that is not a
  # plain non-negative integer is treated as "unknown" (0).
  kib="${du_out%%[[:space:]]*}"
  case "$kib" in
    '' | *[!0-9]*) kib=0 ;;
  esac

  # Round up to whole MiB for stable integer comparisons.
  echo $(( (kib + 1023) / 1024 ))
}
# -----------------------------
# Helper: print the last-access time (epoch seconds) of a formula's Cellar dir
#
# Globals:   CELLAR (read)
# Arguments: $1 - formula name (directory name under $CELLAR)
# Outputs:   epoch seconds, or 0 when the directory is missing or no usable
#            timestamp could be obtained
#
# Works with both stat flavours:
#   GNU: stat -c '%X'   BSD/macOS: stat -f '%a'
# -----------------------------
function formula_atime_epoch() {
  local f="$1"
  local p="$CELLAR/$f"
  local epoch=""

  if [[ -d "$p" ]]; then
    # Try the GNU spelling first: GNU stat interprets -f as "file system
    # mode" and would print unrelated data on stdout, so the BSD form is only
    # attempted when the GNU form produced no clean integer.
    epoch="$(stat -c '%X' "$p" 2>/dev/null || true)"
    case "$epoch" in
      '' | *[!0-9]*) epoch="$(stat -f '%a' "$p" 2>/dev/null || true)" ;;
    esac
  fi

  # Callers compare against the literal "0" for "unknown", so collapse every
  # non-numeric result to exactly that.
  case "$epoch" in
    '' | *[!0-9]*) epoch=0 ;;
  esac
  echo "$epoch"
}
# -----------------------------
# Helper: map formulas to their linked binaries in the Homebrew bin directory
#
# Globals:   BIN_DIR (read)
# Arguments: $1 - output file; truncated, then filled with one
#                 "<formula><TAB><bin name>" line per matching entry
# Outputs:   nothing on stdout
#
# Heuristic: every non-hidden entry of $BIN_DIR is resolved to its real path;
# when that path contains /Cellar/<formula>/..., the entry is attributed to
# <formula>. Entries that do not exist after resolution (dangling symlinks)
# are skipped. Best effort only: not every command a formula provides is
# linked into $BIN_DIR.
# -----------------------------
function build_formula_to_bins_map() {
  local out="$1"
  : > "$out"
  if [[ ! -d "$BIN_DIR" ]]; then
    return
  fi

  # One python3 process resolves the whole directory (instead of one process
  # per entry). Doing the loop in Python also avoids re-declaring `local`
  # variables inside a shell loop, which zsh echoes to stdout by default.
  # Failures stay silent on purpose: the mapping is best effort and an empty
  # map only degrades the history signal.
  python3 - "$BIN_DIR" >> "$out" 2>/dev/null <<'PY' || true
import os
import re
import sys

bin_dir = sys.argv[1]
# Greedy prefix: the last /Cellar/<formula>/ component of the path wins.
cellar_re = re.compile(r".*/Cellar/([^/]+)/")

for name in sorted(os.listdir(bin_dir)):
    if name.startswith("."):
        continue
    path = os.path.join(bin_dir, name)
    if not os.path.exists(path):
        continue
    match = cellar_re.match(os.path.realpath(path))
    if match:
        print(match.group(1) + "\t" + name)
PY
}
# build_formula_to_bins_map resolves real paths through python3, so refuse to
# continue when it is not installed.
if ! command -v python3 >/dev/null 2>&1; then
  echo "Error: python3 is required for path resolution (python3 not found)." >&2
  exit 1
fi
# -----------------------------
# 1) Leaves installed on request
# -----------------------------

# Remove every temp file this script creates, on any exit path. Registered
# before the first mktemp because `set -e` can abort the run at any later
# step, which would otherwise leave the files behind. Variables that have not
# been assigned yet expand to "" and are skipped.
function cleanup_temp_files() {
  local tmp_path
  for tmp_path in \
    "${LEAVES_ON_REQ:-}" "${TOP_CMDS:-}" "${F2B:-}" "${USED_FORMULAS:-}" \
    "${AUTOREMOVE_OUT:-}" "${AUTOREMOVE_CAND:-}" "${REPORT:-}" "${SUGGESTED:-}"; do
    if [[ -n "$tmp_path" ]]; then
      rm -f -- "$tmp_path"
    fi
  done
  return 0
}
trap cleanup_temp_files EXIT

echo "==> (1) Collecting leaves installed on request..."
LEAVES_ON_REQ="$(mktemp)"
# One formula name per line, blank lines dropped, sorted.
brew leaves --installed-on-request | sed '/^$/d' | sort > "$LEAVES_ON_REQ"
# wc pads its count with spaces on some platforms; strip them for display.
LEAVES_COUNT="$(wc -l < "$LEAVES_ON_REQ" | tr -d ' ')"
echo "Found $LEAVES_COUNT leaves installed on request."
# -----------------------------
# 3) Usage frequency via shell history (top commands)
# -----------------------------
echo "==> (3) Extracting top commands from zsh history (top $HISTORY_TOP_N)..."
TOP_CMDS="$(mktemp)"
# Best effort: a missing or unparsable history only weakens this signal.
get_top_commands_from_history > "$TOP_CMDS" || true
TOP_CMDS_COUNT="$(wc -l < "$TOP_CMDS" 2>/dev/null | tr -d ' ' || echo 0)"
echo "Parsed $TOP_CMDS_COUNT top commands from history."

# Build formula->bins mapping ("<formula><TAB><bin>" per line)
echo "==> Building Homebrew bin -> formula mapping..."
F2B="$(mktemp)"
build_formula_to_bins_map "$F2B"

# A formula counts as "used" when at least one of its bins is in TOP_CMDS.
USED_FORMULAS="$(mktemp)"
if [[ -s "$TOP_CMDS" && -s "$F2B" ]]; then
  # Single-pass literal join: the first file loads the command names into a
  # set, the second emits the formula of every bin found in that set.
  # Names are compared as plain strings, so bins such as "python3.12" or "["
  # are never interpreted as regular expressions.
  # (The NR == FNR idiom needs a non-empty first file; the -s test above
  # guarantees that.)
  awk -F '\t' '
    NR == FNR { top[$0] = 1; next }
    ($2 in top) { print $1 }
  ' "$TOP_CMDS" "$F2B" | sort -u > "$USED_FORMULAS"
else
  : > "$USED_FORMULAS"
fi
USED_COUNT="$(wc -l < "$USED_FORMULAS" | tr -d ' ')"
echo "Detected $USED_COUNT formulas with binaries appearing in top history commands."
# -----------------------------
# 4) brew autoremove dry-run
# -----------------------------
echo "==> (4) Checking brew autoremove (dry-run)..."
AUTOREMOVE_OUT="$(mktemp)"

# Capture the help text first instead of piping it into `grep -q`: grep exits
# at the first match, and under `set -o pipefail` a writer killed by the
# closed pipe would make the whole test look like "flag not supported".
autoremove_help="$(brew autoremove --help 2>/dev/null || true)"
if [[ "$autoremove_help" == *--dry-run* ]]; then
  # Dry-run only; a non-zero status is tolerated and the output kept for the
  # report.
  brew autoremove --dry-run > "$AUTOREMOVE_OUT" 2>&1 || true
else
  echo "brew autoremove does not support --dry-run on this brew version." > "$AUTOREMOVE_OUT"
fi

# Extract any formula-like tokens from autoremove output (best effort): the
# leading run of name characters on each line. Lines that start with any
# other character (such as "==>" headings) contribute nothing.
# NOTE(review): autoremove presumably lists dependencies that were not
# installed on request; if so, these names may never coincide with the
# on-request leaves scored later -- confirm against brew's documentation.
AUTOREMOVE_CAND="$(mktemp)"
grep -Eo '^[[:alnum:]@._+-]+' "$AUTOREMOVE_OUT" 2>/dev/null | sort -u > "$AUTOREMOVE_CAND" || true
# -----------------------------
# 2) Staleness signal + 5) Disk usage per formula
#
# Score per leaf (higher => more likely removable):
#   +1  leaf installed on request (baseline)
#   +1  Cellar size >= MIN_SIZE_MB
#   +1  Cellar dir access time older than STALE_DAYS
#   -2  one of its bins is among the top history commands, otherwise +1
#   +1  named in the autoremove dry-run output
# Leaves scoring >= SUGGESTION_SCORE_MIN end up in the suggestion list.
# -----------------------------
echo "==> (2)+(5) Computing staleness and size for each leaf..."
NOW_EPOCH="$(date +%s)"
STALE_SEC=$(( STALE_DAYS * 24 * 60 * 60 ))
REPORT="$(mktemp)"
: > "$REPORT"
# Header (two lines; the output stage relies on exactly two)
printf "%-28s %-8s %-8s %-6s %s\n" "formula" "sizeMB" "stale" "score" "reasons" >> "$REPORT"
printf "%-28s %-8s %-8s %-6s %s\n" "------" "------" "-----" "-----" "-------" >> "$REPORT"
SUGGESTED="$(mktemp)"
: > "$SUGGESTED"
while IFS= read -r f; do
  [[ -n "$f" ]] || continue

  # Leaves from third-party taps are listed tap-qualified ("user/tap/name"),
  # while the Cellar directory and the bin mapping use the bare name. Use the
  # last path component for every filesystem/history lookup; keep the full
  # name for display so it can be passed to brew unchanged.
  cellar_name="${f##*/}"

  local_score=0
  reasons=()

  # Leaf installed on request => baseline candidate class
  local_score=$((local_score + 1))
  reasons+=("leaf-on-request")

  # Size
  sz_mb="$(formula_size_mb "$cellar_name")"
  if (( sz_mb >= MIN_SIZE_MB )); then
    local_score=$((local_score + 1))
    reasons+=("size>=${MIN_SIZE_MB}MB")
  fi

  # Staleness by atime ("0" means the timestamp is unknown)
  atime="$(formula_atime_epoch "$cellar_name")"
  stale="unknown"
  if [[ "$atime" != "0" ]]; then
    age=$(( NOW_EPOCH - atime ))
    if (( age >= STALE_SEC )); then
      stale="yes"
      local_score=$((local_score + 1))
      reasons+=("stale>=${STALE_DAYS}d")
    else
      stale="no"
    fi
  fi

  # Usage by history. -F: names such as "python@3.12" must match literally,
  # not as regular expressions.
  if grep -qxF -- "$cellar_name" "$USED_FORMULAS" 2>/dev/null; then
    # If used recently (by command frequency), strongly reduce removal likelihood
    local_score=$((local_score - 2))
    reasons+=("seen-in-history-top")
  else
    local_score=$((local_score + 1))
    reasons+=("not-seen-in-history-top")
  fi

  # Autoremove hint: if it appears in autoremove output, it is an unused dep candidate
  if grep -qxF -- "$f" "$AUTOREMOVE_CAND" 2>/dev/null; then
    local_score=$((local_score + 1))
    reasons+=("autoremove-suggests")
  fi

  # Print report row (reasons joined with commas)
  printf "%-28s %-8s %-8s %-6s %s\n" "$f" "${sz_mb}" "$stale" "$local_score" "$(IFS=,; echo "${reasons[*]}")" >> "$REPORT"

  # Decide suggestion
  if (( local_score >= SUGGESTION_SCORE_MIN )); then
    echo "$f" >> "$SUGGESTED"
  fi
done < "$LEAVES_ON_REQ"

# Sort suggested list alphabetically (stable output)
sort -u "$SUGGESTED" -o "$SUGGESTED"
# -----------------------------
# Output
# -----------------------------
# Banner with the effective settings of this run.
cat <<EOF

==============================
Homebrew Cleanup Advisor Report
Prefix: $HB_PREFIX
Cellar: $CELLAR
History file: $HISTFILE_PATH
Stale threshold: ${STALE_DAYS} days
History top N: $HISTORY_TOP_N
Min size: ${MIN_SIZE_MB} MB
Suggestion score min: $SUGGESTION_SCORE_MIN
==============================

==> Ranked report (higher score => more likely removable)
EOF

# The first two report lines are the header and stay on top; the data rows
# are ordered by score (4th column, numeric, descending) and then by size
# (2nd column, numeric, descending).
head -n 2 "$REPORT"
tail -n +3 "$REPORT" | sort -k4,4nr -k2,2nr

printf '\n%s\n' "==> Suggested removable formulas (heuristic; review before uninstall):"
if [[ ! -s "$SUGGESTED" ]]; then
  echo "(none matched the current thresholds)"
else
  cat "$SUGGESTED"
fi

printf '\n%s\n' "==> brew autoremove --dry-run output (for reference):"
cat "$AUTOREMOVE_OUT"

# Closing hints; nothing is executed on the user's behalf.
cat <<'EOF'

Tip: To uninstall suggestions (MANUAL REVIEW FIRST):
 brew uninstall <formula>

Tip: To remove unused deps after manual uninstalls:
 brew autoremove

EOF
# Remove every scratch file created during the run.
temp_files=(
  "$LEAVES_ON_REQ" "$TOP_CMDS" "$F2B" "$USED_FORMULAS"
  "$AUTOREMOVE_OUT" "$AUTOREMOVE_CAND" "$REPORT" "$SUGGESTED"
)
rm -f "${temp_files[@]}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment