Skip to content

Instantly share code, notes, and snippets.

@bachmanity1
Created March 10, 2026 07:19
Show Gist options
  • Select an option

  • Save bachmanity1/8020eac9355832b827ecfeee32547496 to your computer and use it in GitHub Desktop.

Select an option

Save bachmanity1/8020eac9355832b827ecfeee32547496 to your computer and use it in GitHub Desktop.

Revisions

  1. bachmanity1 created this gist Mar 10, 2026.
    83 changes: 83 additions & 0 deletions ceph-host-check.sh
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,83 @@
    #!/bin/bash
    # Checks that Ceph hosts have been completely removed from the cluster.
    # Hosts are taken from the command line and/or from a hosts file.
    set -o errexit -o nounset -o pipefail

    # Path of the hosts file given with --file/-f; empty when none was given.
    HOST_FILE=''
    # Host names collected from the positional arguments and the hosts file.
    declare -a HOSTS=()

    # Print the command-line synopsis followed by a one-line description to
    # stdout, then terminate the script with exit status 1.
    usage() {
      printf '%s\n' \
        "Usage: $0 [--file <hosts-file>] [hostname ...]" \
        "" \
        "Verifies that hosts are fully removed: no daemons, not in host list, not in crush map."
      exit 1
    }

    # Write the arguments, joined by spaces and prefixed with "ERROR: ", to
    # stderr and terminate the script with exit status 1.
    die() {
      printf 'ERROR: %s\n' "$*" >&2
      exit 1
    }

    # Parse the command line.
    #   -f, --file <path>   read additional hosts from <path> (stored in HOST_FILE)
    #   -h, --help          print usage and exit
    #   anything else       collected as a hostname in HOSTS
    # With no arguments at all, usage is printed.
    [[ $# -lt 1 ]] && usage

    while [[ $# -gt 0 ]]; do
      case "$1" in
        --file|-f)
          # Without this guard a trailing --file aborts under `set -u` with a
          # bare "unbound variable" message instead of a usable diagnostic.
          [[ $# -ge 2 ]] || die "option $1 requires a file argument"
          HOST_FILE="$2"
          shift 2
          ;;
        --help|-h)
          usage
          ;;
        -*)
          # A mistyped option must not be treated as a hostname: it would not
          # be found anywhere in the cluster and would be reported as removed.
          die "unknown option: $1"
          ;;
        *)
          [[ -n "$1" ]] || die "empty hostname argument"
          HOSTS+=("$1")
          shift
          ;;
      esac
    done

    # Append the hosts listed in the file $1 to the global HOSTS array.
    # Format: one host per line; everything from the first '#' on a line is a
    # comment; leading/trailing whitespace and blank lines are ignored.
    load_hosts_file() {
      local path=$1 line
      # `|| [[ -n "$line" ]]` keeps a final line that lacks a trailing newline;
      # otherwise the last host in such a file would silently go unchecked.
      while IFS= read -r line || [[ -n "$line" ]]; do
        line="${line%%#*}"
        # Trim surrounding whitespace with parameter expansion only (no
        # subprocess, and no special treatment of quotes or backslashes).
        line="${line#"${line%%[![:space:]]*}"}"
        line="${line%"${line##*[![:space:]]}"}"
        if [[ -n "$line" ]]; then
          HOSTS+=("$line")
        fi
      done < "$path"
    }

    if [[ -n "$HOST_FILE" ]]; then
      [[ -f "$HOST_FILE" ]] || die "file not found: $HOST_FILE"
      load_hosts_file "$HOST_FILE"
    fi

    # Abort when neither the arguments nor the hosts file supplied any host.
    if (( ${#HOSTS[@]} == 0 )); then
      die "no hosts specified"
    fi

    # orch uses FQDN (hostname.n3r), crush uses short hostname

    # Print the orchestrator (FQDN) form of host $1.
    # An existing ".n3r" suffix is removed before it is appended again, so
    # "node1" and "node1.n3r" both yield "node1.n3r" (never "node1.n3r.n3r").
    orch_name() {
      printf '%s\n' "${1%.n3r}.n3r"
    }
    # Print the CRUSH (short) form of host $1: the argument with one trailing
    # ".n3r" removed when present, otherwise unchanged.
    crush_name() {
      local given=$1
      echo "${given%.n3r}"
    }

    # Capture the orchestrator host list and the CRUSH tree once, before the
    # per-host loop, so every host is compared against the same listing.
    # A failing query aborts with an explicit message rather than relying on
    # `set -e` alone.
    orch_hosts=$(ceph orch host ls) \
      || die "failed to run 'ceph orch host ls'"
    crush_tree=$(ceph osd crush tree) \
      || die "failed to run 'ceph osd crush tree'"

    # Print the number of lines in text $1 that contain $2 as a complete
    # whitespace-delimited field. The comparison is an exact string match
    # (no regex, no substring), so "node1.n3r" matches neither
    # "prod-node1.n3r" nor "node1xn3r".
    count_lines_with_field() {
      awk -v want="$2" '
        {
          for (i = 1; i <= NF; i++)
            if (($i "") == (want "")) { n++; break }
        }
        END { print n + 0 }
      ' <<<"$1"
    }

    # Succeed (status 0) when text $1 contains a field "host" immediately
    # followed by a field exactly equal to $2; fail (status 1) otherwise.
    # Exact matching keeps "node1" from matching "host node10".
    crush_tree_has_host() {
      awk -v want="$2" '
        {
          for (i = 1; i < NF; i++)
            if ($i == "host" && ($(i + 1) "") == (want "")) { hit = 1; exit }
        }
        END { exit !hit }
      ' <<<"$1"
    }

    # Run three checks per host (daemons, orchestrator host list, CRUSH tree),
    # print an OK/FAIL line for each, and remember hosts with any failure.
    failed=()
    for hostname in "${HOSTS[@]}"; do
      orch_h=$(orch_name "$hostname")
      crush_h=$(crush_name "$hostname")
      ok=true

      # Check no daemons running
      if ps_out=$(ceph orch ps "$orch_h" 2>&1); then
        daemon_count=$(count_lines_with_field "$ps_out" "$orch_h")
        if (( daemon_count > 0 )); then
          echo "FAIL: [$hostname] $daemon_count daemon(s) still running"
          ok=false
        else
          echo "OK: [$hostname] no daemons running"
        fi
      else
        # A query that failed proves nothing about the host; report it as a
        # failure instead of counting zero daemons.
        echo "FAIL: [$hostname] could not list daemons: $ps_out"
        ok=false
      fi

      # Check host removed from orch
      if (( $(count_lines_with_field "$orch_hosts" "$orch_h") > 0 )); then
        echo "FAIL: [$hostname] still in host list"
        ok=false
      else
        echo "OK: [$hostname] removed from host list"
      fi

      # Check removed from crush map
      if crush_tree_has_host "$crush_tree" "$crush_h"; then
        echo "FAIL: [$hostname] still in crush map"
        ok=false
      else
        echo "OK: [$hostname] removed from crush map"
      fi

      if [[ "$ok" == false ]]; then
        failed+=("$hostname")
      fi
      echo ""
    done

    # Final verdict: abort with the list of offending hosts when any check
    # failed, otherwise print the success banner.
    (( ${#failed[@]} == 0 )) \
      || die "${#failed[@]} host(s) not fully removed: ${failed[*]}"
    echo "=== All ${#HOSTS[@]} host(s) verified as removed ==="