Created
May 14, 2024 06:55
-
-
Save AkideLiu/3a30b10c968277a2639b77f6d6995772 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# mi - [M]on[I]toring GPU usage on nodes of a Slurm cluster.
#
# Usage: mi [-a | -w NODE_NAME | -p PARTITION_NAME | -j JOB_ID | -u [USER_ID]]
#
# Runs `gpustat -p --color` on the selected nodes, either through `srun`
# (-a, -w, -p) or through `ssh` (-j, -u). Any other argument, or none,
# prints the help text.
#
# Strict mode (`set -e`) is intentionally not enabled: the per-node queries
# are best-effort, and one unreachable node must not abort the whole listing.

#######################################
# Print one "NODE PARTITION STATE" row per line of `sinfo -N`.
# The "*" suffix that sinfo appends to the default partition is stripped so
# the partition field can be compared exactly and handed to `srun -p`.
# Outputs: normalized rows on stdout
#######################################
node_rows() {
  sinfo -N --noheader | awk '{ sub(/\*$/, "", $3); print $1, $3, $4 }'
}

#######################################
# Run gpustat through srun for every "NODE PARTITION" pair read from stdin.
# srun's stdin is redirected from /dev/null so it cannot swallow the rest of
# the list; its stderr is discarded because the query is best-effort.
#######################################
srun_gpustat_each() {
  local node partition
  while read -r node partition; do
    srun -p "$partition" -w "$node" gpustat -p --color 2>/dev/null </dev/null
  done
}

#######################################
# Run gpustat through ssh for every hostname read from stdin (one per line).
#######################################
ssh_gpustat_each() {
  local node
  while read -r node; do
    ssh "$node" gpustat -p --color 2>/dev/null </dev/null
  done
}

#######################################
# Expand a compressed node list into one hostname per line.
# Handles plain names ("node01"), bracket groups with comma lists and ranges
# ("gpu[1-3,7]"), zero-padded ranges ("n[08-10]") and several comma-separated
# entries ("a[1-2],b3"). Only one bracket group per entry is expanded.
# Arguments: $1 - node list string
# Outputs:   hostnames on stdout, one per line
#######################################
expand_nodelist() {
  local nodelist=$1
  local -a entries=()
  local -a items=()
  local entry='' ch prefix suffix body item first last width num i
  local depth=0

  # Split on commas, but only those outside a [...] group.
  for (( i = 0; i < ${#nodelist}; i++ )); do
    ch=${nodelist:i:1}
    case "$ch" in
      '[') depth=$(( depth + 1 )) ;;
      ']') depth=$(( depth - 1 )) ;;
      ',')
        if (( depth == 0 )); then
          entries+=("$entry")
          entry=''
          continue
        fi
        ;;
    esac
    entry+=$ch
  done
  entries+=("$entry")

  for entry in "${entries[@]}"; do
    [[ -n "$entry" ]] || continue

    # No bracket group: the entry already is a hostname.
    if [[ "$entry" != *\[*\]* ]]; then
      printf '%s\n' "$entry"
      continue
    fi

    prefix=${entry%%\[*}
    body=${entry#*\[}
    suffix=${body#*\]}
    body=${body%%\]*}

    IFS=',' read -r -a items <<< "$body"
    for item in "${items[@]}"; do
      if [[ "$item" =~ ^([0-9]+)-([0-9]+)$ ]]; then
        first=${BASH_REMATCH[1]}
        last=${BASH_REMATCH[2]}
        # Keep the zero padding of the range start; force base 10 so that
        # values such as "08" are not rejected as invalid octal numbers.
        width=${#first}
        for (( num = 10#$first; num <= 10#$last; num++ )); do
          printf '%s%0*d%s\n' "$prefix" "$width" "$num" "$suffix"
        done
      else
        printf '%s%s%s\n' "$prefix" "$item" "$suffix"
      fi
    done
  done
}

#######################################
# Read node lists from stdin (one per line, as printed by `squeue -o %N`)
# and print every hostname once, in first-seen order. Empty lines are
# skipped.
#######################################
unique_hosts() {
  local nodelist
  while read -r nodelist; do
    [[ -n "$nodelist" ]] || continue
    expand_nodelist "$nodelist"
  done | awk '!seen[$0]++'
}

#######################################
# -a: gpustat on every mix/idle node, then list everything else.
#######################################
show_all() {
  printf '%s\n' "Available (MIXED or IDLE) nodes:"
  # A node that belongs to several partitions is queried only once.
  node_rows \
    | awk '$3 ~ /mix|idle/ && !seen[$1]++ { print $1, $2 }' \
    | srun_gpustat_each
  printf '%s\n' "Unavailable (NOT MIXED and NOT IDLE) nodes:"
  sinfo | grep -v -e mix -e idle
}

#######################################
# -w: gpustat on one node, matched by exact name.
# Arguments: $1 - node name
# Returns:   1 if the node is unknown to sinfo
#######################################
show_node() {
  local node=$1 partition
  partition=$(node_rows | awk -v want="$node" '$1 == want && !n++ { print $2 }')
  if [[ -z "$partition" ]]; then
    printf 'Node %s not found.\n' "$node" >&2
    return 1
  fi
  printf '%s %s\n' "$node" "$partition" | srun_gpustat_each
}

#######################################
# -p: gpustat on the mix/idle nodes of one partition, then list the rest.
# Arguments: $1 - partition name (exact match)
# Returns:   1 if no node belongs to that partition
#######################################
show_partition() {
  local partition=$1 rows
  rows=$(node_rows | awk -v want="$partition" '$2 == want')
  if [[ -z "$rows" ]]; then
    printf 'Partition %s not found.\n' "$partition" >&2
    return 1
  fi
  printf 'Available (MIXED or IDLE) nodes on %s:\n' "$partition"
  awk '$3 ~ /mix|idle/ { print $1, $2 }' <<< "$rows" | srun_gpustat_each
  printf 'Unavailable (NOT MIXED and NOT IDLE) nodes on %s:\n' "$partition"
  # Header line plus the partition's rows whose state is neither mix nor idle.
  sinfo -N | awk -v want="$partition" '
    NR == 1 { print; next }
    { part = $3; sub(/\*$/, "", part) }
    part == want && $4 !~ /mix|idle/'
}

#######################################
# Print the help text.
#######################################
usage() {
  echo "Usage: mi [options]"
  echo "[M]on[I]toring GPU usage on nodes of slurm cluster"
  echo "Options: "
  echo " [-h, --help] show this help message"
  echo " [-a] show all stats on each node (take dozens of seconds)"
  echo " [-w] NODE_NAME show the stats on a specific node"
  echo " [-p] PARTITION_NAME show the stats on a specific partition"
  echo " [-j] JobID show the stats on a specific job ID"
  echo " [-u] [USER_ID] show the stats on the nodes of a user's jobs (default: \$USER)"
}

#######################################
# Entry point: dispatch on the first argument.
# Arguments: $1 - option, $2 - option value (where applicable)
# Returns:   1 on a missing or unknown node/partition/job argument, 0 after
#            printing the help text, otherwise the status of the last query
#######################################
main() {
  local option=${1:-} value=${2:-}

  case "$option" in
    -a)
      show_all
      ;;
    -w)
      if [[ -z "$value" ]]; then
        echo "Please designate the node name. Options: [-w] NODE_NAME" >&2
        return 1
      fi
      show_node "$value"
      ;;
    -p)
      if [[ -z "$value" ]]; then
        echo "Please designate the partition name. Options: [-p] PARTITION_NAME" >&2
        return 1
      fi
      show_partition "$value"
      ;;
    -j)
      if [[ -z "$value" ]]; then
        echo "Please designate the job ID. Options: [-j] JOB_ID" >&2
        return 1
      fi
      squeue --noheader -j "$value" -o '%N' | unique_hosts | ssh_gpustat_each
      ;;
    -u)
      if [[ -z "$value" ]]; then
        value=${USER:-$(id -un)}
        echo "You have not specified a user ID, using $value instead"
      fi
      squeue --noheader -u "$value" -o '%N' | unique_hosts | ssh_gpustat_each
      ;;
    *)
      usage
      return 0
      ;;
  esac
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment