Skip to content

Instantly share code, notes, and snippets.

@AkideLiu
Created May 14, 2024 06:55
Show Gist options
  • Select an option

  • Save AkideLiu/3a30b10c968277a2639b77f6d6995772 to your computer and use it in GitHub Desktop.

Select an option

Save AkideLiu/3a30b10c968277a2639b77f6d6995772 to your computer and use it in GitHub Desktop.
#!/bin/bash
# mi - [M]on[I]tor GPU usage on the nodes of a Slurm cluster.
#
# Runs `gpustat -p --color` on the selected nodes:
#   -a            every node whose state contains "mix" or "idle" (via srun)
#   -w NODE       a single node (via srun)
#   -p PARTITION  the mix/idle nodes of one partition (via srun)
#   -j JOB_ID     the nodes allocated to a job (via ssh)
#   -u [USER_ID]  the nodes allocated to a user's jobs (via ssh)
# Anything else prints the usage text.
#
# NOTE(review): the parsing assumes the default `sinfo -N` column layout
# (NODELIST NODES PARTITION STATE); a custom SINFO_FORMAT would break it.
# set -x

# Print the help text.
usage() {
  echo "Usage: mi [options]"
  echo "[M]on[I]toring GPU usage on nodes of slurm cluster"
  echo "Options: "
  echo " [-h, --help] show this help message"
  echo " [-a] show all stats on each node (take dozens of seconds)"
  echo " [-w] NODE_NAME show the stats on a specific node"
  echo " [-p] PARTITION_NAME show the stats on a specific partition"
  echo " [-j] JobID show the stats on a specific job ID"
  echo " [-u] [USER_ID] show the stats on the nodes of a user's jobs (default: current user)"
}

# Print one "NODE PARTITION STATE" line per row of `sinfo -N`.
# The '*' suffix sinfo appends to the default partition is stripped so the
# name can be compared exactly and handed to `srun -p`.
node_table() {
  sinfo -N --noheader | awk '{ sub(/\*$/, "", $3); print $1, $3, $4 }'
}

# Succeed when the node state $1 counts as available (contains mix or idle).
is_available() {
  [[ "$1" == *mix* || "$1" == *idle* ]]
}

# Run gpustat on node $1 through a job step in partition $2.
# stdin is /dev/null so srun cannot swallow the input of a calling read loop;
# stderr is discarded so unreachable nodes stay quiet (best effort).
gpustat_via_srun() {
  srun -p "$2" -w "$1" gpustat -p --color 2>/dev/null </dev/null
}

# Run gpustat on node $1 over ssh (same redirections as gpustat_via_srun).
gpustat_via_ssh() {
  ssh "$1" gpustat -p --color 2>/dev/null </dev/null
}

# Expand one hostlist group such as "gpu[01-03,7]" or "solo7", one host per
# line. Names without brackets are printed verbatim (hyphens included);
# numeric ranges keep the zero padding of their lower bound.
# NOTE(review): only a single [...] per group is handled.
expand_node_group() {
  local group=$1 prefix inner suffix item lo hi num
  local -a items=()

  [[ -n "$group" ]] || return 0
  if [[ "$group" != *"["*"]"* ]]; then
    printf '%s\n' "$group"
    return 0
  fi

  prefix=${group%%\[*}
  inner=${group#*\[}
  suffix=${inner#*\]}
  inner=${inner%%\]*}

  IFS=',' read -ra items <<<"$inner"
  for item in "${items[@]}"; do
    if [[ "$item" =~ ^([0-9]+)-([0-9]+)$ ]]; then
      lo=${BASH_REMATCH[1]}
      hi=${BASH_REMATCH[2]}
      # 10# forces base 10: "08" would otherwise be rejected as octal.
      for ((num = 10#$lo; num <= 10#$hi; num++)); do
        printf '%s%0*d%s\n' "$prefix" "${#lo}" "$num" "$suffix"
      done
    else
      printf '%s%s%s\n' "$prefix" "$item" "$suffix"
    fi
  done
}

# Expand a full nodelist ($1), e.g. "gpu[01-03,7],cpu-1", one host per line.
# Only commas outside brackets separate groups.
expand_nodelist() {
  local list=$1 group="" ch
  local -i depth=0 i

  for ((i = 0; i < ${#list}; i++)); do
    ch=${list:i:1}
    case "$ch" in
      '[') depth=$((depth + 1)) ;;
      ']') depth=$((depth - 1)) ;;
    esac
    if [[ "$ch" == "," ]] && ((depth == 0)); then
      expand_node_group "$group"
      group=""
    else
      group+=$ch
    fi
  done
  expand_node_group "$group"
}

# Run gpustat (over ssh) once on every node allocated to the jobs selected by
# the squeue filter arguments given in "$@" (e.g. -j ID or -u USER).
gpustat_for_jobs() {
  local nodelist node

  while read -r node; do
    gpustat_via_ssh "$node"
  done < <(
    squeue --noheader "$@" -o "%N" |
      while read -r nodelist; do
        expand_nodelist "$nodelist"
      done |
      awk 'NF && !seen[$0]++' # drop blanks and nodes shared by several jobs
  )
}

# -a: gpustat on every available node, then the sinfo summary of the rest.
show_all() {
  local node partition state

  echo "Avaiable (MIXED or IDEL) nodes:"
  while read -r node partition state; do
    is_available "$state" || continue
    gpustat_via_srun "$node" "$partition"
  done < <(node_table)
  echo "Unavailable (NOT MIXED and NOT IDEL) nodes:"
  sinfo | grep -v -e mix -e idle
}

# -w NODE: gpustat on the node whose name is exactly $1.
show_node() {
  local name=${1-} node partition state

  if [[ -z "$name" ]]; then
    echo "Please designate the node name. Options: [-w] NODE_NAME"
    return 0
  fi
  while read -r node partition state; do
    if [[ "$node" == "$name" ]]; then
      # First matching row wins when the node belongs to several partitions.
      gpustat_via_srun "$node" "$partition"
      return
    fi
  done < <(node_table)
  echo "Node $name not found."
}

# -p PARTITION: gpustat on the available nodes of partition $1, then the
# `sinfo -N` rows of its remaining nodes.
show_partition() {
  local name=${1-} node partition state
  local -i found=0
  local -a available=()

  if [[ -z "$name" ]]; then
    echo "Please designate the partition name. Options: [-p] PARTITION_NAME"
    return 0
  fi
  while read -r node partition state; do
    [[ "$partition" == "$name" ]] || continue
    found=1
    if is_available "$state"; then
      available+=("$node")
    fi
  done < <(node_table)
  if ((!found)); then
    echo "Partition $name not found."
    return 0
  fi

  echo "Avaiable (MIXED or IDEL) nodes on $name:"
  for node in "${available[@]}"; do
    gpustat_via_srun "$node" "$name"
  done
  echo "Unavailable (NOT MIXED and NOT IDEL) nodes on $name:"
  # Header line first, then this partition's rows whose state is not mix/idle.
  sinfo -N | awk -v want="$name" '
    NR == 1 { print; next }
    { part = $3; sub(/\*$/, "", part) }
    part == want && $4 !~ /mix|idle/'
}

# -j JOB_ID: gpustat on the nodes of job $1.
show_job() {
  if [[ -z "${1-}" ]]; then
    echo "Please Job ID. Options: [-j] JOB_ID"
    return 0
  fi
  gpustat_for_jobs -j "$1"
}

# -u [USER_ID]: gpustat on the nodes of a user's jobs; defaults to $USER.
show_user() {
  local user_id=${1-}

  if [[ -z "$user_id" ]]; then
    echo "You have not specified a user ID, using $USER instead"
    user_id=$USER
  fi
  gpustat_for_jobs -u "$user_id"
}

# Dispatch on the first command-line argument.
main() {
  case "${1-}" in
    -a) show_all ;;
    -w) show_node "${2-}" ;;
    -p) show_partition "${2-}" ;;
    -j) show_job "${2-}" ;;
    -u) show_user "${2-}" ;;
    *) usage ;;
  esac
}

main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment