Skip to content

Instantly share code, notes, and snippets.

@reza-yousefi
Forked from lucidyan/gpu-control.md
Created January 18, 2023 16:35
Show Gist options
  • Select an option

  • Save reza-yousefi/f980d821876869110d3cb4826ba703a3 to your computer and use it in GitHub Desktop.

Select an option

Save reza-yousefi/f980d821876869110d3cb4826ba703a3 to your computer and use it in GitHub Desktop.
Prevent NVIDIA GPUs' throttling on headless server

Prevent NVIDIA GPUs' throttling on headless server

  • Unlock manual fan & overclock settings
    sudo nvidia-xconfig -a --cool-bits=28 --allow-empty-initial-configuration
  • Reboot system
  • Create script /usr/local/bin/gpu-fan-control.sh
#!/bin/bash

# Point X clients started by this script (nvidia-settings) at display :0,
# authenticating with the root Xauthority file kept under the lightdm
# runtime directory.
DISPLAY=:0
XAUTHORITY=/var/run/lightdm/root/:0
export DISPLAY XAUTHORITY

#######################################
# Switch a GPU to manual fan control and set the target fan speed.
# Arguments:
#   $1 - target fan speed passed to GPUTargetFanSpeed
#   $2 - index used for both the [gpu:N] and the [fan:N] target
# Outputs:
#   Writes a one-line status message to stdout; the output of
#   nvidia-settings itself is discarded.
#######################################
setFanSpeed() {
        local speed=$1
        local gpu=$2

        # The attribute specs are quoted and passed directly (no eval) so the
        # shell never treats "[gpu:N]/..." as a glob pattern that could match
        # a file in the current directory.
        # NOTE(review): the same index is used for the gpu and the fan target;
        # confirm this mapping holds on cards with more than one fan.
        nvidia-settings \
                -a "[gpu:${gpu}]/GPUFanControlState=1" \
                -a "[fan:${gpu}]/GPUTargetFanSpeed=${speed}" > /dev/null
        echo "Updating fans speed to ${speed} on GPU ${gpu}"
}

#######################################
# Signal handler: hand fan control of GPUs 0 and 1 back to the driver
# (GPUFanControlState=0) and terminate the script.
# Returns:
#   Does not return; exits with the status of the last nvidia-settings call.
#######################################
cleanup() {
        local gpu

        # Quoted arguments instead of eval: "[gpu:N]/..." must reach
        # nvidia-settings verbatim and not be expanded as a glob pattern.
        for gpu in 0 1; do
                nvidia-settings -a "[gpu:${gpu}]/GPUFanControlState=0"
        done
        exit
}

# gpuTemp carries the integer attribute, so every value assigned to it is
# evaluated as an arithmetic expression.
typeset -i gpuTemp

# Run cleanup when the script receives signal 1, 2, 3, 15 or 20.
trap 'cleanup' 1 2 3 15 20

#######################################
# Read the core temperature of one GPU and set its fan speed accordingly.
# Globals:
#   gpuTemp (written) - last successfully read temperature
# Arguments:
#   $1 - GPU index
# Outputs:
#   Current temperature on stdout; a warning on stderr if it cannot be read.
# Returns:
#   1 if the temperature could not be read, otherwise the status of
#   setFanSpeed.
#######################################
checkGpu() {
        local gpu=$1
        local reading

        # Terse query of exactly this GPU: prints only the value, so no
        # grep/perl parsing is needed and "gpu:1" cannot match "gpu:10".
        reading=$(nvidia-settings -q "[gpu:${gpu}]/GPUCoreTemp" -t | head -n 1)
        reading=${reading//[[:space:]]/}

        # Fail safe: an empty or non-numeric reading would otherwise be
        # treated as 0 degrees and select the lowest fan speed.
        if [[ ! "$reading" =~ ^[0-9]+$ ]]; then
                echo "Could not read temperature of GPU ${gpu}; setting fans to 100" >&2
                setFanSpeed 100 "$gpu"
                return 1
        fi

        # Force base 10 so a value with a leading zero is not read as octal.
        gpuTemp=$((10#$reading))
        echo "Current GPU ${gpu} temperature: ${gpuTemp}"

        # Set GPU fan speed from the temperature band
        if ((gpuTemp >= 80)); then
                setFanSpeed 100 "$gpu"
        elif ((gpuTemp >= 75)); then
                setFanSpeed 90 "$gpu"
        elif ((gpuTemp >= 70)); then
                setFanSpeed 75 "$gpu"
        elif ((gpuTemp >= 65)); then
                setFanSpeed 60 "$gpu"
        elif ((gpuTemp >= 60)); then
                setFanSpeed 50 "$gpu"
        else
                setFanSpeed 40 "$gpu"
        fi
}

# Poll forever: check every listed GPU, then wait 5 seconds.
while true; do
        # Append further indices (2, 3, ...) here to cover more GPUs.
        for gpuIndex in 0 1; do
                checkGpu "$gpuIndex"
        done
        # Interval
        sleep 5
done
  • Make our script executable
    chmod 744 /usr/local/bin/gpu-fan-control.sh

  • Create file /etc/systemd/system/gpu-fan-control.service

# systemd unit that runs the GPU fan control script as a service.
[Unit]
Description=Prevent GPU throttling under load

[Service]
# Path of the fan control script created in the earlier step.
ExecStart=/usr/local/bin/gpu-fan-control.sh

[Install]
# Enabling the unit attaches it to multi-user.target.
WantedBy=multi-user.target
  • Set proper permissions on the service file
    chmod 664 /etc/systemd/system/gpu-fan-control.service

  • Activate our service (now it will run on startup)
    systemctl enable /etc/systemd/system/gpu-fan-control.service

  • Start the service to check it (the fans should then run at 40% speed)
    systemctl start gpu-fan-control.service

@reza-yousefi
Copy link
Author

What I used on my workstation for the gpu-fan-control.sh script:

#!/bin/bash

# Point X clients started by this script (nvidia-settings) at display :0,
# authenticating with the root Xauthority file kept under the lightdm
# runtime directory.
DISPLAY=:0
XAUTHORITY=/var/run/lightdm/root/:0
export DISPLAY XAUTHORITY

#######################################
# Switch a GPU to manual fan control and set the target fan speed.
# Arguments:
#   $1 - target fan speed passed to GPUTargetFanSpeed
#   $2 - index used for both the [gpu:N] and the [fan:N] target
# Outputs:
#   Writes a one-line status message to stdout; the output of
#   nvidia-settings itself is discarded.
#######################################
setFanSpeed() {
        local speed=$1
        local gpu=$2

        # The attribute specs are quoted and passed directly (no eval) so the
        # shell never treats "[gpu:N]/..." as a glob pattern that could match
        # a file in the current directory.
        # NOTE(review): the same index is used for the gpu and the fan target;
        # confirm this mapping holds on cards with more than one fan.
        nvidia-settings \
                -a "[gpu:${gpu}]/GPUFanControlState=1" \
                -a "[fan:${gpu}]/GPUTargetFanSpeed=${speed}" > /dev/null
        echo "Updating fans speed to ${speed} on GPU ${gpu}"
}

#######################################
# Signal handler: hand fan control of GPUs 0, 1 and 2 back to the driver
# (GPUFanControlState=0) and terminate the script.
# Returns:
#   Does not return; exits with the status of the last nvidia-settings call.
#######################################
cleanup() {
        local gpu

        # Covers every GPU the main loop of this script drives (0, 1 and 2),
        # so none is left in manual fan mode after the script stops.
        # Quoted arguments instead of eval: "[gpu:N]/..." must reach
        # nvidia-settings verbatim and not be expanded as a glob pattern.
        for gpu in 0 1 2; do
                nvidia-settings -a "[gpu:${gpu}]/GPUFanControlState=0"
        done
        exit
}

# gpuTemp carries the integer attribute, so every value assigned to it is
# evaluated as an arithmetic expression.
typeset -i gpuTemp

# Run cleanup when the script receives signal 1, 2, 3, 15 or 20.
trap 'cleanup' 1 2 3 15 20

#######################################
# Read the core temperature of one GPU and set its fan speed accordingly.
# Globals:
#   gpuTemp (written) - last successfully read temperature
# Arguments:
#   $1 - GPU index
# Outputs:
#   Current temperature on stdout; a warning on stderr if it cannot be read.
# Returns:
#   1 if the temperature could not be read; otherwise the status of the
#   last command run (setFanSpeed when a band matched).
#######################################
checkGpu() {
        local gpu=$1
        local reading

        # Terse query of exactly this GPU: prints only the value, so no
        # grep/perl parsing is needed and "gpu:1" cannot match "gpu:10".
        reading=$(nvidia-settings -q "[gpu:${gpu}]/GPUCoreTemp" -t | head -n 1)
        reading=${reading//[[:space:]]/}

        # Fail safe: an empty or non-numeric reading would otherwise be
        # treated as 0 degrees and select the lowest fan speed.
        if [[ ! "$reading" =~ ^[0-9]+$ ]]; then
                echo "Could not read temperature of GPU ${gpu}; setting fans to 100" >&2
                setFanSpeed 100 "$gpu"
                return 1
        fi

        # Force base 10 so a value with a leading zero is not read as octal.
        gpuTemp=$((10#$reading))
        echo "Current GPU ${gpu} temperature: ${gpuTemp}"

        # Set GPU fan speed from the temperature band.
        # NOTE(review): the bands leave gaps (31-34, 41-44, 51-52, 61-62, 66,
        # 71-72, 81-82) in which the fan setting is left untouched. This looks
        # like an intentional dead band to avoid oscillation - confirm.
        if ((gpuTemp >= 83)); then
                setFanSpeed 100 "$gpu"
        elif ((gpuTemp >= 73 && gpuTemp <= 80)); then
                setFanSpeed 90 "$gpu"
        elif ((gpuTemp >= 67 && gpuTemp <= 70)); then
                setFanSpeed 80 "$gpu"
        elif ((gpuTemp >= 63 && gpuTemp <= 65)); then
                setFanSpeed 70 "$gpu"
        elif ((gpuTemp >= 53 && gpuTemp <= 60)); then
                setFanSpeed 65 "$gpu"
        elif ((gpuTemp >= 45 && gpuTemp <= 50)); then
                setFanSpeed 55 "$gpu"
        elif ((gpuTemp >= 35 && gpuTemp <= 40)); then
                setFanSpeed 35 "$gpu"
        elif ((gpuTemp <= 30)); then
                setFanSpeed 30 "$gpu"
        fi
}

# Poll forever: check every listed GPU, then wait 5 seconds.
while true; do
        # Append further indices (3, ...) here to cover more GPUs.
        for gpuIndex in 0 1 2; do
                checkGpu "$gpuIndex"
        done
        # Interval
        sleep 5
done

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment