Forked from mshuler/kill-processes-orphaned-by-jenkins.sh
Last active
September 5, 2018 18:06
-
-
Save t0r0X/d2d0ad9adb4a727c2efa to your computer and use it in GitHub Desktop.
Kill job processes orphaned by Jenkins
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/bash | |
| # Gist: https://gist.github.com/t0r0X/d2d0ad9adb4a727c2efa (see fork history for previous versions). | |
| # | |
| # Kill job processes orphaned by Jenkins | |
| # | |
| # The Jenkins "Signal Killer" plugin (https://wiki.jenkins-ci.org/display/JENKINS/Signal+killer) tries to | |
| # solve this issue: https://issues.jenkins-ci.org/browse/JENKINS-17116. But, sometimes processes slip | |
| # through (Timing issue: that plugin handles the process tree of a job, and, between the retrieval of the | |
| # process list of a given Job and the killing of those processes, some new child processes might have been | |
| # spawned, which are not in the kill list). | |
| # | |
| # This script handles those "orphaned" processes, but does not touch Git processes with PPID != 1. | |
| # | |
| # Suggested usage 1: | |
| # | |
| # $ crontab -l | |
| # */5 * * * * /path/to/kill-processes-orphaned-by-jenkins.sh -u <jenkins_username> -d false -f true 2>&1 | logger | |
| # | |
| # Suggested usage 2: | |
| # | |
| # A Jenkins job for each machine running the Jenkins slaves. | |
| # | |
| # Tested on SLES 11 & 12 (SuSE). Previous script versions tested on Linux Ubuntu 11.04, 12.04 (see fork history). | |
| # | |
| # NOTE: this must run as the Jenkins user or root to see the environment variables (needed to list build URLs) | |
# Print the command-line help text and terminate the script.
# Arguments: none
# Outputs:   the parameter overview, one option per line, on stdout
# Exits:     always, with status 1 (the function never returns)
function usage() {
  printf '%s\n' \
    'Parameters:' \
    ' -u val ; mandatory: username or user ID of Jenkins user' \
    ' -d val ; optional: dry run, true|false, default = true' \
    ' -f val ; optional: fail on found orphaned processes, true|false, default = false' \
    ' -a val ; optional: Jenkins authentication, "<username>:<password/token>"' \
    ' -U val ; optional: Jenkins base URL, needed when multiple instances run on same machine' \
    ''
  exit 1
}
# Ensure an option value is not the empty string.
# Arguments: $1 - option value to check
# Outputs:   an error line on stdout when the value is empty
# Returns:   0 when the value is non-empty; otherwise hands over to usage(),
#            which prints the help text and exits with status 1
function validateNotEmpty() {
  local value="$1"
  if [[ -n "${value}" ]]; then
    return 0
  fi
  echo "ERROR: parameter value '${value}' is empty!"
  usage
}
# Ensure an option value is exactly 'true' or 'false'.
# Arguments: $1 - option value to check
# Outputs:   an error line on stdout when the value is anything else
# Returns:   0 for 'true' or 'false'; otherwise hands over to usage(),
#            which prints the help text and exits with status 1
function validateTrueFalse() {
  case "$1" in
    true|false)
      return 0
      ;;
  esac
  echo "ERROR: parameter value '$1' is neither 'true' nor 'false'!"
  usage
}
# Option defaults; each one can be overridden by the matching command-line switch.
JENKINS_USERNAME=
DRYRUN=true
FAIL_IF_ORPHANDED_PROCESSES_FOUND=false
# Stored as "-u <credentials>" (or empty when -a is not given); later code relies on this format.
USER_AUTH=
JENKINS_BASE_URL=

# Parse the command line. Every switch takes a value, which is validated before it is stored;
# an unknown switch or a missing value ends up in usage().
while getopts "u:d:f:a:U:" OPT ; do
  case "${OPT}" in
    u) validateNotEmpty "${OPTARG}"; JENKINS_USERNAME="${OPTARG}" ;;
    d) validateTrueFalse "${OPTARG}"; DRYRUN="${OPTARG}" ;;
    f) validateTrueFalse "${OPTARG}"; FAIL_IF_ORPHANDED_PROCESSES_FOUND="${OPTARG}" ;;
    a) validateNotEmpty "${OPTARG}"; USER_AUTH="-u ${OPTARG}" ;;
    U) validateNotEmpty "${OPTARG}"; JENKINS_BASE_URL="${OPTARG}" ;;
    *) usage ;;
  esac
done

# OPTIND is the index of the NEXT argument to be processed, so exactly OPTIND-1 words were
# consumed by getopts. Shifting by OPTIND itself would drop one positional argument too many,
# or fail outright when no positional arguments follow the options.
shift $((OPTIND - 1))
# Parameter validation: the Jenkins user is the only mandatory option.
if [[ -z "${JENKINS_USERNAME}" ]] ; then
  usage
fi

# Never print the credentials themselves, only whether any were supplied.
if [[ -n "${USER_AUTH}" ]] ; then
  USER_AUTH_DISPLAY='***CENSORED***'
else
  USER_AUTH_DISPLAY='NONE'
fi

# Echo the effective settings so that a log of the run is self-explanatory.
echo ""
echo "Current parameters:"
echo " * JENKINS_USERNAME: ${JENKINS_USERNAME}"
echo " * DRYRUN: ${DRYRUN}"
echo " * FAIL_IF_ORPHANDED_PROCESSES_FOUND: ${FAIL_IF_ORPHANDED_PROCESSES_FOUND}"
echo " * USER_AUTH: <${USER_AUTH_DISPLAY}>"
echo " * JENKINS_BASE_URL: ${JENKINS_BASE_URL}"
echo ""
# 2-phase check: phase 1 (this section) collects the build URLs that look orphaned,
# phase 2 (further below) looks up the processes for those URLs again before acting on them.
FOUND_ORPHANED_PROCESSES_ON_1ST_RUN=false
FOUND_ORPHANED_PROCESSES_ON_2ND_RUN=false

OK=0
NOK=1

# Array holding URLs found during analysis
declare -a urls

# curl authentication arguments as separate words (stays empty when no credentials were given)
declare -a CURL_AUTH_ARGS

# Turn the "-u <credentials>" string kept in USER_AUTH into proper curl arguments.
# Passing the string as one quoted word hands curl "-u" and the credentials fused together,
# and an empty string hands it an empty argument; an array avoids both.
# Arguments: $1 - either empty or "-u <credentials>"
# Globals:   CURL_AUTH_ARGS (written)
function buildCurlAuthArgs() {
  CURL_AUTH_ARGS=()
  if [[ -n "$1" ]] ; then
    CURL_AUTH_ARGS=( -u "${1#-u }" )
  fi
}
buildCurlAuthArgs "${USER_AUTH}"

echo "==============================================================================="
echo "Analysis..."
# Every distinct BUILD_URL found in the environment of the Jenkins user's processes,
# optionally narrowed down to the given Jenkins base URL.
for url in $( ps e -U "${JENKINS_USERNAME}" | grep -o '[B]UILD_URL=[^ ]*' | awk -F'=' '{print $2}' | grep -F "${JENKINS_BASE_URL}" | sort -u ) ; do
  API_URL="${url}api/json?tree=building"

  # Log the query, but never the credentials.
  if [[ ${#CURL_AUTH_ARGS[@]} -ne 0 ]] ; then
    echo curl -s -u '***CENSORED***' "${API_URL}"
  else
    echo curl -s "${API_URL}"
  fi

  # Query the build state once and reuse the answer for logging and for the decision.
  # NOTE(review): without --fail curl exits 0 on HTTP error pages, so an error response from a
  # frontend would be read as "not building" - consider whether --fail is wanted here.
  API_RESPONSE="$( curl -s "${CURL_AUTH_ARGS[@]}" "${API_URL}" )"
  CURL_RESULT=$?
  echo "${API_RESPONSE}"

  # If you have a frontend, e.g. Apache, before your Jenkins instance, that frontend might be
  # down. Catch that situation, before SIGKILLing innocent processes!
  if [[ ${CURL_RESULT} -ne ${OK} ]] ; then
    echo 'ERROR: curl query failed!'
    echo ""
    echo "CURL_RESULT: '${CURL_RESULT}'"
    echo ""
    echo "ERROR: Server API not reachable: ${API_URL}"
    echo ""
    exit 1
  fi
  echo ""

  # Ignore self, rarely (immediately after Jenkins restart) triggers false alarms
  if [[ "${BUILD_URL}" == "${url}" ]] ; then
    echo " ignoring self: '${url}'"
    echo ""
    continue
  fi

  # If process isn't building anymore...
  if [[ "${API_RESPONSE}" != *'"building":true'* ]] ; then
    # ignore 'git' processes
    # TODO currently there's no way to distinguish a Jenkins initiated git poll from a hanging git process
    for pid in $( ps e -U "${JENKINS_USERNAME}" | grep "[B]UILD_URL=${url}" | awk '{print $1}' ) ; do
      PROCESS_NAME="$( ps -o comm= "${pid}" )"
      # Strip any column padding ps may add, otherwise the comparison with '1' cannot match.
      PARENT_PID="$( ps -o ppid= "${pid}" )"
      PARENT_PID="${PARENT_PID//[[:space:]]/}"
      if [[ "${PROCESS_NAME}" == 'git' && "${PARENT_PID}" != '1' ]] ; then
        echo -e " skip Git process with parent PID not 1, might be a Git poll:\n$( ps l "${pid}" )"
        echo ""
        # skip the whole URL, not just this process
        continue 2
      fi
    done
    # add current URL to end of array
    urls[${#urls[@]}]="${url}"
  fi
  echo ""
done
echo "done."
echo "==============================================================================="
echo ""
# List the build URLs collected during analysis; prints nothing when none were collected.
if (( ${#urls[@]} > 0 )) ; then
  echo "collected URLs:"
  # iterate over all URLs
  for url in "${urls[@]}" ; do
    echo " ${url}"
  done
  echo "==============================================================================="
  echo ""
fi
# Stop a single process: SIGTERM first, SIGKILL only if it is still listed after the grace
# period, and a warning if it is still listed even after that.
# Arguments: $1 - PID of the process to stop
#            $2 - seconds to wait after each signal (optional, default 2)
# Outputs:   progress lines and, if the process cannot be removed, a warning on stdout
function terminateProcess() {
  local pid="$1"
  local grace="${2:-2}"

  echo " * SIGTERMing process ${pid} ..."
  kill "${pid}"
  sleep "${grace}"
  if ps -p "${pid}" --no-headers -f > /dev/null ; then
    echo " * SIGKILLing process ${pid} ..."
    kill -9 "${pid}"
    sleep "${grace}"
    if ps -p "${pid}" --no-headers -f > /dev/null ; then
      echo "WARNING: Process ${pid} still exists: '$( ps -p "${pid}" --no-headers -f )' !!!"
      echo ""
    fi
  fi
}

# If URLs array is not empty...
if (( ${#urls[@]} > 0 )) ; then
  FOUND_ORPHANED_PROCESSES_ON_1ST_RUN=true
  echo "Orphaned Jenkins job processes:"
  echo "==============================="
  echo ""
  # iterate over all URLs
  for url in "${urls[@]}" ; do
    echo "URL: ${url}"
    # iterate over all processes for given URL (looked up again, they may be gone by now)
    for pid in $( ps e -U "${JENKINS_USERNAME}" | grep "[B]UILD_URL=${url}" | awk '{print $1}' ) ; do
      FOUND_ORPHANED_PROCESSES_ON_2ND_RUN=true
      echo " * $( ps -f -p "${pid}" --no-headers )"
      # Act only on an explicit 'false'; any other value (including unset) is a dry run.
      if [[ "${DRYRUN}" == 'false' ]] ; then
        terminateProcess "${pid}"
      else
        echo " * dry run, did NOT terminate or kill process ${pid}."
      fi
    done
    echo ""
  done
  # Explanatory message if all processes terminated themselves
  if [[ "${FOUND_ORPHANED_PROCESSES_ON_2ND_RUN}" != 'true' ]] ; then
    echo "none (all initially found processes have terminated themselves)!"
    echo ""
  fi
fi
# Final summary. Orphans count as "found" only when both phases saw them: the analysis
# collected at least one URL AND processes for it still existed when they were looked up again.
# The flags are compared as strings rather than executed as commands, so an unset or
# unexpected value counts as "not found" instead of being run.
echo "==============================================================================="
if [[ "${FOUND_ORPHANED_PROCESSES_ON_1ST_RUN}" == 'true' && "${FOUND_ORPHANED_PROCESSES_ON_2ND_RUN}" == 'true' ]] ; then
  echo "SUMMARY: Orphaned job processes were found!"
  echo "============================================="
  echo ""
  # Optionally signal the finding through the exit status (option -f true).
  if [[ "${FAIL_IF_ORPHANDED_PROCESSES_FOUND}" == 'true' ]] ; then
    exit 1
  fi
else
  echo "SUMMARY: No orphaned job processes found, ok."
  echo "==============================================================================="
  echo ""
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment