Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save t0r0X/d2d0ad9adb4a727c2efa to your computer and use it in GitHub Desktop.

Select an option

Save t0r0X/d2d0ad9adb4a727c2efa to your computer and use it in GitHub Desktop.
Kill job processes orphaned by Jenkins
#!/bin/bash
# Gist: https://gist.github.com/t0r0X/d2d0ad9adb4a727c2efa (see fork history for previous versions).
#
# Kill job processes orphaned by Jenkins
#
# The Jenkins "Signal Killer" plugin (https://wiki.jenkins-ci.org/display/JENKINS/Signal+killer) tries to
# solve this issue: https://issues.jenkins-ci.org/browse/JENKINS-17116. But sometimes processes slip
# through. (Timing issue: that plugin handles the process tree of a job, and between the retrieval of the
# process list of a given job and the killing of those processes, some new child processes might have been
# spawned which are not in the kill list.)
#
# This script handles those "orphaned" processes, but does not touch Git processes with PPID != 1.
#
# Suggested usage 1:
#
# $ crontab -l
# */5 * * * * /path/to/kill-processes-orphaned-by-jenkins.sh -u <jenkins_username> -d false -f true 2>&1 | logger
#
# Suggested usage 2:
#
# A Jenkins job for each machine running the Jenkins slaves.
#
# Tested on SLES 11 & 12 (SuSE). Previous script versions tested on Linux Ubuntu 11.04, 12.04 (see fork history).
#
# NOTE: this must run as the Jenkins user or root to see the environment variables (needed to list build URLs)
# Prints the list of supported command line parameters to stdout and then
# terminates the script with exit status 1.
function usage() {
  printf '%s\n' \
    'Parameters:' \
    ' -u val ; mandatory: username or user ID of Jenkins user' \
    ' -d val ; optional: dry run, true|false, default = true' \
    ' -f val ; optional: fail on found orphaned processes, true|false, default = false' \
    ' -a val ; optional: Jenkins authentication, "<username>:<password/token>"' \
    ' -U val ; optional: Jenkins base URL, needed when multiple instances run on same machine' \
    ''
  exit 1
}
# Checks that an option value is not the empty string.
# Arguments: $1 - option value to check
# Outputs:   an error line on stdout when the value is empty
# Returns:   0 for a non-empty value; otherwise hands over to usage()
function validateNotEmpty() {
  # Happy path first: nothing to report for a non-empty value.
  [[ -n "$1" ]] && return 0
  echo "ERROR: parameter value '$1' is empty!"
  usage
}
# Checks that an option value is exactly 'true' or 'false'.
# Arguments: $1 - option value to check
# Outputs:   an error line on stdout for any other value
# Returns:   0 for 'true'/'false'; otherwise hands over to usage()
function validateTrueFalse() {
  case "$1" in
    true|false)
      return 0
      ;;
  esac
  echo "ERROR: parameter value '$1' is neither 'true' nor 'false'!"
  usage
}
# Defaults for all settings; each one can be overridden by a command line option.
JENKINS_USERNAME=
DRYRUN=true
FAIL_IF_ORPHANDED_PROCESSES_FOUND=false
USER_AUTH=
JENKINS_BASE_URL=

# Parse the command line. Every option takes a value, which is validated
# before it is stored; unknown options end up in usage().
while getopts "u:d:f:a:U:" OPT ; do
  case "${OPT}" in
    u) validateNotEmpty "${OPTARG}"; JENKINS_USERNAME="${OPTARG}" ;;
    d) validateTrueFalse "${OPTARG}"; DRYRUN="${OPTARG}" ;;
    f) validateTrueFalse "${OPTARG}"; FAIL_IF_ORPHANDED_PROCESSES_FOUND="${OPTARG}" ;;
    # Stored together with the curl option letter: "-u <username>:<password/token>"
    a) validateNotEmpty "${OPTARG}"; USER_AUTH="-u ${OPTARG}" ;;
    U) validateNotEmpty "${OPTARG}"; JENKINS_BASE_URL="${OPTARG}" ;;
    *) usage ;;
  esac
done
# OPTIND is the index of the first argument that is NOT an option, so only
# OPTIND - 1 words have been consumed. Shifting by OPTIND itself would drop
# one positional argument too many (or fail if there is none).
shift $(( OPTIND - 1 ))
# Parameter validation: the Jenkins user is the only mandatory setting;
# without it the parameter help is shown via usage().
[[ -n "${JENKINS_USERNAME}" ]] || usage

# Report the effective settings. Credentials are never printed, only whether
# any were supplied.
printf '%s\n' \
  "" \
  "Current parameters:" \
  " * JENKINS_USERNAME: ${JENKINS_USERNAME}" \
  " * DRYRUN: ${DRYRUN}" \
  " * FAIL_IF_ORPHANDED_PROCESSES_FOUND: ${FAIL_IF_ORPHANDED_PROCESSES_FOUND}" \
  " * USER_AUTH: <$( if [[ -n "${USER_AUTH}" ]] ; then echo '***CENSORED***' ; else echo 'NONE' ; fi )>" \
  " * JENKINS_BASE_URL: ${JENKINS_BASE_URL}" \
  ""
# 2-phase check: phase 1 (the analysis below) collects the build URLs whose
# builds Jenkins no longer reports as running; phase 2 (later in this script)
# lists the processes of those URLs again before terminating them.
FOUND_ORPHANED_PROCESSES_ON_1ST_RUN=false
FOUND_ORPHANED_PROCESSES_ON_2ND_RUN=false
OK=0
NOK=1
# Array holding URLs found during analysis
declare -a urls=()

# Queries a Jenkins API URL with curl and prints the response body to stdout.
# Globals:   USER_AUTH (read) - empty, or "-u <username>:<password/token>"
# Arguments: $1 - API URL to query
# Returns:   the exit status of curl
function queryJenkinsApi() {
  local api_url="$1"
  local -a auth_args=()
  if [[ -n "${USER_AUTH}" ]] ; then
    # USER_AUTH carries option letter and credentials in ONE string. Hand them
    # to curl as two separate words; passed as a single word the credentials
    # would start with the separating blank, and an empty USER_AUTH would be
    # passed as an additional, empty URL argument.
    auth_args=(-u "${USER_AUTH#-u }")
  fi
  curl -s "${auth_args[@]}" "${api_url}"
}

# Tells whether a process is a 'git' process whose parent is not PID 1.
# Arguments: $1 - PID of the process to inspect
# Returns:   0 if the command name is 'git' and the parent PID is not 1,
#            non-zero otherwise (also if the process does not exist anymore)
function isGitProcessWithParent() {
  local pid="$1"
  local command_name parent_pid
  command_name="$( ps -o comm= -p "${pid}" )"
  parent_pid="$( ps -o ppid= -p "${pid}" )"
  # ps pads numeric columns with blanks; strip them, otherwise the comparison
  # with '1' can never match.
  parent_pid="${parent_pid//[[:space:]]/}"
  [[ "${command_name}" == 'git' && "${parent_pid}" != '1' ]]
}

echo "==============================================================================="
echo "Analysis..."
# Every distinct BUILD_URL found in the environment of the Jenkins user's
# processes, restricted to the given Jenkins instance. 'cut -f2-' keeps URLs
# intact even if they contain a '=' themselves.
for url in $( ps e -U "${JENKINS_USERNAME}" | grep -o '[B]UILD_URL=[^ ]*' | cut -d'=' -f2- | grep -F -- "${JENKINS_BASE_URL}" | sort -u ) ; do
  API_URL="${url}api/json?tree=building"
  # Log the request, but never the credentials
  if [[ -n "${USER_AUTH}" ]] ; then
    echo "curl -s -u ***CENSORED*** ${API_URL}"
  else
    echo "curl -s ${API_URL}"
  fi
  # Query the server once and use the same response for logging and evaluation
  API_RESPONSE="$( queryJenkinsApi "${API_URL}" )"
  CURL_RESULT=$?
  # If you have a frontend, e.g. Apache, before your Jenkins instance, that frontend might be
  # down. Catch that situation, before SIGKILLing innocent processes!
  if [[ ${CURL_RESULT} -ne ${OK} ]] ; then
    echo ""
    echo "CURL_RESULT: '${CURL_RESULT}'"
    echo ""
    echo "ERROR: Server API not reachable: ${API_URL}"
    echo ""
    exit 1
  fi
  printf '%s\n' "${API_RESPONSE}"
  echo ""
  grep -q '"building":true' <<< "${API_RESPONSE}"
  GREP_RESULT=$?
  # Ignore self, rarely (immediately after Jenkins restart) triggers false alarms
  if [[ "${BUILD_URL}" == "${url}" ]] ; then
    echo " ignoring self: '${url}'"
    echo ""
    continue
  fi
  # If process isn't building anymore...
  # NOTE(review): any response without '"building":true' counts as "not
  # building", including error pages delivered with curl exit status 0.
  if [[ ${GREP_RESULT} -eq ${NOK} ]] ; then
    # ignore 'git' processes TODO currently there's no way to distinguish a Jenkins initiated git poll from a hanging git process
    for pid in $( ps e -U "${JENKINS_USERNAME}" | grep "[B]UILD_URL=${url}" | awk '{print $1}' ) ; do
      if isGitProcessWithParent "${pid}" ; then
        echo -e " skip Git process with parent PID not 1, might be a Git poll:\n$( ps l "${pid}" )"
        echo ""
        # leave this URL alone completely and go on with the next URL
        continue 2
      fi
    done
    # add current URL to end of array
    urls+=("${url}")
  fi
  echo ""
done
echo "done."
echo "==============================================================================="
echo ""
# List the build URLs collected by the analysis, one per line; stay silent
# when nothing was collected.
if (( ${#urls[@]} > 0 )) ; then
  echo "collected URLs:"
  printf ' %s\n' "${urls[@]}"
  echo "==============================================================================="
  echo ""
fi
# Terminates one process: sends SIGTERM, waits 2 seconds and, if the process
# still exists, sends SIGKILL and waits again. Warns if it survives even that.
# Arguments: $1 - PID of the process to terminate
# Outputs:   progress and warning lines on stdout
function terminateOrphanedProcess() {
  local pid="$1"
  echo " * SIGTERMing process ${pid} ..."
  kill "${pid}"
  sleep 2
  if ps -p "${pid}" --no-headers -f > /dev/null ; then
    echo " * SIGKILLing process ${pid} ..."
    kill -9 "${pid}"
    sleep 2
    if ps -p "${pid}" --no-headers -f > /dev/null ; then
      echo "WARNING: Process ${pid} still exists: '$( ps -p "${pid}" --no-headers -f )' !!!"
      echo ""
    fi
  fi
}

# If URLs array is not empty...
if [[ ${#urls[@]} -ne 0 ]] ; then
  FOUND_ORPHANED_PROCESSES_ON_1ST_RUN=true
  echo "Orphaned Jenkins job processes:"
  echo "==============================="
  echo ""
  # iterate over all URLs
  for url in "${urls[@]}" ; do
    echo "URL: ${url}"
    # iterate over all processes for given URL; the process list is taken
    # anew here, so processes that ended since the analysis are not touched
    for pid in $( ps e -U "${JENKINS_USERNAME}" | grep "[B]UILD_URL=${url}" | awk '{print $1}' ) ; do
      FOUND_ORPHANED_PROCESSES_ON_2ND_RUN=true
      echo " * $( ps -f -p "${pid}" --no-headers )"
      # Only an explicit 'false' disables the dry run; the flag is compared as
      # a string instead of being executed as a command.
      if [[ "${DRYRUN}" == 'false' ]] ; then
        terminateOrphanedProcess "${pid}"
      else
        echo " * dry run, did NOT terminate or kill process ${pid}."
      fi
    done
    echo ""
  done
  # Explanatory message if all processes terminated themselves
  if [[ "${FOUND_ORPHANED_PROCESSES_ON_2ND_RUN}" != 'true' ]] ; then
    echo "none (all initially found processes have terminated themselves)!"
    echo ""
  fi
fi
# Prints the closing summary of the run.
# Globals:   FOUND_ORPHANED_PROCESSES_ON_1ST_RUN, FOUND_ORPHANED_PROCESSES_ON_2ND_RUN,
#            FAIL_IF_ORPHANDED_PROCESSES_FOUND (all read)
# Outputs:   the summary on stdout
# Returns:   1 if orphaned processes were found in both phases and failing was
#            requested, 0 otherwise
function printSummary() {
  echo "==============================================================================="
  # The flags are compared as strings rather than executed as commands, so an
  # empty or unexpected value counts as "false" instead of as success.
  if [[ "${FOUND_ORPHANED_PROCESSES_ON_1ST_RUN}" == 'true' && "${FOUND_ORPHANED_PROCESSES_ON_2ND_RUN}" == 'true' ]] ; then
    echo "SUMMARY: Orphaned job processes were found!"
    echo "============================================="
    echo ""
    if [[ "${FAIL_IF_ORPHANDED_PROCESSES_FOUND}" == 'true' ]] ; then
      return 1
    fi
  else
    echo "SUMMARY: No orphaned job processes found, ok."
    echo "==============================================================================="
    echo ""
  fi
  return 0
}
printSummary || exit 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment