Skip to content

Instantly share code, notes, and snippets.

@bvanhou
Last active August 5, 2020 14:12
Show Gist options
  • Select an option

  • Save bvanhou/0c98a50217d7917f0ac1d4e9d3a8fab5 to your computer and use it in GitHub Desktop.

Select an option

Save bvanhou/0c98a50217d7917f0ac1d4e9d3a8fab5 to your computer and use it in GitHub Desktop.
Graceful Shutdown for CircleCI Nomad Clients
// const AWS = require('aws-sdk');
import * as AWS from 'aws-sdk';
import * as async from 'async';
// Module-level AWS SDK clients shared by every function in this file.
const as = new AWS.AutoScaling();
const ssm = new AWS.SSM();
// Name of the SSM document that is executed on the node being drained.
const documentName = 'CircleCiDrainNodes';
// S3 destination for the SSM command output. The Terraform definition of this
// function exports the bucket as `s3_bucket`; the previous `s3bucket` spelling
// is still honoured so that deployments which set it keep working.
const s3bucket = process.env.s3_bucket || process.env.s3bucket;
// Region of the S3 bucket above.
const { region } = process.env;
/**
 * Lambda entry point: handles an Auto Scaling lifecycle notification that was
 * delivered through SNS and starts the drain of the terminating instance.
 *
 * The returned promise settles only once `executeCommand` reports the outcome
 * through the `succeed` / `fail` methods of the context it is given. An async
 * handler that returns early completes the invocation before the callback
 * chain (SSM command, polling, lifecycle call) has finished.
 *
 * @param {object} notification - SNS event; `Records[0].Sns.Message` is the
 *   JSON-encoded lifecycle message.
 * @param {object} _context - Lambda context object.
 * @returns {Promise<void>} Resolves when the lifecycle action was reported,
 *   rejects when reporting failed.
 */
export const handler = async (notification, _context) => {
  console.log("INFO: request Recieved.\nDetails:\n", JSON.stringify(notification));
  const message = JSON.parse(notification.Records[0].Sns.Message);
  console.log("DEBUG: SNS message contents. \nMessage:\n", message);
  const instanceId = message.EC2InstanceId;
  console.log(instanceId);

  // Messages without an instance id (e.g. the autoscaling:TEST_NOTIFICATION
  // message) have nothing to drain; sending an SSM command for them would fail.
  if (!instanceId) {
    console.log("INFO: notification carries no EC2InstanceId; nothing to drain.");
    return undefined;
  }

  const lifecycleParams = {
    "AutoScalingGroupName": message.AutoScalingGroupName,
    "LifecycleHookName": message.LifecycleHookName,
    "LifecycleActionToken": message.LifecycleActionToken,
    "LifecycleActionResult": "CONTINUE"
  };

  return new Promise((resolve, reject) => {
    // Route the legacy context.succeed()/context.fail() calls made by
    // executeCommand into this promise so the invocation result follows them.
    const completionContext = {
      ..._context,
      succeed: () => resolve(),
      fail: (err) => {
        if (err instanceof Error) {
          reject(err);
        } else {
          reject(new Error(err ? String(err) : "Graceful shutdown failed"));
        }
      }
    };
    executeCommand(instanceId, lifecycleParams, completionContext);
  });
};
/**
 * Returns a promise that resolves with an empty string after a delay.
 *
 * @param {number} [ms=2000] - Delay in milliseconds; the default keeps the
 *   original fixed two-second pause.
 * @returns {Promise<string>} Resolves with "" once the delay has elapsed.
 */
const wait = (ms = 2000) => {
  return new Promise((resolve) => {
    setTimeout(() => resolve(""), ms);
  });
};
/**
 * Runs the drain SSM document on the given instance, waits for the command to
 * finish and reports the result to Auto Scaling and to the Lambda context.
 *
 * Flow:
 *  - sendCommand fails            -> context.fail(err)
 *  - command does not succeed     -> recordLifecycleActionHeartbeat
 *  - command succeeds             -> completeAsLifecycleAction
 * After either lifecycle call the context is completed: succeed() when the
 * call went through, fail(err) when it did not.
 *
 * @param {string} nodename - EC2 instance id; used both as the SSM target and
 *   as the document's `nodename` parameter.
 * @param {object} lifecycleParams - Parameters identifying the lifecycle action.
 * @param {object} context - Object exposing `succeed()` and `fail(err)`.
 */
const executeCommand = (nodename, lifecycleParams, context) => {
  // If we successfully notified AutoScaling of the instance status, tell
  // Lambda we succeeded even if the operation on the instance failed.
  const finish = (err) => {
    if (err) {
      context.fail(err);
    } else {
      context.succeed();
    }
  };

  const ssmparams = {
    DocumentName: documentName,
    Comment: 'Draining Nomad Node',
    InstanceIds: [nodename],
    Parameters: {
      'nodename': [nodename]
    }
  };
  // Only request S3 output when a bucket is configured; an empty bucket name
  // would make the whole sendCommand request invalid.
  if (s3bucket) {
    ssmparams.OutputS3BucketName = s3bucket;
    ssmparams.OutputS3KeyPrefix = 'ssm-nomad-logs';
    ssmparams.OutputS3Region = region;
  }

  ssm.sendCommand(ssmparams, (err, data) => {
    if (err) {
      console.log(err, err.stack);
      // Without this the invocation would never be completed on this path.
      finish(err);
      return;
    }
    console.log(data);
    const commandid = data.Command.CommandId;
    waitCommandSuccess(commandid, (waitErr) => {
      if (waitErr) {
        console.log("ERROR: Failure waiting for Command to be Success");
        console.log(waitErr);
        recordLifecycleActionHeartbeat(lifecycleParams, finish);
        return;
      }
      console.log("Command Status is Success");
      completeAsLifecycleAction(lifecycleParams, finish);
    });
  });
};
/**
 * Polls the status of an SSM command until it reaches a final state.
 *
 * The callback receives a falsy value when the command status is "Success"
 * and an Error when the status lookup fails or the command ends in "Failed",
 * "Cancelled" or "TimedOut". Any other status is treated as still running and
 * is polled again after `wait()` has elapsed.
 *
 * The loop is written as a plain re-arming poll so that the pause between
 * polls is actually honoured and every exit path invokes the callback.
 *
 * @param {string} commandid - Id of the SSM command to watch.
 * @param {function(?Error): void} waitCommandReadyCallback - Completion callback.
 */
const waitCommandSuccess = (commandid, waitCommandReadyCallback) => {
  // Final states other than "Success"; polling them again can never succeed.
  const failedStatuses = ["Failed", "Cancelled", "TimedOut"];

  const finish = (err) => {
    if (err) {
      console.log("ERROR: error waiting for Command to be success:\n", err);
    }
    waitCommandReadyCallback(err);
  };

  const poll = () => {
    ssm.listCommands({
      CommandId: commandid
    }, (err, data) => {
      if (err) {
        console.log(err, err.stack);
        finish(err);
        return;
      }
      // The command may not be listed yet; treat that as "still pending".
      const command = data.Commands[0];
      const commandStatus = command ? command.Status : undefined;
      console.log(commandStatus);
      if (commandStatus === "Success") {
        finish(null);
        return;
      }
      if (failedStatuses.includes(commandStatus)) {
        finish(new Error(`SSM command ${commandid} finished with status ${commandStatus}`));
        return;
      }
      // Pause before asking again instead of polling in a tight loop.
      wait().then(poll);
    });
  };

  poll();
};
/**
 * Records a lifecycle action heartbeat with Auto Scaling.
 *
 * The shared lifecycle parameters also carry `LifecycleActionResult`, which
 * belongs to CompleteLifecycleAction only; it is stripped here so the
 * heartbeat request contains only parameters this API accepts. The caller's
 * object is not modified.
 *
 * @param {object} lifecycleParams - Parameters identifying the lifecycle action.
 * @param {function(?Error): void} callback - Called with the error on failure
 *   or with null on success.
 */
const recordLifecycleActionHeartbeat = (lifecycleParams, callback) => {
  const { LifecycleActionResult: _unusedResult, ...heartbeatParams } = lifecycleParams;
  as.recordLifecycleActionHeartbeat(heartbeatParams, (err, data) => {
    if (err) {
      console.log("ERROR: AS lifecycle heartbeat failed.\nDetails:\n", err);
      console.log("DEBUG: RecordLifecycleActionHeartbeat\nParams:\n", heartbeatParams);
      callback(err);
    } else {
      console.log("INFO: RecordLifecycleActionHeartbeat Successful.\nReported:\n", data);
      callback(null);
    }
  });
};
/**
 * Completes the Auto Scaling lifecycle action described by `lifecycleParams`.
 *
 * @param {object} lifecycleParams - Passed unchanged to completeLifecycleAction.
 * @param {function(?Error): void} callback - Receives the error on failure,
 *   null on success.
 */
const completeAsLifecycleAction = (lifecycleParams, callback) => {
  const onResponse = (failure, response) => {
    if (!failure) {
      console.log("INFO: CompleteLifecycleAction Successful.\nReported:\n", response);
      callback(null);
      return;
    }
    console.log("ERROR: AS lifecycle completion failed.\nDetails:\n", failure);
    console.log("DEBUG: CompleteLifecycleAction\nParams:\n", lifecycleParams);
    callback(failure);
  };
  as.completeLifecycleAction(lifecycleParams, onResponse);
};
{
  "schemaVersion": "1.2",
  "description": "Draining Node",
  "parameters": {
    "nodename": {
      "type": "String",
      "description": "Specify the Node name to drain"
    }
  },
  "runtimeConfig": {
    "aws:runShellScript": {
      "properties": [
        {
          "id": "0.aws:runShellScript",
          "runCommand": [
            "#!/bin/bash",
            "nomad node drain -enable -self -y",
            "isIneligible=$(nomad node-status -self -json | jq '.SchedulingEligibility | contains (\"ineligible\")')",
            "if [[ \"${isIneligible}\" == \"true\" ]] ; then exit 0 ; else exit 129; fi"
          ]
        }
      ]
    }
  }
}
# resource "aws_sqs_queue" "circleci_graceful_termination_autoscale" {
# name = "circleci_graceful_termination_autoscale"
# }
# Lets the graceful-termination SNS topic invoke the shutdown Lambda.
resource "aws_lambda_permission" "with_sns" {
  statement_id  = "AllowExecutionFromSNS"
  principal     = "sns.amazonaws.com"
  source_arn    = "${aws_sns_topic.circleci_graceful_termination_autoscale.arn}"
  action        = "lambda:InvokeFunction"
  function_name = "${aws_lambda_function.circleci_graceful_shutdown.function_name}"
}
# Topic used as the notification target of the termination lifecycle hook and
# as the trigger of the shutdown Lambda.
resource "aws_sns_topic" "circleci_graceful_termination_autoscale" {
  name = "circleci_graceful_termination_autoscale"
}
# IAM role used both by the Auto Scaling lifecycle hook (as its role_arn) and
# by the shutdown Lambda (as its execution role). The trust policy below allows
# the autoscaling and lambda service principals to assume it.
# The heredoc body is left unindented on purpose: it is passed on verbatim.
resource "aws_iam_role" "circleci_autoscaling_role" {
name = "circleci_autoscaling_role"
assume_role_policy = <<EOF
{
"Version": "2012-10-17",
"Statement": [
{
"Sid": "",
"Effect": "Allow",
"Principal": {
"Service": ["autoscaling.amazonaws.com","lambda.amazonaws.com"]
},
"Action": "sts:AssumeRole"
}
]
}
EOF
}
# Inline policy of the shared role.
#  - sns:Publish lets the lifecycle hook publish to its notification topic.
#  - The remaining statements cover the calls made by the shutdown Lambda, which
#    runs under this same role: sending and listing SSM commands, reporting the
#    lifecycle action to Auto Scaling, and writing its logs.
# NOTE(review): if these permissions are attached to the role elsewhere, the
# added statements are redundant but harmless — confirm and tighten "Resource".
# The opening brace of the heredoc stays at column 0 so the document starts
# without leading whitespace.
resource "aws_iam_role_policy" "lifecycle_hook_autoscaling_policy" {
  name   = "lifecycle_hook_autoscaling_policy"
  role   = "${aws_iam_role.circleci_autoscaling_role.id}"
  policy = <<EOF
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Sid": "Stmt1436380187000",
      "Effect": "Allow",
      "Action": [
        "sns:Publish"
      ],
      "Resource": [
        "*"
      ]
    },
    {
      "Sid": "DrainNodeViaSsm",
      "Effect": "Allow",
      "Action": [
        "ssm:SendCommand",
        "ssm:ListCommands"
      ],
      "Resource": [
        "*"
      ]
    },
    {
      "Sid": "ReportLifecycleAction",
      "Effect": "Allow",
      "Action": [
        "autoscaling:CompleteLifecycleAction",
        "autoscaling:RecordLifecycleActionHeartbeat"
      ],
      "Resource": [
        "*"
      ]
    },
    {
      "Sid": "WriteLambdaLogs",
      "Effect": "Allow",
      "Action": [
        "logs:CreateLogGroup",
        "logs:CreateLogStream",
        "logs:PutLogEvents"
      ],
      "Resource": [
        "*"
      ]
    }
  ]
}
EOF
}
# Lifecycle hook on instance termination for the client ASG. Notifications go
# to the graceful-termination SNS topic; the hook's result defaults to CONTINUE
# and its heartbeat timeout is 3600 seconds.
resource "aws_autoscaling_lifecycle_hook" "graceful_shutdown_asg_hook" {
  name                    = "graceful_shutdown_asg"
  autoscaling_group_name  = "${aws_autoscaling_group.clients_asg[0].name}"
  lifecycle_transition    = "autoscaling:EC2_INSTANCE_TERMINATING"
  heartbeat_timeout       = 3600
  default_result          = "CONTINUE"
  notification_target_arn = "${aws_sns_topic.circleci_graceful_termination_autoscale.arn}"
  role_arn                = "${aws_iam_role.circleci_autoscaling_role.arn}"
}
// Scheduled action for the client ASG: on the cron recurrence "0 0 * * Fri",
// between start_time and end_time, it sets desired capacity to 0 with size
// bounds of 0 to 1.
resource "aws_autoscaling_schedule" "graceful_shutdown_action" {
  scheduled_action_name  = "Nomad Graceful Shutdown Action"
  autoscaling_group_name = "${aws_autoscaling_group.clients_asg[0].name}"
  recurrence             = "0 0 * * Fri"
  start_time             = "2020-07-28T18:00:00Z"
  end_time               = "2025-07-28T18:00:00Z"
  desired_capacity       = 0
  min_size               = 0
  max_size               = 1
}
# Lambda that drains a Nomad client when its instance is being terminated.
# It is packaged from files/circleci-nomad-autoscaling.zip and runs under the
# shared circleci_autoscaling_role.
resource "aws_lambda_function" "circleci_graceful_shutdown" {
  filename      = "${path.root}/files/circleci-nomad-autoscaling.zip"
  function_name = "circleci-graceful-shutdown"
  role          = "${aws_iam_role.circleci_autoscaling_role.arn}"
  handler       = "index.handler"
  # The filebase64sha256() function is available in Terraform 0.11.12 and later
  # For Terraform 0.11.11 and earlier, use the base64sha256() function and the file() function:
  # source_code_hash = "${base64sha256(file("circleci-nomad-autoscaling.zip"))}"
  source_code_hash = "${filebase64sha256("${path.root}/files/circleci-nomad-autoscaling.zip")}"
  # NOTE(review): nodejs12.x is an end-of-support Lambda runtime — confirm and
  # move to a currently supported Node.js runtime.
  runtime = "nodejs12.x"
  # The function polls SSM until the drain command finishes, which takes far
  # longer than Lambda's 3-second default timeout; 900 seconds is the maximum.
  timeout = 900
  environment {
    variables = {
      s3_bucket = ""
      region    = "us-east-2"
    }
  }
}
# Subscribes the shutdown Lambda to the graceful-termination topic.
resource "aws_sns_topic_subscription" "circleci_graceful_termination_subscription" {
  protocol  = "lambda"
  endpoint  = "${aws_lambda_function.circleci_graceful_shutdown.arn}"
  topic_arn = "${aws_sns_topic.circleci_graceful_termination_autoscale.arn}"
}
# ARN of the graceful-termination SNS topic.
output "sns_topic_arn" {
  value = "${aws_sns_topic.circleci_graceful_termination_autoscale.arn}"
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment