Last active
August 5, 2020 14:12
-
-
Save bvanhou/0c98a50217d7917f0ac1d4e9d3a8fab5 to your computer and use it in GitHub Desktop.
Graceful Shutdown for CircleCI Nomad Clients
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // const AWS = require('aws-sdk'); | |
| import * as AWS from 'aws-sdk'; | |
| import * as async from 'async'; | |
// AWS SDK clients shared by every function in this module.
const as = new AWS.AutoScaling();
const ssm = new AWS.SSM();

// Name of the SSM document that is executed on the node being terminated.
const documentName = 'CircleCiDrainNodes';

// Destination for the SSM command output. The deployment in this gist exports
// the bucket as `s3_bucket`, while this module originally read `s3bucket`, so
// both spellings are accepted. An empty value is treated as "not configured"
// (undefined) so that no empty bucket name is ever sent to SSM.
const s3bucket = process.env.s3bucket || process.env.s3_bucket || undefined;
const { region } = process.env;
/**
 * Lambda entry point for the Auto Scaling lifecycle-hook notification that is
 * delivered through SNS.
 *
 * Parses the SNS message, builds the lifecycle parameters and hands the
 * instance over to `executeCommand`. The returned promise settles only once
 * `executeCommand` reports the outcome through `succeed` / `fail`; an async
 * handler that returns earlier lets Lambda finish the invocation before the
 * callback-based work has run.
 *
 * @param {object} notification - SNS event; `Records[0].Sns.Message` must be a JSON string.
 * @param {object} _context - Lambda context; its `succeed`/`fail` are replaced by promise bridges.
 * @returns {Promise<void>} resolves on `succeed`, rejects on `fail`.
 */
export const handler = async (notification, _context) => {
  console.log("INFO: request Recieved.\nDetails:\n", JSON.stringify(notification));
  const message = JSON.parse(notification.Records[0].Sns.Message);
  console.log("DEBUG: SNS message contents. \nMessage:\n", message);
  const instanceId = message.EC2InstanceId;
  console.log(instanceId);

  // Messages without an instance id (e.g. the test notification Auto Scaling
  // publishes for a new hook) have nothing to drain.
  if (!instanceId) {
    console.log("INFO: notification carries no EC2InstanceId; nothing to do.");
    return;
  }

  const lifecycleParams = {
    "AutoScalingGroupName": message.AutoScalingGroupName,
    "LifecycleHookName": message.LifecycleHookName,
    "LifecycleActionToken": message.LifecycleActionToken,
    "LifecycleActionResult": "CONTINUE"
  };

  await new Promise((resolve, reject) => {
    // executeCommand only talks to the context through succeed()/fail(), so
    // those two are mapped onto this promise.
    const bridgedContext = {
      ..._context,
      succeed: () => resolve(),
      fail: (err) => reject(err || new Error("Graceful shutdown failed")),
    };
    executeCommand(instanceId, lifecycleParams, bridgedContext);
  });
};
/**
 * Returns a promise that resolves to an empty string after a delay.
 *
 * @param {number} [ms=2000] - delay in milliseconds; defaults to the original fixed 2 s.
 * @returns {Promise<string>} resolves with "" once the timer fires; never rejects.
 */
const wait = (ms = 2000) => {
  return new Promise((resolve) => {
    setTimeout(() => resolve(""), ms);
  });
};
/**
 * Runs the drain document on one instance through SSM and reports the outcome
 * to Auto Scaling and to the Lambda context.
 *
 * Flow: `ssm.sendCommand` -> `waitCommandSuccess` -> either
 * `completeAsLifecycleAction` (command succeeded) or
 * `recordLifecycleActionHeartbeat` (waiting for the command failed).
 * `context.succeed()` is called when the Auto Scaling call went through,
 * even if the command on the instance did not succeed; `context.fail(err)`
 * is called when the command could not be sent or Auto Scaling could not be
 * notified.
 *
 * @param {string} nodename - EC2 instance id; used as the SSM target and as the `nodename` document parameter.
 * @param {object} lifecycleParams - lifecycle hook parameters forwarded to the Auto Scaling helpers.
 * @param {{succeed: Function, fail: Function}} context - Lambda context (only succeed/fail are used).
 */
const executeCommand = (nodename, lifecycleParams, context) => {
  const ssmparams = {
    DocumentName: documentName,
    Comment: 'Draining Nomad Node', // any comment
    OutputS3BucketName: s3bucket, // save the logs in this bucket
    OutputS3KeyPrefix: 'ssm-nomad-logs', // bucket prefix
    OutputS3Region: region, // region of bucket
    InstanceIds: [nodename],
    Parameters: {
      'nodename': [
        nodename
      ]
    }
  };

  // Shared by both Auto Scaling calls: the Lambda result reflects whether
  // Auto Scaling was notified, not whether the drain itself worked.
  const reportToLambda = (err) => {
    if (err) {
      context.fail(err);
    } else {
      context.succeed();
    }
  };

  ssm.sendCommand(ssmparams, (err, data) => {
    if (err) {
      console.log(err, err.stack);
      // Without this the invocation would never be settled after a send failure.
      context.fail(err);
      return;
    }

    console.log(data);
    const commandId = data.Command.CommandId;

    waitCommandSuccess(commandId, (waitErr) => {
      if (waitErr) {
        console.log("ERROR: Failure waiting for Command to be Success");
        console.log(waitErr);
        recordLifecycleActionHeartbeat(lifecycleParams, reportToLambda);
        return;
      }
      console.log("Command Status is Success");
      completeAsLifecycleAction(lifecycleParams, reportToLambda);
    });
  });
};
/**
 * Polls `ssm.listCommands` until the command reaches a final state.
 *
 * The callback is invoked exactly once:
 *  - with `null` when the status becomes "Success";
 *  - with an Error when the status is "Failed", "Cancelled" or "TimedOut";
 *  - with the SDK error when `listCommands` itself fails.
 * Any other status (or a response that does not list the command yet) is
 * treated as still running and polled again after a pause.
 *
 * @param {string} commandid - id returned by `ssm.sendCommand`.
 * @param {(err: Error|null) => void} waitCommandReadyCallback - completion callback.
 */
const waitCommandSuccess = (commandid, waitCommandReadyCallback) => {
  const pollIntervalMs = 2000;
  // Statuses after which the command can no longer become "Success".
  const failedStatuses = ["Failed", "Cancelled", "TimedOut"];

  const poll = () => {
    ssm.listCommands({
      CommandId: commandid
    }, (err, data) => {
      if (err) {
        console.log("ERROR: error waiting for Command to be success:\n", err);
        waitCommandReadyCallback(err);
        return;
      }

      const command = data.Commands[0];
      const commandStatus = command ? command.Status : undefined;
      console.log(commandStatus);

      if (commandStatus === "Success") {
        waitCommandReadyCallback(null);
        return;
      }

      if (failedStatuses.includes(commandStatus)) {
        const failure = new Error(`SSM command ${commandid} ended with status ${commandStatus}`);
        console.log("ERROR: error waiting for Command to be success:\n", failure);
        waitCommandReadyCallback(failure);
        return;
      }

      // Still pending / in progress: pause before asking again so the API
      // is not called in a tight loop.
      setTimeout(poll, pollIntervalMs);
    });
  };

  poll();
};
/**
 * Sends a lifecycle heartbeat to Auto Scaling for the given hook.
 *
 * Callers pass the same parameter object they use for completing the
 * lifecycle action, which carries `LifecycleActionResult`. The heartbeat API
 * does not take that key, so only the keys it accepts are forwarded.
 *
 * @param {object} lifecycleParams - lifecycle hook parameters (not mutated).
 * @param {(err: Error|null) => void} callback - called with the SDK error, or `null` on success.
 */
const recordLifecycleActionHeartbeat = (lifecycleParams, callback) => {
  const acceptedKeys = [
    "AutoScalingGroupName",
    "LifecycleHookName",
    "LifecycleActionToken",
    "InstanceId",
  ];
  const heartbeatParams = Object.fromEntries(
    Object.entries(lifecycleParams).filter(([key]) => acceptedKeys.includes(key))
  );

  as.recordLifecycleActionHeartbeat(heartbeatParams, (err, data) => {
    if (err) {
      console.log("ERROR: AS lifecycle heartbeat failed.\nDetails:\n", err);
      console.log("DEBUG: RecordLifecycleActionHeartbeat\nParams:\n", heartbeatParams);
      callback(err);
    } else {
      console.log("INFO: RecordLifecycleActionHeartbeat Successful.\nReported:\n", data);
      callback(null);
    }
  });
};
/**
 * Calls `as.completeLifecycleAction` with the given parameters and reports
 * the outcome through `callback`.
 *
 * @param {object} lifecycleParams - passed to the Auto Scaling call unchanged.
 * @param {(err: Error|null) => void} callback - receives the SDK error, or `null` on success.
 */
const completeAsLifecycleAction = (lifecycleParams, callback) => {
  const onCompleted = (failure, response) => {
    if (!failure) {
      console.log("INFO: CompleteLifecycleAction Successful.\nReported:\n", response);
      callback(null);
      return;
    }
    console.log("ERROR: AS lifecycle completion failed.\nDetails:\n", failure);
    console.log("DEBUG: CompleteLifecycleAction\nParams:\n", lifecycleParams);
    callback(failure);
  };

  as.completeLifecycleAction(lifecycleParams, onCompleted);
};
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "schemaVersion": "1.2",
  "description": "Draining Node",
  "parameters": {
    "nodename": {
      "type": "String",
      "description": "Specify the Node name to drain"
    }
  },
  "runtimeConfig": {
    "aws:runShellScript": {
      "properties": [
        {
          "id": "0.aws:runShellScript",
          "runCommand": [
            "#!/bin/bash",
            "nomad node drain -enable -self -y",
            "isIneligible=$(nomad node-status -self -json | jq '.SchedulingEligibility | contains (\"ineligible\")')",
            "if [ \"${isIneligible}\" = \"true\" ] ; then exit 0 ; else exit 129; fi"
          ]
        }
      ]
    }
  }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # resource "aws_sqs_queue" "circleci_graceful_termination_autoscale" { | |
| # name = "circleci_graceful_termination_autoscale" | |
| # } | |
# Allows the termination SNS topic to invoke the graceful-shutdown Lambda.
resource "aws_lambda_permission" "with_sns" {
  statement_id  = "AllowExecutionFromSNS"
  action        = "lambda:InvokeFunction"
  principal     = "sns.amazonaws.com"
  function_name = aws_lambda_function.circleci_graceful_shutdown.function_name
  source_arn    = aws_sns_topic.circleci_graceful_termination_autoscale.arn
}
# Topic that the lifecycle hook publishes to and the Lambda subscribes to.
resource "aws_sns_topic" "circleci_graceful_termination_autoscale" {
  name = "circleci_graceful_termination_autoscale"
}
# Role that both Auto Scaling and Lambda are allowed to assume.
resource "aws_iam_role" "circleci_autoscaling_role" {
  name = "circleci_autoscaling_role"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Sid    = ""
        Effect = "Allow"
        Principal = {
          Service = ["autoscaling.amazonaws.com", "lambda.amazonaws.com"]
        }
        Action = "sts:AssumeRole"
      }
    ]
  })
}
# Inline policy for the role shared by the lifecycle hook and the Lambda.
# Besides sns:Publish (used by the lifecycle hook), the Lambda function that
# runs under this role calls SSM SendCommand/ListCommands and the Auto Scaling
# CompleteLifecycleAction/RecordLifecycleActionHeartbeat APIs, and writes its
# logs to CloudWatch Logs, so those actions are granted here as well.
resource "aws_iam_role_policy" "lifecycle_hook_autoscaling_policy" {
  name = "lifecycle_hook_autoscaling_policy"
  role = aws_iam_role.circleci_autoscaling_role.id

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Sid      = "Stmt1436380187000"
        Effect   = "Allow"
        Action   = ["sns:Publish"]
        Resource = ["*"]
      },
      {
        Sid      = "DrainNodeViaSsm"
        Effect   = "Allow"
        Action   = ["ssm:SendCommand", "ssm:ListCommands"]
        Resource = ["*"]
      },
      {
        Sid    = "ReportLifecycleAction"
        Effect = "Allow"
        Action = [
          "autoscaling:CompleteLifecycleAction",
          "autoscaling:RecordLifecycleActionHeartbeat"
        ]
        Resource = ["*"]
      },
      {
        Sid    = "WriteLambdaLogs"
        Effect = "Allow"
        Action = [
          "logs:CreateLogGroup",
          "logs:CreateLogStream",
          "logs:PutLogEvents"
        ]
        Resource = ["arn:aws:logs:*:*:*"]
      }
    ]
  })
}
# Lifecycle hook on instance termination: notifies the SNS topic and falls
# back to CONTINUE when no result arrives within the heartbeat timeout.
resource "aws_autoscaling_lifecycle_hook" "graceful_shutdown_asg_hook" {
  name                    = "graceful_shutdown_asg"
  autoscaling_group_name  = aws_autoscaling_group.clients_asg[0].name
  lifecycle_transition    = "autoscaling:EC2_INSTANCE_TERMINATING"
  heartbeat_timeout       = 3600
  default_result          = "CONTINUE"
  notification_target_arn = aws_sns_topic.circleci_graceful_termination_autoscale.arn
  role_arn                = aws_iam_role.circleci_autoscaling_role.arn
}
# Scheduled action on the client ASG: every Friday at 00:00 (cron "0 0 * * Fri")
# it sets desired capacity to 0 with min 0 / max 1.
resource "aws_autoscaling_schedule" "graceful_shutdown_action" {
  scheduled_action_name  = "Nomad Graceful Shutdown Action"
  autoscaling_group_name = aws_autoscaling_group.clients_asg[0].name

  min_size         = 0
  max_size         = 1
  desired_capacity = 0

  recurrence = "0 0 * * Fri"
  start_time = "2020-07-28T18:00:00Z"
  end_time   = "2025-07-28T18:00:00Z"
}
# Lambda that drains a Nomad client through SSM when its instance is terminating.
resource "aws_lambda_function" "circleci_graceful_shutdown" {
  filename      = "${path.root}/files/circleci-nomad-autoscaling.zip"
  function_name = "circleci-graceful-shutdown"
  role          = aws_iam_role.circleci_autoscaling_role.arn
  handler       = "index.handler"
  runtime       = "nodejs12.x"

  # The function polls the SSM command until it finishes, which takes far
  # longer than the 3 second default timeout; use the Lambda maximum.
  timeout = 900

  # The filebase64sha256() function is available in Terraform 0.11.12 and later
  # For Terraform 0.11.11 and earlier, use the base64sha256() function and the file() function:
  # source_code_hash = "${base64sha256(file("circleci-nomad-autoscaling.zip"))}"
  source_code_hash = filebase64sha256("${path.root}/files/circleci-nomad-autoscaling.zip")

  environment {
    variables = {
      s3_bucket = ""
      region    = "us-east-2"
    }
  }
}
# Delivers messages from the termination topic to the graceful-shutdown Lambda.
resource "aws_sns_topic_subscription" "circleci_graceful_termination_subscription" {
  protocol  = "lambda"
  topic_arn = aws_sns_topic.circleci_graceful_termination_autoscale.arn
  endpoint  = aws_lambda_function.circleci_graceful_shutdown.arn
}
# ARN of the SNS topic used for termination notifications.
output "sns_topic_arn" {
  value = aws_sns_topic.circleci_graceful_termination_autoscale.arn
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment