Skip to content

Instantly share code, notes, and snippets.

@b1uec0in
Last active January 27, 2022 02:59
Show Gist options
  • Select an option

  • Save b1uec0in/4a4c64774cd6364feb42eda7c6551fae to your computer and use it in GitHub Desktop.

Select an option

Save b1uec0in/4a4c64774cd6364feb42eda7c6551fae to your computer and use it in GitHub Desktop.
List Kubernetes node resource usage and sort by label.
# -*- coding: utf-8 -*-
"""
Usage:
# alias kres='python ~/environment/k8s_list_node_resource_usage.py'
# kres --all
node_name nodegroup instance_type cpu cpu_req (%) cpu_used (%) mem mem_req (%) mem_used (%) disk disk_used (%) mem_req/used mem_req/cpu mem_used/cpu
ip-10-121-3-44.ap-northeast-2.compute.internal base m5.large 2 600m 31% 306m 15% 8G 0.3G 3% 1.9G 26% 300.0G 6.5G 2% 13% 0.1 1.0
ip-10-121-3-153.ap-northeast-2.compute.internal be m5.2xlarge 8 5900m 74% 322m 4% 30G 8.8G 29% 5.7G 18% 300.0G 9.9G 3% 153% 1.1 0.7
ip-10-121-3-49.ap-northeast-2.compute.internal be m5.2xlarge 8 5900m 74% 262m 3% 30G 8.8G 29% 5.7G 19% 300.0G 8.9G 3% 153% 1.1 0.7
ip-10-121-3-216.ap-northeast-2.compute.internal fe m5.large 2 400m 20% 323m 16% 8G 0.1G 1% 1.6G 21% 300.0G 21.0G 7% 6% 0.0 0.8
ip-10-121-3-86.ap-northeast-2.compute.internal fe m5.large 2 400m 20% 313m 16% 8G 0.1G 1% 1.7G 22% 300.0G 21.0G 7% 5% 0.0 0.8
"""
import argparse
import json
import logging
import os
import re
import subprocess
from logging import getLogger
from typing import List, Union, Any, Dict
# Module-level root logger; level/format are configured in __main__ via logging.basicConfig.
logger = getLogger()
def shell(cmds: Union[List[str], str], debug=False, output=False) -> str:
    """Run a command and return its combined stdout/stderr as one string.

    Args:
        cmds: Command as an argv list, or a single string split on spaces
            (no shell quoting is honored, so keep arguments space-free).
        debug: Log the command line and echo output lines while streaming.
        output: Echo output lines to stdout while streaming.

    Returns:
        The full decoded (utf-8) output of the process.
    """
    if isinstance(cmds, str):
        cmds = cmds.split(' ')
    if debug:
        # Fix: removed the stray trailing quote the original appended to the log line.
        logger.info(f"SHELL: {' '.join(cmds)}")
    process = subprocess.Popen(cmds, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    result = ''
    # Stream line-by-line so debug/output mode prints progressively.
    # (The original looped on process.stdout.readable(), which is always True
    # for a pipe; the sentinel iterator expresses the intended termination.)
    for raw in iter(process.stdout.readline, b''):
        line = raw.decode('utf-8')
        if debug or output:
            print(line.strip('\r\n'))
        result += line
    process.wait()  # fix: reap the child so it does not linger as a zombie
    return result
def print_table(table: List[List[Any]], ljustcols: List[int] = None):
    """Print rows as space-separated, width-aligned columns.

    Args:
        table: Rows of cell values; non-str cells are stringified
            (fix: the original computed widths with str(x) but then called
            .ljust/.rjust on the raw value, crashing on ints/floats).
        ljustcols: Column indices to left-justify; all others right-justify.
    """
    ljustcols = ljustcols or []
    # Column width = widest stringified cell in that column.
    widths = [max(len(str(cell)) for cell in col) for col in zip(*table)]
    lines = []
    for row in table:
        padded = []
        for col_idx, (val, width) in enumerate(zip(row, widths)):
            text = str(val)
            padded.append(text.ljust(width) if col_idx in ljustcols else text.rjust(width))
        lines.append(' '.join(padded))
    # Trailing '\n' + print's own newline matches the original blank line at the end.
    print('\n'.join(lines) + '\n')
_UNIT_PATTERN = re.compile(r'^(?P<number>[0-9]+\.?[0-9]*)(?P<unit>\S*)')


def unit(value: Any, unit: str = '', flen: int = 0) -> str:
    """Convert a Kubernetes quantity string to a plain scaled-number string.

    Understands binary memory suffixes (Ki/Mi/Gi) and CPU suffixes
    (n=nano, u=micro, m=milli), normalizes to base units (bytes / cores),
    then scales to the requested output ``unit``.

    Args:
        value: Quantity such as '8Gi', '306m', '123456789n', or a bare number.
        unit: Output suffix appended to the result: 'G' (gibi), 'm' (milli),
            or '' for the base unit.
        flen: Decimal places in the result; 0 rounds to an integer.

    Returns:
        The scaled number with ``unit`` appended, or ``str(value)`` unchanged
        when the input does not parse or carries an unknown suffix.
    """
    value = str(value)
    m = _UNIT_PATTERN.match(value)
    if not m:
        return value
    number = float(m.group('number'))
    value_unit = m.group('unit')
    # Scale factors to base units: bytes for memory, cores for CPU.
    factors = {
        '': 1.0,
        'Ki': 1024.0,
        'Mi': 1024.0 ** 2,
        'Gi': 1024.0 ** 3,
        'n': 1e-9,
        'u': 1e-6,  # BUG FIX: micro is 1e-6; the original divided by 1e12
        'm': 1e-3,
    }
    if value_unit not in factors:
        return value
    number *= factors[value_unit]
    divider = 1.0
    if unit == 'G':
        divider *= 1024 ** 3
    if unit == 'm':
        divider /= 1000.0
    if flen == 0:
        return f'{round(number / divider)}{unit}'
    scale = pow(10, flen)
    return f'{round(number / divider * scale) * 1.0 / scale}{unit}'
def list_node_resource_usage(sort_label: str, include_disk_info: bool):
    """Print a per-node table of CPU/memory (and optionally disk) usage.

    Gathers data from ``kubectl describe node``, the metrics.k8s.io API,
    and — when ``include_disk_info`` is set — ``df`` executed inside each
    node's kube-proxy pod, then prints one aligned row per node.

    Args:
        sort_label: Node label shown as the second column and used to sort;
            falsy disables label sorting.
        include_disk_info: Also collect and show root-filesystem disk usage.
    """
    logger.info('Loading node info...')  # fix: was logging.info; use the module logger consistently
    describe_node = shell('kubectl describe node')
    node_infos: Dict[str, Dict[str, Any]] = {}
    if not sort_label:
        sort_label = 'sort_label'  # placeholder header text when no label given
    node_name = None
    section_name = None
    # A section header looks like "Name:  value" at the start of a line;
    # indented continuation lines belong to the current section.
    _SECTION_PATTERN = re.compile(r'^(?P<section_name>\S[^:]+):\s*(?P<section_value>.*)')
    for line in describe_node.split('\n'):
        m = re.match(_SECTION_PATTERN, line)
        if m:
            section_name = m.group('section_name')
            section_value = m.group('section_value').strip()
            if section_name == 'Name':
                node_name = section_value
                node_infos[node_name] = {'sections': {}}
            node_infos[node_name]['sections'][section_name] = []
            if section_value:
                node_infos[node_name]['sections'][section_name].append(section_value.strip())
        else:
            node_infos[node_name]['sections'][section_name].append(line.strip())
    # Parse the "Labels" section (key=value lines) into a dict per node.
    for node_name, node_info in node_infos.items():
        labels = {}
        node_info['Labels'] = labels
        for line in node_info['sections']['Labels']:
            cols = [word.strip() for word in line.split('=')]
            labels[cols[0]] = cols[1]
    # Sort nodes by the requested label value.
    if sort_label and sort_label != 'sort_label':
        node_infos = {k: v for k, v in sorted(node_infos.items(), key=lambda item: item[1]['Labels'][sort_label])}
    # Parse "Key: Value" style sections into dicts.
    for node_name, node_info in node_infos.items():
        for section_name, lines in node_info['sections'].items():
            if section_name in ['Annotations', 'Lease', 'Addresses', 'Capacity', 'Allocatable', 'System Info']:
                items = {}
                node_info[section_name] = items
                for line in lines:
                    cols = [word.strip() for word in line.split(':')]
                    items[cols[0]] = cols[1]
    # Requested cpu/memory from the "Allocated resources" table, e.g.
    #   cpu   600m (31%)   700m (36%)
    # (fix: typo'd group name limt_percent -> limit_percent; group is unused)
    _ALLOCATED_RESOURCE_PATTERN = re.compile(
        r'(?P<name>\S+)\s+(?P<request>\S+)\s+(?:\((?P<request_percent>\d+)\%\))?\s+(?P<limit>\S+)\s+(?:\((?P<limit_percent>\d+)\%\))?')
    for node_name, node_info in node_infos.items():
        for line in node_info['sections']['Allocated resources']:
            m = re.match(_ALLOCATED_RESOURCE_PATTERN, line)
            if m:
                node_info[f"{m.group('name')}_req"] = m.group('request')
                node_info[f"{m.group('name')}_req_percent"] = m.group('request_percent')
    # Normalize reported memory capacity to the nearest 2 GiB for display.
    for node_name, node_info in node_infos.items():
        node_info['memory_norm'] = round(
            float(unit(node_info['Capacity']['memory'])) / 1024 / 1024 / 1024 / 2) * 1024 * 1024 * 1024 * 2
    logger.info('Loading node metrics info...')
    node_metrics = json.loads(shell('kubectl get --raw /apis/metrics.k8s.io/v1beta1/nodes/'))
    for node_metric in node_metrics['items']:
        node_name = node_metric['metadata']['name']
        node_info = node_infos[node_name]
        node_info['cpu_used'] = node_metric['usage']['cpu']
        node_info['cpu_used_percent'] = str(
            round(float(unit(node_info['cpu_used'], flen=3)) / float(unit(node_info['Capacity']['cpu'])) * 100))
        node_info['mem_used'] = node_metric['usage']['memory']
        node_info['mem_used_percent'] = str(
            round(float(unit(node_info['mem_used'])) / float(unit(node_info['Capacity']['memory'])) * 100))
    if include_disk_info:
        logger.info('Loading kube-proxy daemonset info...')
        kube_proxies = json.loads(shell('kubectl get pod -n kube-system -l k8s-app=kube-proxy -o json'))
        for kube_proxy in kube_proxies['items']:
            node_name = kube_proxy['spec']['nodeName']
            node_info = node_infos[node_name]
            pod_name = kube_proxy['metadata']['name']
            logger.info(f"Loading disk info: {node_name}")
            disk_free_text = shell(f'kubectl exec -n kube-system {pod_name} -- df -B 1')
            for line in disk_free_text.split('\n'):
                columns = line.split()
                # fix: guard short/blank lines (original raised IndexError on them)
                if len(columns) > 5 and columns[5] == '/':
                    node_info.update({
                        'disk_size': columns[1],
                        'disk_used': columns[2],
                        'disk_used_percent': round(float(columns[2]) / float(columns[1]) * 100),
                    })
                    break
    headers = [
        'node_name', sort_label.split('/')[-1], 'instance_type',
        'cpu', 'cpu_req', '(%)', 'cpu_used', '(%)',
        'mem', 'mem_req', '(%)', 'mem_used', '(%)',
    ]
    if include_disk_info:
        headers += ['disk', 'disk_used', '(%)', ]
    headers += ['mem_req/used', 'mem_req/cpu', 'mem_used/cpu', ]
    table = [headers]
    for node_name, node_info in node_infos.items():
        mem_req = float(unit(node_info.get('memory_req')))
        mem_used = float(unit(node_info.get('mem_used')))
        cpu_mem_req_ratio = mem_req / 1024 / 1024 / 1024 / float(node_info['Capacity']['cpu'])
        # fix: avoid ZeroDivisionError when the metrics API reports 0 memory used
        mem_req_used_ratio = mem_req / mem_used if mem_used else 0.0
        cpu_mem_used_ratio = float(unit(node_info.get('mem_used'))) / 1024 / 1024 / 1024 / float(
            node_info['Capacity']['cpu'])
        columns = [
            node_name,
            node_info['Labels'].get(sort_label, ''),
            node_info['Labels'].get('beta.kubernetes.io/instance-type', ''),
            node_info['Capacity']['cpu'],
            node_info['cpu_req'],
            f"{node_info['cpu_req_percent']}%",
            unit(node_info['cpu_used'], 'm'),
            f"{node_info['cpu_used_percent']}%",
            unit(node_info['memory_norm'], 'G'),
            unit(node_info.get('memory_req'), 'G', 1),
            f"{node_info['memory_req_percent']}%",
            unit(node_info.get('mem_used'), 'G', 1),
            f"{node_info['mem_used_percent']}%",
        ]
        if include_disk_info:
            columns += [
                unit(node_info.get('disk_size'), 'G', 1),
                unit(node_info.get('disk_used'), 'G', 1),
                f"{node_info['disk_used_percent']}%",
            ]
        columns += [
            f'{int(mem_req_used_ratio * 100)}%',
            f'{cpu_mem_req_ratio:.1f}',
            f'{cpu_mem_used_ratio:.1f}',
        ]
        table.append(columns)
    print_table(table, ljustcols=[0, 1])
if __name__ == '__main__':
    # CLI entry point: parse options, configure logging, print the table.
    parser = argparse.ArgumentParser()
    parser.add_argument('--all', action='store_true', help='include disk info')
    parser.add_argument('--label', type=str, default='eks.amazonaws.com/nodegroup',
                        help='show and sort by specific label')
    args = parser.parse_args()

    # Log level is overridable via the LOGLEVEL environment variable.
    logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"), format='%(message)s')
    list_node_resource_usage(sort_label=args.label, include_disk_info=args.all)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment