From 15efadbb5d9bb1f4a153a2c8909c8ebd27b31577 Mon Sep 17 00:00:00 2001 From: Scott Wallace Date: Fri, 10 Sep 2021 13:21:44 +0100 Subject: [PATCH] Add stats to enable monitoring for non-running containers --- .gitignore | 2 + dockstat.py | 97 ++++++++++++++++++++++++++++++++++++------------ requirements.txt | 1 + 3 files changed, 77 insertions(+), 23 deletions(-) diff --git a/.gitignore b/.gitignore index 27573b8..496213a 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ +.mypy_cache/ .pyenv/ +.vscode/ diff --git a/dockstat.py b/dockstat.py index 62e3343..2424291 100644 --- a/dockstat.py +++ b/dockstat.py @@ -5,19 +5,21 @@ Module to act as a Prometheus Exporter for Docker containers with a """ import argparse +import os import os.path import sys from http.server import HTTPServer -import docker -from prometheus_client import ( +import docker # type: ignore +import numpy +from prometheus_client import ( # type: ignore CollectorRegistry, Gauge, - generate_latest, MetricsHandler, + generate_latest, ) -LISTEN_PORT = 8080 +LISTEN_PORT = int(os.environ.get('LISTEN_PORT', 8080)) HEALTHY_STR = 'healthy' @@ -28,12 +30,11 @@ class HTTPHandler(MetricsHandler): """ def __init__(self, *args, **kwargs): - self.docker_api = docker.APIClient() + self.docker_api: docker.APIClient = docker.APIClient() self.docker_client = docker.from_env() super().__init__(*args, **kwargs) # Override built-in method - # pylint: disable=invalid-name def do_GET(self): """ Method to handle GET requests @@ -48,7 +49,7 @@ class HTTPHandler(MetricsHandler): self._respond(200, 'OK') - def _respond(self, status, message): + def _respond(self, status: int, message: str): """ Method to output a simple HTTP status and string to the client """ @@ -67,60 +68,110 @@ class HTTPHandler(MetricsHandler): registry = CollectorRegistry() - gauge = Gauge( + health_gauge = Gauge( 'container_inspect_state_health_status', "Container's healthcheck value (binary)", labelnames=['id', 'name', 'value'], - registry=registry + registry=registry, + ) + status_gauge = Gauge( + 'container_inspect_state_running', + "Container's running state (binary)", + labelnames=['id', 'name'], + registry=registry, + ) + started_at_gauge = Gauge( + 'container_inspect_state_started_at', + "Container's start time (int)", + labelnames=['id', 'name'], + registry=registry, + ) + exit_code_gauge = Gauge( + 'container_inspect_state_exit_code', + "Container's exit code (int)", + labelnames=['id', 'name'], + registry=registry, + ) + alert_threshold_gauge = Gauge( + 'container_inspect_downtime_alert_threshold', + "Container's downtime alert threshold in seconds (int)", + labelnames=['id', 'name'], + registry=registry, ) - for container in self.docker_client.containers.list(): + for container in self.docker_client.containers.list(all=True): data = self.docker_api.inspect_container(container.id) + running: str = bool(data['State']['Running']) + started_at: int = data['State']['StartedAt'] + exit_code: int = int(data['State']['ExitCode']) + alert_threshold = int( + data['Config']['Labels'].get('io.prometheus.alert.downtime', 3600) + ) + starttime = numpy.datetime64(started_at) + + status_gauge.labels( + container.id, + container.name, + ).set(int(running)) + started_at_gauge.labels(container.id, container.name,).set( + int(int(starttime) / 1000000000) # strip nanoseconds + ) + exit_code_gauge.labels( + container.id, + container.name, + ).set(int(exit_code)) + alert_threshold_gauge.labels( + container.id, + container.name, + ).set(alert_threshold) + try: - health_str = data["State"]["Health"]["Status"] - label_values = [ + health_str: str = data['State']['Health']['Status'] + health_gauge.labels( container.id, container.name, health_str, - ] + ).set(int(health_str == HEALTHY_STR)) except KeyError: pass - else: - gauge.labels(*label_values).set(int(health_str == HEALTHY_STR)) self._respond(200, generate_latest(registry).decode()) -def healthy(): +def healthy() -> bool: """ Simple funtion to return if all the requirements are met """ - return all([ - os.path.exists('/var/run/docker.sock'), - ]) + return all( + [ + os.path.exists('/var/run/docker.sock'), + ] + ) if __name__ == '__main__': - def cli_parse(): + + def cli_parse() -> argparse.Namespace: """ Function to parse the CLI """ parser = argparse.ArgumentParser() parser.add_argument( - '-H', '--healthcheck', + '-H', + '--healthcheck', action='store_true', help='Simply exit with 0 for healthy or 1 when unhealthy', ) return parser.parse_args() - def main(): + def main() -> int: """ main() """ - args = cli_parse() + args: argparse.Namespace = cli_parse() if args.healthcheck: # Invert the sense of 'healthy' for Unix CLI usage diff --git a/requirements.txt b/requirements.txt index 90b4360..6a035d0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ # To ensure app dependencies are ported from your virtual environment/host machine into your container, run 'pip freeze > requirements.txt' in the terminal to overwrite this file docker +numpy prometheus_client