#!/usr/bin/env python3
"""
Module to act as a Prometheus Exporter for Docker containers with a
healthcheck configured

Endpoints served:
    /metrics      Prometheus text exposition of per-container state
    /healthcheck  'OK' (200) when the exporter's requirements are met,
                  'ERR' (500) otherwise
"""

import argparse
import os
import os.path
import sys
from http.server import HTTPServer

import docker  # type: ignore
import numpy
from prometheus_client import (  # type: ignore
    CollectorRegistry,
    Gauge,
    MetricsHandler,
    generate_latest,
)

LISTEN_PORT = int(os.environ.get('DOCKSTAT_LISTEN_PORT', 8080))
HEALTHY_STR = 'healthy'

# Container label that overrides the downtime alert threshold, and the value
# (in seconds) used when the label is absent or cannot be parsed.
DOWNTIME_LABEL = 'io.prometheus.alert.downtime'
DEFAULT_DOWNTIME_THRESHOLD = 3600


def _downtime_threshold(labels) -> int:
    """
    Read the downtime alert threshold from a container's labels

    Args:
        labels: Mapping of container labels, or None when the container
            has no labels

    Returns:
        int: Threshold in seconds; DEFAULT_DOWNTIME_THRESHOLD when the label
            is missing or is not a valid integer
    """
    raw = (labels or {}).get(DOWNTIME_LABEL, DEFAULT_DOWNTIME_THRESHOLD)
    try:
        return int(raw)
    except (TypeError, ValueError):
        # One badly labelled container must not break the whole scrape
        print(f'ERROR: Invalid {DOWNTIME_LABEL} label value: {raw!r}')
        return DEFAULT_DOWNTIME_THRESHOLD


class HTTPHandler(MetricsHandler):
    """
    Class to encompass the requirements of a Prometheus Exporter
    for Docker containers with a healthcheck configured
    """

    def __init__(self, *args, **kwargs):
        self.docker_api: docker.APIClient = docker.APIClient()
        self.docker_client = docker.from_env()
        try:
            # The base request handler services the whole request from
            # inside its constructor, so the clients must exist beforehand
            super().__init__(*args, **kwargs)
        finally:
            # A handler is instantiated per request; release the clients'
            # connections instead of leaking one session per scrape
            self.docker_client.close()
            self.docker_api.close()

    # Override built-in method
    def do_GET(self):
        """
        Handle GET requests

        Dispatches on the request path; every request receives exactly one
        response (unknown paths get a 404).
        """
        if self.path == '/metrics':
            self._metrics()
        elif self.path == '/healthcheck':
            if healthy():
                self._respond(200, 'OK')
            else:
                print('ERROR: Check requirements')
                self._respond(500, 'ERR')
        else:
            self._respond(404, 'Not Found')

    def _respond(self, status: int, message: str):
        """
        Output a simple HTTP status and string to the client

        e.g. 200 OK

        Args:
            status (int): HTTP status to output
            message (str): String to output
        """
        # A status of 0 is not a valid HTTP status; report it as a 500
        self.send_response(int(status) or 500)
        self.end_headers()
        self.wfile.write(str(message).encode())

    def _metrics(self):
        """
        Handle the request for metrics

        Inspects every container (running or not) and publishes its running
        state, start time, exit code, downtime alert threshold and, when a
        healthcheck is configured, its health status.
        """
        if not healthy():
            print('ERROR: Check requirements')
            self._respond(500, 'Server not configured correctly')
            return

        # A fresh registry per request so removed containers do not linger
        registry = CollectorRegistry()
        health_gauge = Gauge(
            'container_inspect_state_health_status',
            "Container's healthcheck value (binary)",
            labelnames=['id', 'name', 'value'],
            registry=registry,
        )
        status_gauge = Gauge(
            'container_inspect_state_running',
            "Container's running state (binary)",
            labelnames=['id', 'name'],
            registry=registry,
        )
        started_at_gauge = Gauge(
            'container_inspect_state_started_at',
            "Container's start time (int)",
            labelnames=['id', 'name'],
            registry=registry,
        )
        exit_code_gauge = Gauge(
            'container_inspect_state_exit_code',
            "Container's exit code (int)",
            labelnames=['id', 'name'],
            registry=registry,
        )
        alert_threshold_gauge = Gauge(
            'container_inspect_downtime_alert_threshold',
            "Container's downtime alert threshold in seconds (int)",
            labelnames=['id', 'name'],
            registry=registry,
        )

        for container in self.docker_client.containers.list(all=True):
            try:
                data = self.docker_api.inspect_container(container.id)
            except docker.errors.NotFound:
                # Container was removed between list() and inspect
                continue

            state = data['State']
            ident = (container.id, container.name)

            running: bool = bool(state['Running'])
            started_at: str = state['StartedAt']
            exit_code: int = int(state['ExitCode'])
            alert_threshold: int = _downtime_threshold(
                data['Config'].get('Labels')
            )
            # Timestamp string -> whole seconds since the Unix epoch
            starttime = numpy.datetime64(started_at, 's').astype('long')

            status_gauge.labels(*ident).set(int(running))
            started_at_gauge.labels(*ident).set(starttime)
            exit_code_gauge.labels(*ident).set(exit_code)
            alert_threshold_gauge.labels(*ident).set(alert_threshold)

            try:
                health_str: str = state['Health']['Status']
            except KeyError:
                # No healthcheck configured for this container
                pass
            else:
                health_gauge.labels(*ident, health_str).set(
                    int(health_str == HEALTHY_STR)
                )

        self._respond(200, generate_latest(registry).decode())


def healthy() -> bool:
    """
    Simple function to return if all the requirements are met

    Returns:
        bool: True if healthy or False if unhealthy
    """
    return all(
        [
            os.path.exists('/var/run/docker.sock'),
        ]
    )


if __name__ == '__main__':

    def cli_parse() -> argparse.Namespace:
        """
        Parse the CLI

        Returns:
            argparse.Namespace: Arguments from the CLI
        """
        parser = argparse.ArgumentParser()
        parser.add_argument(
            '-H',
            '--healthcheck',
            action='store_true',
            help='Simply exit with 0 for healthy or 1 when unhealthy',
        )

        return parser.parse_args()

    def main() -> int:
        """
        Run the healthcheck or start the exporter's web server

        Returns:
            int: Process exit code (0 on success / healthy, 1 when unhealthy)
        """
        args: argparse.Namespace = cli_parse()

        if args.healthcheck:
            # Invert the sense of 'healthy' for Unix CLI usage
            return int(not healthy())

        print(f'Starting web server on port {LISTEN_PORT}')

        try:
            HTTPServer(('', LISTEN_PORT), HTTPHandler).serve_forever()
        except KeyboardInterrupt:
            print('Exiting')

        return 0

    sys.exit(main())