#!/usr/bin/env python3
"""
Module to act as a Prometheus Exporter for Docker containers with a
healthcheck configured
"""

import argparse
import os
import os.path
import sys
from http.server import HTTPServer
from typing import Any, Mapping, Optional

import docker  # type: ignore[import]
import docker.errors  # type: ignore[import]
import numpy
from docker.models.containers import Container  # type: ignore[import]
from prometheus_client import CollectorRegistry  # type: ignore[import]
from prometheus_client import (
    CONTENT_TYPE_LATEST,
    Gauge,
    MetricsHandler,
    generate_latest,
)

LISTEN_PORT = int(os.environ.get("DOCKSTAT_LISTEN_PORT", 8080))
HEALTHY_STR = "healthy"

# Path whose existence healthy() treats as the exporter's only requirement
DOCKER_SOCKET = "/var/run/docker.sock"
# Container label that overrides the exported downtime alert threshold
ALERT_THRESHOLD_LABEL = "io.prometheus.alert.downtime"
# Threshold (seconds) exported when the label is absent or unparsable
DEFAULT_ALERT_THRESHOLD = 3600


def _alert_threshold(
    labels: Optional[Mapping[str, Any]], container_id: str
) -> int:
    """
    Read the downtime alert threshold from a container's labels

    Args:
        labels: The "Labels" mapping from the container's inspect data;
            None is treated as an empty mapping
        container_id: Container ID, used only in the warning message

    Returns:
        int: The label value as an int, or DEFAULT_ALERT_THRESHOLD when the
            label is missing or cannot be converted to an int
    """
    raw = (labels or {}).get(ALERT_THRESHOLD_LABEL, DEFAULT_ALERT_THRESHOLD)
    try:
        return int(raw)
    except (TypeError, ValueError):
        # One badly-labelled container must not break the whole scrape
        print(
            f"WARNING: Container {container_id} has an invalid "
            f"{ALERT_THRESHOLD_LABEL} label ({raw!r}). Using default."
        )
        return DEFAULT_ALERT_THRESHOLD


class HTTPHandler(MetricsHandler):  # type: ignore[misc]
    """
    Class to encompass the requirements of a Prometheus Exporter
    for Docker containers with a healthcheck configured
    """

    def __init__(self, *args: Any, **kwargs: Any):
        self.docker_api: docker.APIClient = docker.APIClient()
        self.docker_client = docker.from_env()
        try:
            # The base request handler services the whole request inside its
            # constructor, so the clients are no longer needed once it returns
            super().__init__(*args, **kwargs)
        finally:
            # A handler is instantiated per request; close the clients so
            # their connections are not leaked on every scrape
            self.docker_client.close()
            self.docker_api.close()

    # Override built-in method
    def do_GET(self) -> None:
        """
        Handle GET requests

        Serves "/metrics" and "/healthcheck"; any other path gets a 404.
        """
        # Ignore any query string when routing
        path = self.path.split("?", 1)[0]

        if path == "/metrics":
            try:
                self._metrics()
            except docker.errors.NotFound:
                # _metrics() only writes its response as its final step, so
                # nothing has been sent yet and an error reply is safe here
                self._respond(500, "ERR")
        elif path == "/healthcheck":
            if healthy():
                self._respond(200, "OK")
            else:
                print("ERROR: Check requirements")
                self._respond(500, "ERR")
        else:
            self._respond(404, "Not Found")

    def _respond(
        self,
        status: int,
        message: str,
        content_type: str = "text/plain; charset=utf-8",
    ) -> None:
        """
        Output a simple HTTP status and string to the client

        e.g. 200 OK

        Args:
            status (int): HTTP status to output
            message (str): String to output
            content_type (str): Value for the Content-Type response header
        """
        body = str(message).encode()
        self.send_response(int(status) or 500)
        self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        try:
            self.wfile.write(body)
        except BrokenPipeError:
            # Client went away before reading the body; nothing to do
            pass

    def _metrics(self) -> None:
        """
        Handle the request for metrics

        Builds a fresh registry, records one sample per gauge for every
        container (running or not) and writes the exposition text to the
        client. Responds with a 500 instead when healthy() is False.
        """
        if not healthy():
            print("ERROR: Check requirements")
            self._respond(500, "Server not configured correctly")
            return

        registry = CollectorRegistry()
        health_gauge = Gauge(
            "container_inspect_state_health_status",
            "Container's healthcheck value (binary)",
            labelnames=["id", "name", "value"],
            registry=registry,
        )
        status_gauge = Gauge(
            "container_inspect_state_running",
            "Container's running state (binary)",
            labelnames=["id", "name"],
            registry=registry,
        )
        started_at_gauge = Gauge(
            "container_inspect_state_started_at",
            "Container's start time (int)",
            labelnames=["id", "name"],
            registry=registry,
        )
        exit_code_gauge = Gauge(
            "container_inspect_state_exit_code",
            "Container's exit code (int)",
            labelnames=["id", "name"],
            registry=registry,
        )
        alert_threshold_gauge = Gauge(
            "container_inspect_downtime_alert_threshold",
            "Container's downtime alert threshold in seconds (int)",
            labelnames=["id", "name"],
            registry=registry,
        )

        container: Container
        for container in self.docker_client.containers.list(all=True):  # type: ignore
            try:
                data = self.docker_api.inspect_container(container.id)
            except docker.errors.NotFound:
                # Container was removed between list() and inspect
                print(f"WARNING: Container {container.id} does not exist. Skipping.")
                continue

            state = data["State"]
            running = bool(state["Running"])
            exit_code = int(state["ExitCode"])
            alert_threshold = _alert_threshold(
                data["Config"]["Labels"], container.id
            )

            started_at = state["StartedAt"]
            # numpy deprecates parsing timezone-aware strings; drop the
            # trailing UTC designator, which leaves the parsed value unchanged
            if started_at.endswith("Z"):
                started_at = started_at[:-1]
            starttime = int(numpy.datetime64(started_at, "s").astype("int64"))

            status_gauge.labels(container.id, container.name).set(int(running))
            started_at_gauge.labels(container.id, container.name).set(starttime)
            exit_code_gauge.labels(container.id, container.name).set(exit_code)
            alert_threshold_gauge.labels(
                container.id,
                container.name,
            ).set(alert_threshold)

            # Containers without a healthcheck have no usable "Health" entry
            # and are left out of the health gauge
            health_str = (state.get("Health") or {}).get("Status")
            if health_str is not None:
                health_gauge.labels(
                    container.id,
                    container.name,
                    health_str,
                ).set(int(health_str == HEALTHY_STR))

        self._respond(
            200,
            generate_latest(registry).decode(),
            content_type=CONTENT_TYPE_LATEST,
        )


def healthy() -> bool:
    """
    Simple function to return if all the requirements are met

    Returns:
        bool: True if healthy or False if unhealthy
    """
    return all(
        [
            os.path.exists(DOCKER_SOCKET),
        ]
    )


if __name__ == "__main__":

    def cli_parse() -> argparse.Namespace:
        """
        Parse the CLI

        Returns:
            argparse.Namespace: Arguments from the CLI
        """
        parser = argparse.ArgumentParser()
        parser.add_argument(
            "-H",
            "--healthcheck",
            action="store_true",
            help="Simply exit with 0 for healthy or 1 when unhealthy",
        )

        return parser.parse_args()

    def main() -> int:
        """
        Run the healthcheck or start the web server

        Returns:
            int: Process exit code
        """
        args: argparse.Namespace = cli_parse()

        if args.healthcheck:
            # Invert the sense of 'healthy' for Unix CLI usage
            return int(not healthy())

        print(f"Starting web server on port {LISTEN_PORT}")

        try:
            HTTPServer(("", LISTEN_PORT), HTTPHandler).serve_forever()
        except KeyboardInterrupt:
            print("Exiting")

        return 0

    sys.exit(main())