#!/usr/bin/env python3
"""
Module to act as a Prometheus Exporter for Docker containers with a
healthcheck configured
"""

import argparse
import os
import os.path
import sys
from http.server import HTTPServer
from typing import Any

import docker
import docker.errors
import numpy
from docker.models.containers import Container
from prometheus_client import CollectorRegistry, Gauge, MetricsHandler, generate_latest

LISTEN_PORT = int(os.environ.get("DOCKSTAT_LISTEN_PORT", 8080))
HEALTHY_STR = "healthy"

# Container label that overrides the downtime alert threshold, and the value
# (in seconds) used when the label is absent or unusable.
ALERT_THRESHOLD_LABEL = "io.prometheus.alert.downtime"
DEFAULT_ALERT_THRESHOLD = 3600


class HTTPHandler(MetricsHandler):
    """
    Class to encompass the requirements of a Prometheus Exporter for Docker
    containers with a healthcheck configured

    Routes:
        /metrics:     Prometheus text exposition of per-container gauges
        /healthcheck: "OK" (200) or "ERR" (500) depending on healthy()
    """

    def __init__(self, *args: Any, **kwargs: Any):
        """
        Create the Docker clients, then let the base class serve the request

        The attributes are assigned *before* calling the parent constructor
        because the base request handler processes the whole request from
        inside its __init__; do_GET therefore runs before super().__init__
        returns. For the same reason the clients can be closed as soon as it
        returns, which stops every request leaking a Docker connection.
        """
        self.docker_api: docker.APIClient = docker.APIClient()
        try:
            self.docker_client = docker.from_env()
            try:
                super().__init__(*args, **kwargs)
            finally:
                self.docker_client.close()
        finally:
            self.docker_api.close()

    # Override built-in method
    def do_GET(self) -> None:
        """
        Handle GET requests

        Exactly one response is written per request: the metrics page, the
        healthcheck result, or a 404 for any other path.
        """
        if self.path == "/metrics":
            try:
                self._metrics()
            except docker.errors.NotFound as err:
                # _metrics only writes its response as its final step, so
                # nothing has been sent yet when this propagates (typically a
                # container vanished while the list was being built). Report
                # it instead of leaving the client without a reply.
                print(f"ERROR: Docker resource disappeared during scrape: {err}")
                self._respond(500, "Container list changed during scrape")
            return

        if self.path == "/healthcheck":
            if healthy():
                self._respond(200, "OK")
            else:
                print("ERROR: Check requirements")
                self._respond(500, "ERR")
            return

        self._respond(404, "Not Found")

    def _respond(self, status: int, message: str) -> None:
        """
        Output a simple HTTP status and string to the client
        e.g. 200 OK

        Args:
            status (int): HTTP status to output (a status of 0 is sent as 500)
            message (str): String to output
        """
        self.send_response(int(status) or 500)
        self.send_header("content-type", "text/plain")
        self.end_headers()
        try:
            self.wfile.write(str(message).encode())
        except BrokenPipeError:
            # The client hung up before reading the body; nothing to do.
            pass

    def _metrics(self) -> None:
        """
        Handle the request for metrics

        Inspects every container (running or not) and publishes its running
        state, start time, exit code, downtime alert threshold and, when a
        healthcheck is configured, its health status. A fresh registry is
        built per request so removed containers do not linger in the output.

        Raises:
            docker.errors.NotFound: if listing the containers fails because
                one of them was removed mid-listing
        """
        if not healthy():
            print("ERROR: Check requirements")
            self._respond(500, "Server not configured correctly")
            return

        registry = CollectorRegistry()
        health_gauge = Gauge(
            "container_inspect_state_health_status",
            "Container's healthcheck value (binary)",
            labelnames=["id", "name", "value"],
            registry=registry,
        )
        status_gauge = Gauge(
            "container_inspect_state_running",
            "Container's running state (binary)",
            labelnames=["id", "name"],
            registry=registry,
        )
        started_at_gauge = Gauge(
            "container_inspect_state_started_at",
            "Container's start time (int)",
            labelnames=["id", "name"],
            registry=registry,
        )
        exit_code_gauge = Gauge(
            "container_inspect_state_exit_code",
            "Container's exit code (int)",
            labelnames=["id", "name"],
            registry=registry,
        )
        alert_threshold_gauge = Gauge(
            "container_inspect_downtime_alert_threshold",
            "Container's downtime alert threshold in seconds (int)",
            labelnames=["id", "name"],
            registry=registry,
        )

        container: Container
        for container in self.docker_client.containers.list(all=True):
            try:
                data = self.docker_api.inspect_container(container.id)
            except docker.errors.NotFound:
                print(f"WARNING: Container {container.id} does not exist. Skipping.")
                continue

            state = data["State"]
            running = bool(state["Running"])
            exit_code = int(state["ExitCode"])
            starttime = numpy.datetime64(state["StartedAt"], "s").astype("long")

            # Labels may be null in the inspect output; treat that as "none".
            labels = data["Config"]["Labels"] or {}
            raw_threshold = labels.get(ALERT_THRESHOLD_LABEL, DEFAULT_ALERT_THRESHOLD)
            try:
                alert_threshold = int(raw_threshold)
            except (TypeError, ValueError):
                # One mislabelled container must not break the whole scrape.
                print(
                    f"WARNING: Container {container.id} has an invalid "
                    f"{ALERT_THRESHOLD_LABEL} label ({raw_threshold!r}). "
                    f"Using {DEFAULT_ALERT_THRESHOLD}."
                )
                alert_threshold = DEFAULT_ALERT_THRESHOLD

            status_gauge.labels(container.id, container.name).set(int(running))
            started_at_gauge.labels(container.id, container.name).set(starttime)
            exit_code_gauge.labels(container.id, container.name).set(exit_code)
            alert_threshold_gauge.labels(container.id, container.name).set(
                alert_threshold
            )

            # Only containers with a healthcheck expose State.Health.
            health_str = (state.get("Health") or {}).get("Status")
            if health_str is not None:
                health_gauge.labels(
                    container.id,
                    container.name,
                    health_str,
                ).set(int(health_str == HEALTHY_STR))

        self._respond(200, generate_latest(registry).decode())


def healthy() -> bool:
    """
    Simple function to return if all the requirements are met

    Currently the only requirement is that the Docker socket exists at
    /var/run/docker.sock.

    Returns:
        bool: True if healthy or False if unhealthy
    """
    return all(
        [
            os.path.exists("/var/run/docker.sock"),
        ]
    )


if __name__ == "__main__":

    def cli_parse() -> argparse.Namespace:
        """
        Parse the CLI

        Returns:
            argparse.Namespace: Arguments from the CLI
        """
        parser = argparse.ArgumentParser()
        parser.add_argument(
            "-H",
            "--healthcheck",
            action="store_true",
            help="Simply exit with 0 for healthy or 1 when unhealthy",
        )

        return parser.parse_args()

    def main() -> int:
        """
        Run the one-shot healthcheck or start the exporter's web server

        Returns:
            int: Process exit status (0 on success/healthy, 1 when unhealthy)
        """
        args: argparse.Namespace = cli_parse()

        if args.healthcheck:
            # Invert the sense of 'healthy' for Unix CLI usage
            return int(not healthy())

        print(f"Starting web server on port {LISTEN_PORT}")
        try:
            HTTPServer(("", LISTEN_PORT), HTTPHandler).serve_forever()
        except KeyboardInterrupt:
            print("Exiting")

        return 0

    sys.exit(main())