dockstat/dockstat.py

212 lines
6 KiB
Python

#!/usr/bin/env python3
"""
Module to act as a Prometheus Exporter for Docker containers with a
healthcheck configured
"""
import argparse
import os
import os.path
import sys
from http.server import HTTPServer
from typing import Any
import docker
import docker.errors
import numpy
from docker.models.containers import Container
from prometheus_client import CollectorRegistry, Gauge, MetricsHandler, generate_latest
# TCP port the exporter's web server binds to; the DOCKSTAT_LISTEN_PORT
# environment variable overrides the default of 8080.
LISTEN_PORT = int(os.getenv("DOCKSTAT_LISTEN_PORT", "8080"))

# Value of a container's State.Health.Status that is reported as healthy (1).
HEALTHY_STR = "healthy"
class HTTPHandler(MetricsHandler):
    """
    Prometheus exporter request handler for Docker containers.

    Paths served:
        /metrics      Per-container gauges: running state, start time,
                      exit code, downtime alert threshold and (only for
                      containers with a healthcheck) health status.
        /healthcheck  200 "OK" when healthy() passes, 500 "ERR" otherwise.
    Every other path is answered with 404.
    """

    # Container label that overrides the downtime alert threshold, and the
    # threshold (in seconds) used when the label is absent or unusable.
    ALERT_THRESHOLD_LABEL = "io.prometheus.alert.downtime"
    DEFAULT_ALERT_THRESHOLD = 3600

    def __init__(self, *args: Any, **kwargs: Any):
        # The Docker clients must be assigned BEFORE the base constructor
        # runs: socketserver request handlers process the request from
        # inside __init__, so do_GET executes before super().__init__ returns.
        self.docker_api: docker.APIClient = docker.APIClient()
        self.docker_client = docker.from_env()
        super().__init__(*args, **kwargs)

    # Override built-in method
    def do_GET(self) -> None:
        """
        Handle GET requests

        Exactly one HTTP response is sent for every request.
        """
        if self.path == "/metrics":
            try:
                self._metrics()
            except docker.errors.NotFound:
                # A container vanished while being listed. _metrics only
                # writes its response as the very last step, so nothing has
                # been sent yet and an error status can still be returned
                # instead of leaving the client without a reply.
                print("ERROR: Container disappeared while collecting metrics")
                self._respond(500, "ERR")
        elif self.path == "/healthcheck":
            if not healthy():
                print("ERROR: Check requirements")
                self._respond(500, "ERR")
                # Without this return a second (200) response would follow.
                return
            self._respond(200, "OK")
        else:
            self._respond(404, "Not Found")

    def _respond(self, status: int, message: str) -> None:
        """
        Output a simple HTTP status and string to the client
        e.g. 200 OK

        Args:
            status (int): HTTP status to output (0 is sent as 500)
            message (str): String to output
        """
        self.send_response(int(status) or 500)
        self.send_header("content-type", "text/plain")
        self.end_headers()
        try:
            self.wfile.write(str(message).encode())
        except BrokenPipeError:
            # The client hung up before reading the body; nothing to do.
            pass

    @classmethod
    def _alert_threshold(cls, data: dict, container_id: str) -> int:
        """
        Read the downtime alert threshold from a container's labels

        Args:
            data (dict): Result of inspecting the container
            container_id (str): Container ID, used only in the warning text

        Returns:
            int: The label value as an integer, or the default when the
                label is missing or cannot be converted to an integer
        """
        # "Labels" may be null rather than an empty mapping.
        labels = data["Config"]["Labels"] or {}
        raw = labels.get(cls.ALERT_THRESHOLD_LABEL, cls.DEFAULT_ALERT_THRESHOLD)
        try:
            return int(raw)
        except (TypeError, ValueError):
            # One badly-labelled container must not break the whole scrape.
            print(
                f"WARNING: Container {container_id} has an invalid "
                f"{cls.ALERT_THRESHOLD_LABEL} label ({raw!r}). "
                f"Using {cls.DEFAULT_ALERT_THRESHOLD}."
            )
            return cls.DEFAULT_ALERT_THRESHOLD

    def _metrics(self) -> None:
        """
        Handle the request for metrics

        Builds a fresh registry on every call, so containers that no longer
        exist drop out of the output. Responds 500 when healthy() fails.

        Raises:
            docker.errors.NotFound: May propagate from listing containers;
                handled by do_GET.
        """
        if not healthy():
            print("ERROR: Check requirements")
            self._respond(500, "Server not configured correctly")
            return

        registry = CollectorRegistry()

        def new_gauge(name: str, doc: str, *extra_labels: str) -> Gauge:
            # Every gauge is keyed by container id and name.
            return Gauge(
                name,
                doc,
                labelnames=["id", "name", *extra_labels],
                registry=registry,
            )

        health_gauge = new_gauge(
            "container_inspect_state_health_status",
            "Container's healthcheck value (binary)",
            "value",
        )
        status_gauge = new_gauge(
            "container_inspect_state_running",
            "Container's running state (binary)",
        )
        started_at_gauge = new_gauge(
            "container_inspect_state_started_at",
            "Container's start time (int)",
        )
        exit_code_gauge = new_gauge(
            "container_inspect_state_exit_code",
            "Container's exit code (int)",
        )
        alert_threshold_gauge = new_gauge(
            "container_inspect_downtime_alert_threshold",
            "Container's downtime alert threshold in seconds (int)",
        )

        container: Container
        for container in self.docker_client.containers.list(all=True):
            try:
                data = self.docker_api.inspect_container(container.id)
            except docker.errors.NotFound:
                print(f"WARNING: Container {container.id} does not exist. Skipping.")
                continue

            state = data["State"]
            identity = (container.id, container.name)

            # StartedAt is a timestamp string; truncate it to whole seconds
            # and convert to an integer count of seconds.
            starttime = numpy.datetime64(state["StartedAt"], "s").astype("long")

            status_gauge.labels(*identity).set(int(bool(state["Running"])))
            started_at_gauge.labels(*identity).set(starttime)
            exit_code_gauge.labels(*identity).set(int(state["ExitCode"]))
            alert_threshold_gauge.labels(*identity).set(
                self._alert_threshold(data, container.id)
            )

            try:
                health_str = state["Health"]["Status"]
            except KeyError:
                # No health information: emit no health metric for it.
                continue
            health_gauge.labels(*identity, health_str).set(
                int(health_str == HEALTHY_STR)
            )

        self._respond(200, generate_latest(registry).decode())
def healthy() -> bool:
    """
    Report whether every runtime requirement of the exporter is met.

    The only requirement checked is that /var/run/docker.sock exists.

    Returns:
        bool: True when all requirements are met, otherwise False
    """
    requirements = (os.path.exists("/var/run/docker.sock"),)
    return all(requirements)
if __name__ == "__main__":

    def cli_parse() -> argparse.Namespace:
        """
        Parse the CLI

        Returns:
            argparse.Namespace: Arguments from the CLI; `healthcheck` is
                True when -H/--healthcheck was given
        """
        parser = argparse.ArgumentParser()
        parser.add_argument(
            "-H",
            "--healthcheck",
            action="store_true",
            help="Simply exit with 0 for healthy or 1 when unhealthy",
        )
        return parser.parse_args()

    def main() -> int:
        """
        Run a one-shot healthcheck, or start the exporter web server

        Returns:
            int: Process exit status. In healthcheck mode 0 means healthy
                and 1 unhealthy; in server mode 0 after the server stops.
        """
        args: argparse.Namespace = cli_parse()

        if args.healthcheck:
            # Invert the sense of 'healthy' for Unix CLI usage (0 == success)
            # and return a real int, as the annotation promises.
            return int(not healthy())

        print(f"Starting web server on port {LISTEN_PORT}")
        try:
            # The context manager closes the listening socket on the way out.
            with HTTPServer(("", LISTEN_PORT), HTTPHandler) as server:
                server.serve_forever()
        except KeyboardInterrupt:
            print("Exiting")
        return 0

    sys.exit(main())