diff --git a/README.md b/README.md index ee993bb..aba4630 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[Prometheus](https://prometheus.io/) endpoint to report the healthcheck status of Docker containers. +[Prometheus](https://prometheus.io/) endpoint to report the health check status of Docker containers. # Usage ``` @@ -10,6 +10,7 @@ optional arguments: ``` # Example +## Output ``` curl -qsS localhost:8080/metrics # HELP container_inspect_state_health_status Container's healthcheck value (binary) @@ -18,10 +19,59 @@ container_inspect_state_health_status{id="21ac232f35edc4e630ed0c6b19b828a40df3db container_inspect_state_health_status{id="73a3d19d996de90f15da6cea016d2b1733d0e63bca4d36b0a1bcb2d680d6f108",name="dockstat",value="healthy"} 1.0 container_inspect_state_health_status{id="db14abb41eec0ff06dc11b740b71839aec2b3855192b83a4ba31ee77bd21abfd",name="gotify",value="healthy"} 1.0 container_inspect_state_health_status{id="470e17a15751881cc0787f9aab6f1af000b7bbce7e590d82de987c583425b4ef",name="down-example",value="unhealthy"} 0.0 +# HELP container_inspect_state_running Container's running state (binary) +# TYPE container_inspect_state_running gauge +container_inspect_state_running{id="21ac232f35edc4e630ed0c6b19b828a40df3dbc280c6bcf779b02a1488a741c3",name="alertify"} 1.0 +container_inspect_state_running{id="73a3d19d996de90f15da6cea016d2b1733d0e63bca4d36b0a1bcb2d680d6f108",name="dockstat"} 1.0 +container_inspect_state_running{id="db14abb41eec0ff06dc11b740b71839aec2b3855192b83a4ba31ee77bd21abfd",name="gotify"} 1.0 +container_inspect_state_running{id="470e17a15751881cc0787f9aab6f1af000b7bbce7e590d82de987c583425b4ef",name="down-example"} 0.0 +# HELP container_inspect_state_started_at Container's start time (int) +# TYPE container_inspect_state_started_at gauge +container_inspect_state_started_at{id="21ac232f35edc4e630ed0c6b19b828a40df3dbc280c6bcf779b02a1488a741c3",name="alertify"} 1.631792247e+09 
+container_inspect_state_started_at{id="73a3d19d996de90f15da6cea016d2b1733d0e63bca4d36b0a1bcb2d680d6f108",name="dockstat"} 1.631779075e+09 +container_inspect_state_started_at{id="db14abb41eec0ff06dc11b740b71839aec2b3855192b83a4ba31ee77bd21abfd",name="gotify"} 1.631779073e+09 +container_inspect_state_started_at{id="470e17a15751881cc0787f9aab6f1af000b7bbce7e590d82de987c583425b4ef",name="down-example"} 1.631779081e+09 +# HELP container_inspect_state_exit_code Container's exit code (int) +# TYPE container_inspect_state_exit_code gauge +container_inspect_state_exit_code{id="21ac232f35edc4e630ed0c6b19b828a40df3dbc280c6bcf779b02a1488a741c3",name="alertify"} 0.0 +container_inspect_state_exit_code{id="73a3d19d996de90f15da6cea016d2b1733d0e63bca4d36b0a1bcb2d680d6f108",name="dockstat"} 0.0 +container_inspect_state_exit_code{id="db14abb41eec0ff06dc11b740b71839aec2b3855192b83a4ba31ee77bd21abfd",name="gotify"} 0.0 +container_inspect_state_exit_code{id="470e17a15751881cc0787f9aab6f1af000b7bbce7e590d82de987c583425b4ef",name="down-example"} 137.0 +# HELP container_inspect_downtime_alert_threshold Container's downtime alert threshold in seconds (int) +# TYPE container_inspect_downtime_alert_threshold gauge +container_inspect_downtime_alert_threshold{id="21ac232f35edc4e630ed0c6b19b828a40df3dbc280c6bcf779b02a1488a741c3",name="alertify"} 3600.0 +container_inspect_downtime_alert_threshold{id="73a3d19d996de90f15da6cea016d2b1733d0e63bca4d36b0a1bcb2d680d6f108",name="dockstat"} 3600.0 +container_inspect_downtime_alert_threshold{id="db14abb41eec0ff06dc11b740b71839aec2b3855192b83a4ba31ee77bd21abfd",name="gotify"} 3600.0 +container_inspect_downtime_alert_threshold{id="470e17a15751881cc0787f9aab6f1af000b7bbce7e590d82de987c583425b4ef",name="down-example"} 3600.0 +``` + +## Prometheus Alerts +```yaml + - name: "Container status" + rules: + - alert: "Container unhealthy" + expr: container_inspect_state_health_status == 0 + for: 15m + labels: + severity: error + annotations: + summary: "Container 
unhealthy" + description: "{{ $labels.name }}: {{ $labels.value }}" + + - alert: "Container down" + expr: container_inspect_state_running == 0 and ON(id) time() - container_inspect_state_started_at > container_inspect_downtime_alert_threshold + labels: + severity: critical + annotations: + summary: "Container down" + description: "{{ $labels.name }}: DOWN" ``` # Notes * Requires access to the Docker socket (`/var/run/docker.sock`) +* The port `dockstat` listens on can be changed using the `DOCKSTAT_LISTEN_PORT` environment variable. Default: `8080` + + e.g. `DOCKSTAT_LISTEN_PORT=80 python3 dockstat.py` # Docker ## Build @@ -44,8 +94,7 @@ services: container_name: dockstat environment: - TZ=Europe/London - ports: - - "8080:8080" + - DOCKSTAT_LISTEN_PORT=80 volumes: - /var/run/docker.sock:/var/run/docker.sock:ro restart: unless-stopped diff --git a/dockstat.py b/dockstat.py index 2424291..66adeef 100644 --- a/dockstat.py +++ b/dockstat.py @@ -19,7 +19,7 @@ from prometheus_client import ( # type: ignore generate_latest, ) -LISTEN_PORT = int(os.environ.get('LISTEN_PORT', 8080)) +LISTEN_PORT = int(os.environ.get('DOCKSTAT_LISTEN_PORT', 8080)) HEALTHY_STR = 'healthy' @@ -37,7 +37,7 @@ class HTTPHandler(MetricsHandler): # Override built-in method def do_GET(self): """ - Method to handle GET requests + Handle GET requests """ if self.path == '/metrics': self._metrics() @@ -51,7 +51,13 @@ class HTTPHandler(MetricsHandler): def _respond(self, status: int, message: str): """ - Method to output a simple HTTP status and string to the client + Output a simple HTTP status and string to the client + + e.g. 
200 OK + + Args: + status (int): HTTP status to output + message (str): String to output """ self.send_response(int(status) or 500) self.end_headers() @@ -59,7 +65,7 @@ class HTTPHandler(MetricsHandler): def _metrics(self): """ - Method to handle the request for metrics + Handle the request for metrics """ if not healthy: print('ERROR: Check requirements') @@ -142,6 +148,9 @@ class HTTPHandler(MetricsHandler): def healthy() -> bool: """ Simple funtion to return if all the requirements are met + + Returns: + bool: True if healthy or False if unhealthy """ return all( [ @@ -154,7 +163,10 @@ if __name__ == '__main__': def cli_parse() -> argparse.Namespace: """ - Function to parse the CLI + Parse the CLI + + Returns: + argparse.Namespace: Arguments from the CLI """ parser = argparse.ArgumentParser()