102 lines
5 KiB
Markdown
102 lines
5 KiB
Markdown
A [Prometheus](https://prometheus.io/) metrics endpoint that reports the health check status of Docker containers, along with their running state, start time and exit code.
|
|
|
|
# Usage
|
|
```
|
|
usage: dockstat.py [-h] [-H]
|
|
|
|
optional arguments:
|
|
-h, --help show this help message and exit
|
|
-H, --healthcheck Simply exit with 0 for healthy or 1 when unhealthy
|
|
```
|
|
|
|
# Example
|
|
## Output
|
|
```
|
|
curl -qsS localhost:8080/metrics
|
|
# HELP container_inspect_state_health_status Container's healthcheck value (binary)
|
|
# TYPE container_inspect_state_health_status gauge
|
|
container_inspect_state_health_status{id="21ac232f35edc4e630ed0c6b19b828a40df3dbc280c6bcf779b02a1488a741c3",name="alertify",value="healthy"} 1.0
|
|
container_inspect_state_health_status{id="73a3d19d996de90f15da6cea016d2b1733d0e63bca4d36b0a1bcb2d680d6f108",name="dockstat",value="healthy"} 1.0
|
|
container_inspect_state_health_status{id="db14abb41eec0ff06dc11b740b71839aec2b3855192b83a4ba31ee77bd21abfd",name="gotify",value="healthy"} 1.0
|
|
container_inspect_state_health_status{id="470e17a15751881cc0787f9aab6f1af000b7bbce7e590d82de987c583425b4ef",name="down-example",value="unhealthy"} 0.0
|
|
# HELP container_inspect_state_running Container's running state (binary)
|
|
# TYPE container_inspect_state_running gauge
|
|
container_inspect_state_running{id="21ac232f35edc4e630ed0c6b19b828a40df3dbc280c6bcf779b02a1488a741c3",name="alertify"} 1.0
|
|
container_inspect_state_running{id="73a3d19d996de90f15da6cea016d2b1733d0e63bca4d36b0a1bcb2d680d6f108",name="dockstat"} 1.0
|
|
container_inspect_state_running{id="db14abb41eec0ff06dc11b740b71839aec2b3855192b83a4ba31ee77bd21abfd",name="gotify"} 1.0
|
|
container_inspect_state_running{id="470e17a15751881cc0787f9aab6f1af000b7bbce7e590d82de987c583425b4ef",name="down-example"} 0.0
|
|
# HELP container_inspect_state_started_at Container's start time (int)
|
|
# TYPE container_inspect_state_started_at gauge
|
|
container_inspect_state_started_at{id="21ac232f35edc4e630ed0c6b19b828a40df3dbc280c6bcf779b02a1488a741c3",name="alertify"} 1.631792247e+09
|
|
container_inspect_state_started_at{id="73a3d19d996de90f15da6cea016d2b1733d0e63bca4d36b0a1bcb2d680d6f108",name="dockstat"} 1.631779075e+09
|
|
container_inspect_state_started_at{id="db14abb41eec0ff06dc11b740b71839aec2b3855192b83a4ba31ee77bd21abfd",name="gotify"} 1.631779073e+09
|
|
container_inspect_state_started_at{id="470e17a15751881cc0787f9aab6f1af000b7bbce7e590d82de987c583425b4ef",name="down-example"} 1.631779081e+09
|
|
# HELP container_inspect_state_exit_code Container's exit code (int)
|
|
# TYPE container_inspect_state_exit_code gauge
|
|
container_inspect_state_exit_code{id="21ac232f35edc4e630ed0c6b19b828a40df3dbc280c6bcf779b02a1488a741c3",name="alertify"} 0.0
|
|
container_inspect_state_exit_code{id="73a3d19d996de90f15da6cea016d2b1733d0e63bca4d36b0a1bcb2d680d6f108",name="dockstat"} 0.0
|
|
container_inspect_state_exit_code{id="db14abb41eec0ff06dc11b740b71839aec2b3855192b83a4ba31ee77bd21abfd",name="gotify"} 0.0
|
|
container_inspect_state_exit_code{id="470e17a15751881cc0787f9aab6f1af000b7bbce7e590d82de987c583425b4ef",name="down-example"} 137.0
|
|
# HELP container_inspect_downtime_alert_threshold Container's downtime alert threshold in seconds (int)
|
|
# TYPE container_inspect_downtime_alert_threshold gauge
|
|
container_inspect_downtime_alert_threshold{id="21ac232f35edc4e630ed0c6b19b828a40df3dbc280c6bcf779b02a1488a741c3",name="alertify"} 3600.0
|
|
container_inspect_downtime_alert_threshold{id="73a3d19d996de90f15da6cea016d2b1733d0e63bca4d36b0a1bcb2d680d6f108",name="dockstat"} 3600.0
|
|
container_inspect_downtime_alert_threshold{id="db14abb41eec0ff06dc11b740b71839aec2b3855192b83a4ba31ee77bd21abfd",name="gotify"} 3600.0
|
|
container_inspect_downtime_alert_threshold{id="470e17a15751881cc0787f9aab6f1af000b7bbce7e590d82de987c583425b4ef",name="down-example"} 3600.0
|
|
```
|
|
|
|
## Prometheus Alerts
|
|
```yaml
- name: "Container status"
  rules:
    - alert: "Container unhealthy"
      expr: container_inspect_state_health_status == 0
      for: 15m
      labels:
        severity: error
      annotations:
        summary: "Container unhealthy"
        description: "{{ $labels.name }}: {{ $labels.value }}"

    - alert: "Container down"
      expr: container_inspect_state_running == 0 and ON(id) time() - container_inspect_state_started_at > container_inspect_downtime_alert_threshold
      labels:
        severity: critical
      annotations:
        summary: "Container down"
        description: "{{ $labels.name }}: DOWN"
```
|
|
|
|
# Notes
|
|
* Requires access to the Docker socket (`/var/run/docker.sock`)
|
|
* The port `dockstat` listens on can be changed using the `DOCKSTAT_LISTEN_PORT` environment variable (default: `8080`), e.g. `DOCKSTAT_LISTEN_PORT=80 python3 dockstat.py`
|
|
|
|
# Docker
|
|
## Build
|
|
```bash
|
|
docker build . -t 'dockstat:latest'
|
|
```
|
|
|
|
## Run
|
|
```bash
|
|
docker run --name dockstat -p 8080:8080 -v /var/run/docker.sock:/var/run/docker.sock:ro -e TZ=Europe/London dockstat:latest
|
|
```
|
|
|
|
## Compose
|
|
```yaml
---
version: "2"
services:
  dockstat:
    image: dockstat:latest
    container_name: dockstat
    environment:
      - TZ=Europe/London
      - DOCKSTAT_LISTEN_PORT=80
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock:ro
    restart: unless-stopped
```
|