Update the README
This commit is contained in:
parent
15efadbb5d
commit
1f53531086
55
README.md
55
README.md
|
@ -1,4 +1,4 @@
|
|||
[Prometheus](https://prometheus.io/) endpoint to report the healthcheck status of Docker containers.
|
||||
[Prometheus](https://prometheus.io/) endpoint to report the health check status of Docker containers.
|
||||
|
||||
# Usage
|
||||
```
|
||||
|
@ -10,6 +10,7 @@ optional arguments:
|
|||
```
|
||||
|
||||
# Example
|
||||
## Output
|
||||
```
|
||||
curl -qsS localhost:8080/metrics
|
||||
# HELP container_inspect_state_health_status Container's healthcheck value (binary)
|
||||
|
@ -18,10 +19,59 @@ container_inspect_state_health_status{id="21ac232f35edc4e630ed0c6b19b828a40df3db
|
|||
container_inspect_state_health_status{id="73a3d19d996de90f15da6cea016d2b1733d0e63bca4d36b0a1bcb2d680d6f108",name="dockstat",value="healthy"} 1.0
|
||||
container_inspect_state_health_status{id="db14abb41eec0ff06dc11b740b71839aec2b3855192b83a4ba31ee77bd21abfd",name="gotify",value="healthy"} 1.0
|
||||
container_inspect_state_health_status{id="470e17a15751881cc0787f9aab6f1af000b7bbce7e590d82de987c583425b4ef",name="down-example",value="unhealthy"} 0.0
|
||||
# HELP container_inspect_state_running Container's running state (binary)
|
||||
# TYPE container_inspect_state_running gauge
|
||||
container_inspect_state_running{id="21ac232f35edc4e630ed0c6b19b828a40df3dbc280c6bcf779b02a1488a741c3",name="alertify"} 1.0
|
||||
container_inspect_state_running{id="73a3d19d996de90f15da6cea016d2b1733d0e63bca4d36b0a1bcb2d680d6f108",name="dockstat"} 1.0
|
||||
container_inspect_state_running{id="db14abb41eec0ff06dc11b740b71839aec2b3855192b83a4ba31ee77bd21abfd",name="gotify"} 1.0
|
||||
container_inspect_state_running{id="470e17a15751881cc0787f9aab6f1af000b7bbce7e590d82de987c583425b4ef",name="down-example"} 0.0
|
||||
# HELP container_inspect_state_started_at Container's start time (int)
|
||||
# TYPE container_inspect_state_started_at gauge
|
||||
container_inspect_state_started_at{id="21ac232f35edc4e630ed0c6b19b828a40df3dbc280c6bcf779b02a1488a741c3",name="alertify"} 1.631792247e+09
|
||||
container_inspect_state_started_at{id="73a3d19d996de90f15da6cea016d2b1733d0e63bca4d36b0a1bcb2d680d6f108",name="dockstat"} 1.631779075e+09
|
||||
container_inspect_state_started_at{id="db14abb41eec0ff06dc11b740b71839aec2b3855192b83a4ba31ee77bd21abfd",name="gotify"} 1.631779073e+09
|
||||
container_inspect_state_started_at{id="470e17a15751881cc0787f9aab6f1af000b7bbce7e590d82de987c583425b4ef",name="down-example"} 1.631779081e+09
|
||||
# HELP container_inspect_state_exit_code Container's exit code (int)
|
||||
# TYPE container_inspect_state_exit_code gauge
|
||||
container_inspect_state_exit_code{id="21ac232f35edc4e630ed0c6b19b828a40df3dbc280c6bcf779b02a1488a741c3",name="alertify"} 0.0
|
||||
container_inspect_state_exit_code{id="73a3d19d996de90f15da6cea016d2b1733d0e63bca4d36b0a1bcb2d680d6f108",name="dockstat"} 0.0
|
||||
container_inspect_state_exit_code{id="db14abb41eec0ff06dc11b740b71839aec2b3855192b83a4ba31ee77bd21abfd",name="gotify"} 0.0
|
||||
container_inspect_state_exit_code{id="470e17a15751881cc0787f9aab6f1af000b7bbce7e590d82de987c583425b4ef",name="down-example"} 137.0
|
||||
# HELP container_inspect_downtime_alert_threshold Container's downtime alert threshold in seconds (int)
|
||||
# TYPE container_inspect_downtime_alert_threshold gauge
|
||||
container_inspect_downtime_alert_threshold{id="21ac232f35edc4e630ed0c6b19b828a40df3dbc280c6bcf779b02a1488a741c3",name="alertify"} 3600.0
|
||||
container_inspect_downtime_alert_threshold{id="73a3d19d996de90f15da6cea016d2b1733d0e63bca4d36b0a1bcb2d680d6f108",name="dockstat"} 3600.0
|
||||
container_inspect_downtime_alert_threshold{id="db14abb41eec0ff06dc11b740b71839aec2b3855192b83a4ba31ee77bd21abfd",name="gotify"} 3600.0
|
||||
container_inspect_downtime_alert_threshold{id="470e17a15751881cc0787f9aab6f1af000b7bbce7e590d82de987c583425b4ef",name="down-example"} 3600.0
|
||||
```
|
||||
|
||||
## Prometheus Alerts
|
||||
```yaml
|
||||
- name: "Container status"
|
||||
rules:
|
||||
- alert: "Container unhealthy"
|
||||
expr: container_inspect_state_health_status == 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: error
|
||||
annotations:
|
||||
summary: "Container unhealthy"
|
||||
description: "{{ $labels.name }}: {{ $labels.value }}"
|
||||
|
||||
- alert: "Container down"
|
||||
expr: container_inspect_state_running == 0 and ON(id) time() - container_inspect_state_started_at > container_inspect_downtime_alert_threshold
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "Container down"
|
||||
description: "{{ $labels.name }}: DOWN"
|
||||
```
|
||||
|
||||
# Notes
|
||||
* Requires access to the Docker socket (`/var/run/docker.sock`)
|
||||
* The port `dockstat` listens on can be changed using the `DOCKSTAT_LISTEN_PORT` environment variable. Default: `8080`
|
||||
|
||||
e.g. `DOCKSTAT_LISTEN_PORT=80 python3 dockstat.py`
|
||||
|
||||
# Docker
|
||||
## Build
|
||||
|
@ -44,8 +94,7 @@ services:
|
|||
container_name: dockstat
|
||||
environment:
|
||||
- TZ=Europe/London
|
||||
ports:
|
||||
- "8080:8080"
|
||||
- DOCKSTAT_LISTEN_PORT=80
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock:ro
|
||||
restart: unless-stopped
|
||||
|
|
22
dockstat.py
22
dockstat.py
|
@ -19,7 +19,7 @@ from prometheus_client import ( # type: ignore
|
|||
generate_latest,
|
||||
)
|
||||
|
||||
LISTEN_PORT = int(os.environ.get('LISTEN_PORT', 8080))
|
||||
LISTEN_PORT = int(os.environ.get('DOCKSTAT_LISTEN_PORT', 8080))
|
||||
HEALTHY_STR = 'healthy'
|
||||
|
||||
|
||||
|
@ -37,7 +37,7 @@ class HTTPHandler(MetricsHandler):
|
|||
# Override built-in method
|
||||
def do_GET(self):
|
||||
"""
|
||||
Method to handle GET requests
|
||||
Handle GET requests
|
||||
"""
|
||||
if self.path == '/metrics':
|
||||
self._metrics()
|
||||
|
@ -51,7 +51,13 @@ class HTTPHandler(MetricsHandler):
|
|||
|
||||
def _respond(self, status: int, message: str):
|
||||
"""
|
||||
Method to output a simple HTTP status and string to the client
|
||||
Output a simple HTTP status and string to the client
|
||||
|
||||
e.g. 200 OK
|
||||
|
||||
Args:
|
||||
status (int): HTTP status to output
|
||||
message (str): String to output
|
||||
"""
|
||||
self.send_response(int(status) or 500)
|
||||
self.end_headers()
|
||||
|
@ -59,7 +65,7 @@ class HTTPHandler(MetricsHandler):
|
|||
|
||||
def _metrics(self):
|
||||
"""
|
||||
Method to handle the request for metrics
|
||||
Handle the request for metrics
|
||||
"""
|
||||
if not healthy:
|
||||
print('ERROR: Check requirements')
|
||||
|
@ -142,6 +148,9 @@ class HTTPHandler(MetricsHandler):
|
|||
def healthy() -> bool:
|
||||
"""
|
||||
Simple function to return whether all the requirements are met
|
||||
|
||||
Returns:
|
||||
bool: True if healthy or False if unhealthy
|
||||
"""
|
||||
return all(
|
||||
[
|
||||
|
@ -154,7 +163,10 @@ if __name__ == '__main__':
|
|||
|
||||
def cli_parse() -> argparse.Namespace:
|
||||
"""
|
||||
Function to parse the CLI
|
||||
Parse the CLI
|
||||
|
||||
Returns:
|
||||
argparse.Namespace: Arguments from the CLI
|
||||
"""
|
||||
parser = argparse.ArgumentParser()
|
||||
|
||||
|
|
Loading…
Reference in a new issue