Add stats to enable monitoring for non-running containers
This commit is contained in:
parent
cbf4bd2c7b
commit
15efadbb5d
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -1 +1,3 @@
|
||||||
|
.mypy_cache/
|
||||||
.pyenv/
|
.pyenv/
|
||||||
|
.vscode/
|
||||||
|
|
95
dockstat.py
95
dockstat.py
|
@ -5,19 +5,21 @@ Module to act as a Prometheus Exporter for Docker containers with a
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import os
|
||||||
import os.path
|
import os.path
|
||||||
import sys
|
import sys
|
||||||
from http.server import HTTPServer
|
from http.server import HTTPServer
|
||||||
|
|
||||||
import docker
|
import docker # type: ignore
|
||||||
from prometheus_client import (
|
import numpy
|
||||||
|
from prometheus_client import ( # type: ignore
|
||||||
CollectorRegistry,
|
CollectorRegistry,
|
||||||
Gauge,
|
Gauge,
|
||||||
generate_latest,
|
|
||||||
MetricsHandler,
|
MetricsHandler,
|
||||||
|
generate_latest,
|
||||||
)
|
)
|
||||||
|
|
||||||
LISTEN_PORT = 8080
|
LISTEN_PORT = int(os.environ.get('LISTEN_PORT', 8080))
|
||||||
HEALTHY_STR = 'healthy'
|
HEALTHY_STR = 'healthy'
|
||||||
|
|
||||||
|
|
||||||
|
@ -28,12 +30,11 @@ class HTTPHandler(MetricsHandler):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
self.docker_api = docker.APIClient()
|
self.docker_api: docker.APIClient = docker.APIClient()
|
||||||
self.docker_client = docker.from_env()
|
self.docker_client = docker.from_env()
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
# Override built-in method
|
# Override built-in method
|
||||||
# pylint: disable=invalid-name
|
|
||||||
def do_GET(self):
|
def do_GET(self):
|
||||||
"""
|
"""
|
||||||
Method to handle GET requests
|
Method to handle GET requests
|
||||||
|
@ -48,7 +49,7 @@ class HTTPHandler(MetricsHandler):
|
||||||
|
|
||||||
self._respond(200, 'OK')
|
self._respond(200, 'OK')
|
||||||
|
|
||||||
def _respond(self, status, message):
|
def _respond(self, status: int, message: str):
|
||||||
"""
|
"""
|
||||||
Method to output a simple HTTP status and string to the client
|
Method to output a simple HTTP status and string to the client
|
||||||
"""
|
"""
|
||||||
|
@ -67,60 +68,110 @@ class HTTPHandler(MetricsHandler):
|
||||||
|
|
||||||
registry = CollectorRegistry()
|
registry = CollectorRegistry()
|
||||||
|
|
||||||
gauge = Gauge(
|
health_gauge = Gauge(
|
||||||
'container_inspect_state_health_status',
|
'container_inspect_state_health_status',
|
||||||
"Container's healthcheck value (binary)",
|
"Container's healthcheck value (binary)",
|
||||||
labelnames=['id', 'name', 'value'],
|
labelnames=['id', 'name', 'value'],
|
||||||
registry=registry
|
registry=registry,
|
||||||
|
)
|
||||||
|
status_gauge = Gauge(
|
||||||
|
'container_inspect_state_running',
|
||||||
|
"Container's running state (binary)",
|
||||||
|
labelnames=['id', 'name'],
|
||||||
|
registry=registry,
|
||||||
|
)
|
||||||
|
started_at_gauge = Gauge(
|
||||||
|
'container_inspect_state_started_at',
|
||||||
|
"Container's start time (int)",
|
||||||
|
labelnames=['id', 'name'],
|
||||||
|
registry=registry,
|
||||||
|
)
|
||||||
|
exit_code_gauge = Gauge(
|
||||||
|
'container_inspect_state_exit_code',
|
||||||
|
"Container's exit code (int)",
|
||||||
|
labelnames=['id', 'name'],
|
||||||
|
registry=registry,
|
||||||
|
)
|
||||||
|
alert_threshold_gauge = Gauge(
|
||||||
|
'container_inspect_downtime_alert_threshold',
|
||||||
|
"Container's downtime alert threshold in seconds (int)",
|
||||||
|
labelnames=['id', 'name'],
|
||||||
|
registry=registry,
|
||||||
)
|
)
|
||||||
|
|
||||||
for container in self.docker_client.containers.list():
|
for container in self.docker_client.containers.list(all=True):
|
||||||
data = self.docker_api.inspect_container(container.id)
|
data = self.docker_api.inspect_container(container.id)
|
||||||
|
|
||||||
|
running: str = bool(data['State']['Running'])
|
||||||
|
started_at: int = data['State']['StartedAt']
|
||||||
|
exit_code: int = int(data['State']['ExitCode'])
|
||||||
|
alert_threshold = int(
|
||||||
|
data['Config']['Labels'].get('io.prometheus.alert.downtime', 3600)
|
||||||
|
)
|
||||||
|
starttime = numpy.datetime64(started_at)
|
||||||
|
|
||||||
|
status_gauge.labels(
|
||||||
|
container.id,
|
||||||
|
container.name,
|
||||||
|
).set(int(running))
|
||||||
|
started_at_gauge.labels(container.id, container.name,).set(
|
||||||
|
int(int(starttime) / 1000000000) # strip nanoseconds
|
||||||
|
)
|
||||||
|
exit_code_gauge.labels(
|
||||||
|
container.id,
|
||||||
|
container.name,
|
||||||
|
).set(int(exit_code))
|
||||||
|
alert_threshold_gauge.labels(
|
||||||
|
container.id,
|
||||||
|
container.name,
|
||||||
|
).set(alert_threshold)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
health_str = data["State"]["Health"]["Status"]
|
health_str: str = data['State']['Health']['Status']
|
||||||
label_values = [
|
health_gauge.labels(
|
||||||
container.id,
|
container.id,
|
||||||
container.name,
|
container.name,
|
||||||
health_str,
|
health_str,
|
||||||
]
|
).set(int(health_str == HEALTHY_STR))
|
||||||
except KeyError:
|
except KeyError:
|
||||||
pass
|
pass
|
||||||
else:
|
|
||||||
gauge.labels(*label_values).set(int(health_str == HEALTHY_STR))
|
|
||||||
|
|
||||||
self._respond(200, generate_latest(registry).decode())
|
self._respond(200, generate_latest(registry).decode())
|
||||||
|
|
||||||
|
|
||||||
def healthy():
|
def healthy() -> bool:
|
||||||
"""
|
"""
|
||||||
Simple function to return whether all the requirements are met
|
Simple function to return whether all the requirements are met
|
||||||
"""
|
"""
|
||||||
return all([
|
return all(
|
||||||
|
[
|
||||||
os.path.exists('/var/run/docker.sock'),
|
os.path.exists('/var/run/docker.sock'),
|
||||||
])
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
def cli_parse():
|
|
||||||
|
def cli_parse() -> argparse.Namespace:
|
||||||
"""
|
"""
|
||||||
Function to parse the CLI
|
Function to parse the CLI
|
||||||
"""
|
"""
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'-H', '--healthcheck',
|
'-H',
|
||||||
|
'--healthcheck',
|
||||||
action='store_true',
|
action='store_true',
|
||||||
help='Simply exit with 0 for healthy or 1 when unhealthy',
|
help='Simply exit with 0 for healthy or 1 when unhealthy',
|
||||||
)
|
)
|
||||||
|
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
def main():
|
def main() -> int:
|
||||||
"""
|
"""
|
||||||
main()
|
main()
|
||||||
"""
|
"""
|
||||||
args = cli_parse()
|
args: argparse.Namespace = cli_parse()
|
||||||
|
|
||||||
if args.healthcheck:
|
if args.healthcheck:
|
||||||
# Invert the sense of 'healthy' for Unix CLI usage
|
# Invert the sense of 'healthy' for Unix CLI usage
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
# To ensure app dependencies are ported from your virtual environment/host machine into your container, run 'pip freeze > requirements.txt' in the terminal to overwrite this file
|
# To ensure app dependencies are ported from your virtual environment/host machine into your container, run 'pip freeze > requirements.txt' in the terminal to overwrite this file
|
||||||
docker
|
docker
|
||||||
|
numpy
|
||||||
prometheus_client
|
prometheus_client
|
||||||
|
|
Loading…
Reference in a new issue