我正在编写一个自定义的 k8s 指标收集器,用于监控两个不同集群中的应用程序版本。如何在“/metrics”这样的特定路径上暴露指标,并在无限循环中持续收集指标?
下面是我的自定义指标收集器示例:
import time
from prometheus_client import start_http_server
from prometheus_client.core import REGISTRY, CounterMetricFamily
from kubernetes import client, config, watch
class CustomCollector(object):
    """Custom Prometheus collector linking pod secrets to Argo CD app sources.

    On every scrape it lists the pods of all namespaces, keeps those that
    mount a secret through a projected volume, and labels each of them with
    the ``repoURL-targetRevision`` of the Argo CD Application whose name
    equals the deployment name derived from the pod name.
    """

    def __init__(self):
        pass

    @staticmethod
    def _secret_rows(pod):
        """Yield ``[secret, namespace, deployment_name, image]`` rows for *pod*.

        One row is produced for every secret referenced by a projected volume
        of the pod; pods without such volumes yield nothing.
        """
        metadata: client.V1ObjectMeta = pod.metadata
        spec: client.V1PodSpec = pod.spec
        for volume in spec.volumes or []:
            projected = volume.projected
            if not projected:
                continue
            # Guard against a projected volume that carries no sources list.
            for source in projected.sources or []:
                if not source.secret:
                    continue
                yield [
                    source.secret.name,
                    metadata.namespace.lower(),
                    # Drop the last two dash-separated parts of the pod name
                    # (presumably the ReplicaSet hash and pod suffix -- confirm).
                    metadata.name.lower().rsplit('-', 2)[0],
                    # Only the first container's image is reported.
                    spec.containers[0].image,
                ]

    def collect(self):
        """Build and yield the ``retail_pods_info`` metric family.

        Called by the Prometheus registry; every call queries the cluster
        again, so the exposed values are as fresh as the scrape.

        Yields:
            A single ``CounterMetricFamily`` holding one sample (value ``1``)
            per unique label combination. Pods whose derived deployment name
            matches no Argo CD Application are not reported.
        """
        g = CounterMetricFamily(
            "retail_pods_info",
            'info about pods',
            labels=['secret', 'namespace', 'deployment_name', 'image', 'helm'],
        )
        config.load_kube_config('config')
        v1 = client.CoreV1Api()

        # Coordinates of the Argo CD Application custom resource.
        group = "argoproj.io"
        version = "v1alpha1"
        plural = "applications"
        namespace = "argo-cd"

        pod_list: client.V1PodList = v1.list_pod_for_all_namespaces(watch=False)
        rows = [row for pod in pod_list.items for row in self._secret_rows(pod)]

        argocd_api = client.CustomObjectsApi(client.ApiClient())
        argocd_apps = argocd_api.list_namespaced_custom_object(
            group, version, namespace, plural, watch=False
        )
        # Index the applications by name once instead of rescanning the whole
        # list for every pod row; the first application with a name wins.
        apps_by_name = {}
        for app in argocd_apps["items"]:
            apps_by_name.setdefault(app["metadata"]["name"], app)

        # Replicas of one deployment produce identical label sets; a metric
        # family must not expose the same series twice, so emit each once.
        emitted = set()
        for row in rows:
            app = apps_by_name.get(row[2])
            if app is None:
                continue
            source = app["spec"]["source"]
            helm_version = source["repoURL"] + "-" + source["targetRevision"]
            label_values = (*row, helm_version)
            if label_values in emitted:
                continue
            emitted.add(label_values)
            g.add_metric(list(label_values), 1)
        yield g
if __name__ == "__main__":
    # Expose the default registry over HTTP on port 8000.
    start_http_server(8000)
    # From now on every scrape invokes CustomCollector.collect().
    REGISTRY.register(CustomCollector())
    # The HTTP server call returns immediately, so block here forever to
    # keep the process (and with it the exporter) running.
    while True:
        time.sleep(60)
关于端点 `/metrics`:`start_http_server` 会在指定端口上启动一个 HTTP 服务器,该服务器对任何请求都返回当前指标。因此,对路径 `/metrics` 的请求默认就会得到生成的指标。
关于“在无限循环中收集指标”:
您的应用程序其实已经在这样做了(某种程度上)。由于您注册了自定义收集器,每次请求都会调用 `collect` 方法。而 Prometheus 本身基本上就是在无限循环中定期抓取指标,所以您的收集器也会随之被反复执行。