更新:当前的源代码可以在这里获取。
我目前正在为 changedetection.io 开发一个定制的 Prometheus 导出器(exporter),用来公开所有已注册监视项(watch)的抓取指标和价格指标。
在进行概念验证后,我试图使项目可维护并准备好发布到开源社区(例如,添加测试和文档并使其功能尽可能完整)。
在编写这些测试时,我在尝试测试在受监视的 ChangeDetection.io 实例中创建的新监视的动态注册时偶然发现了一个问题。为了让导出器在不重新启动的情况下获取它们,我在每次收集运行时检查 API 是否有新添加的监视。
这是 priceCollector 的 Collect 函数:
// Collect implements prometheus.Collector.
//
// Before emitting anything it asks the API client for the current watch list
// and creates a priceMetric for every watch ID that is not yet present in
// c.priceMetrics, so that watches added while the exporter is running are
// picked up without a restart. Afterwards every known price metric is asked to
// collect itself into ch.
//
// A failure to fetch the watch list is logged; the metrics that are already
// known are still collected in that case.
func (c *priceCollector) Collect(ch chan<- prometheus.Metric) {
	// check for new watches before collecting metrics
	watches, err := c.ApiClient.getWatches()
	if err != nil {
		log.Errorf("error while fetching watches: %v", err)
	} else {
		for id, watch := range watches {
			if _, known := c.priceMetrics[id]; known {
				continue
			}

			// create new metric and register it on the DefaultRegisterer
			metric := newPriceMetric(prometheus.Labels{"title": watch.Title}, c.ApiClient, id)

			// Remember the metric even if registration fails below, so the
			// registration is not retried (and the error not logged again) on
			// every scrape.
			c.priceMetrics[id] = metric

			// Register instead of MustRegister: MustRegister panics on any
			// registration error, which would take the exporter down in the
			// middle of a scrape. NOTE(review): presumably two watches sharing
			// the same title yield identical descriptors and trigger exactly
			// that error - confirm against priceMetric.Describe.
			if regErr := prometheus.Register(metric); regErr != nil {
				log.Errorf("error while registering metric %s for watch %s: %v", id, watch.Title, regErr)
				continue
			}
			log.Infof("Picked up new watch %s, registered as metric %s", watch.Title, id)
		}
	}

	// collect all registered metrics
	for _, metric := range c.priceMetrics {
		metric.Collect(ch)
	}
}
newPriceMetric
函数只是创建一个新的priceMetric
对象,其中包含一个prometheus.Desc
、一个ApiClient
(提供对changedetection.io API的访问的类)和一个UUID
:
// newPriceMetric builds the priceMetric for a single watch.
//
// The descriptor is named <namespace>_watch_price, carries the given labels as
// constant labels and has no variable labels. apiClient and uuid are stored on
// the returned value unchanged.
func newPriceMetric(labels prometheus.Labels, apiClient *ApiClient, uuid string) priceMetric {
	fqName := prometheus.BuildFQName(namespace, "watch", "price")
	desc := prometheus.NewDesc(fqName, "Current price of an offer type watch", nil, labels)

	var m priceMetric
	m.desc = desc
	m.apiClient = apiClient
	m.UUID = uuid
	return m
}
默认行为的测试工作得很好并且全部通过,但是当我尝试测试添加新监视项的行为时(即导出器正在运行、不重新启动的情况下),测试失败。
注意:expectMetrics 和 expectMetricCount 都是 prometheus 自带的 testutil.CollectAndCompare 和 testutil.CollectAndCount 的包装函数。帮助器 CreateTestApiServer 创建一个包装的 httptest 服务器,该服务器根据传入的 map[string]*data.WatchItem 返回 JSON 有效负载。
// TestAutoregisterPriceCollector checks that a price collector picks up a
// watch that is added to the backing test database after the collector has
// been created.
func TestAutoregisterPriceCollector(t *testing.T) {
	const priceMetricName = "changedetectionio_watch_price"

	db := createCollectorTestDb()
	apiServer := testutil.CreateTestApiServer(t, db)
	defer apiServer.Close()

	collector, err := NewPriceCollector(apiServer.URL(), "foo-bar-key")
	if err != nil {
		t.Fatal(err)
	}

	// Baseline: the freshly created test database is expected to yield two
	// price metrics.
	expectMetricCount(t, collector, 2, priceMetricName)

	// Add a third watch to the database the test server reads from; the
	// collector is expected to pick it up on its next collection.
	itemID, item := testutil.NewTestItem("Item 3", 300, "USD")
	db[itemID] = item

	expectMetrics(t, collector, "price_metrics_autoregister.prom", priceMetricName)
	expectMetricCount(t, collector, 3, priceMetricName)
}
运行该测试时,运行失败并出现以下错误:
collector_test.go:23: 返回了意外的指标:收集指标失败:收集到的指标 changedetectionio_watch_price label:{name:"title" value:"Item 3"} gauge:{value:300} 带有未注册的描述符 Desc{fqName: "changedetectionio_watch_price", help: "Current price of an offer type watch", constLabels: {title="Item 3"}, variableLabels: {}}
我目前认为此错误与
testutil.CollectAnd*
内部工作方式有关。根据函数注释,它们会把收集器注册到一个新创建的 pedantic(严格检查)注册表上,这可能导致该注册表无法识别之后才注册的描述符。
对此有什么想法吗?
我不确定这是否能回答你的问题,但是,这是一个例子
package main
import (
"flag"
"fmt"
"log/slog"
"net/http"
"sync"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/collectors"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/prometheus/client_golang/prometheus/testutil"
)
// endpoint is the address the HTTP server listens on; it is set with the
// -endpoint command-line flag and defaults to 0.0.0.0:8080.
var endpoint = flag.String("endpoint", "0.0.0.0:8080", "The endpoint of the HTTP server")
// TestCollector is a Prometheus collector that emits one "foo" metric per
// entry in values.
type TestCollector struct {
	// Embedded lock guarding values: Collect holds the read lock while
	// iterating, and main's goroutine holds the write lock while appending.
	sync.RWMutex
	// values holds the label values; each entry becomes its own foo series.
	values []string
	// foo is the descriptor shared by every metric this collector emits.
	foo *prometheus.Desc
}
// NewTestCollector returns a TestCollector with no values yet and a
// descriptor named "foo" (help text "foo") that has a single variable label
// called "labels" and no constant labels.
func NewTestCollector() *TestCollector {
	variableLabels := []string{"labels"}
	desc := prometheus.NewDesc("foo", "foo", variableLabels, nil)

	c := new(TestCollector)
	c.foo = desc
	return c
}
// Collect sends one counter metric with the constant value 1 for every entry
// in c.values, using the entry as the value of the descriptor's label. The
// read lock is held for the whole iteration.
func (c *TestCollector) Collect(ch chan<- prometheus.Metric) {
	c.RLock()
	defer c.RUnlock()

	for i := range c.values {
		metric := prometheus.MustNewConstMetric(c.foo, prometheus.CounterValue, 1, c.values[i])
		ch <- metric
	}
}
// Describe sends the collector's single descriptor, foo, on ch.
func (c *TestCollector) Describe(ch chan<- *prometheus.Desc) {
	desc := c.foo
	ch <- desc
}
// main registers a TestCollector on a dedicated registry, starts a goroutine
// that adds a new label value every 15 seconds (20 in total) and logs the
// result of testutil.CollectAndCount after each addition, and serves the
// registry on /metrics at the configured endpoint.
func main() {
	flag.Parse()

	// Ask the default registerer to drop the Go runtime and process collectors.
	goCollector := collectors.NewGoCollector()
	prometheus.Unregister(goCollector)
	processCollector := collectors.NewProcessCollector(collectors.ProcessCollectorOpts{})
	prometheus.Unregister(processCollector)

	collector := NewTestCollector()
	registry := prometheus.NewRegistry()
	registry.MustRegister(collector)

	// addValues grows the collector's value list step by step and reports how
	// many "foo" metrics CollectAndCount sees after each step.
	addValues := func() {
		for i := 0; i < 20; i++ {
			value := fmt.Sprintf("value-%02d", i)
			slog.Info("Adding value", "value", value)

			collector.Lock()
			collector.values = append(collector.values, value)
			collector.Unlock()

			count := testutil.CollectAndCount(collector, "foo")
			slog.Info("testutil", "count", count)

			time.Sleep(15 * time.Second)
		}
	}
	go addValues()

	handler := promhttp.HandlerFor(registry, promhttp.HandlerOpts{})
	http.Handle("/metrics", handler)

	// ListenAndServe only returns on failure, so its result is always logged
	// as an error.
	err := http.ListenAndServe(*endpoint, nil)
	slog.Error("unable to listen", "err", err)
}
指标 foo 的标签值集合(value-xx,xx 为 00..19)每 15 秒增加一个,CollectAndCount 返回的计数随每次迭代而增加:
日志:
2024/04/12 10:43:37 INFO Adding value value=value-00
2024/04/12 10:43:37 INFO testutil count=1
2024/04/12 10:43:52 INFO Adding value value=value-01
2024/04/12 10:43:52 INFO testutil count=2
2024/04/12 10:44:07 INFO Adding value value=value-02
2024/04/12 10:44:07 INFO testutil count=3
2024/04/12 10:44:22 INFO Adding value value=value-03
2024/04/12 10:44:22 INFO testutil count=4
2024/04/12 10:44:37 INFO Adding value value=value-04
2024/04/12 10:44:37 INFO testutil count=5
2024/04/12 10:44:52 INFO Adding value value=value-05
并且:
curl --silent --get http://localhost:8080/metrics
# HELP foo foo
# TYPE foo counter
foo{labels="value-00"} 1
foo{labels="value-01"} 1
foo{labels="value-02"} 1
foo{labels="value-03"} 1
foo{labels="value-04"} 1
foo{labels="value-05"} 1