Skip to content

Commit 46d0106

Browse files
authored
Implement federated metric metadata API (#3686)
* support federated metadata API Signed-off-by: Ben Ye <[email protected]> * update comments Signed-off-by: yeya24 <[email protected]> * use parseInt Signed-off-by: yeya24 <[email protected]> * address Prem's comments Signed-off-by: yeya24 <[email protected]> * update proto comment Signed-off-by: yeya24 <[email protected]> * add changelog Signed-off-by: yeya24 <[email protected]>
1 parent f969003 commit 46d0106

24 files changed

+2281
-28
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ We use _breaking :warning:_ to mark changes that are not backward compatible (re
1818
- [#3700](https://github.com/thanos-io/thanos/pull/3700) ui: make old bucket viewer UI work with vanilla Prometheus blocks
1919
- [#2641](https://github.com/thanos-io/thanos/issues/2641) Query Frontend: Added `--query-range.request-downsampled` flag enabling additional queries for downsampled data in case of empty or incomplete response to range request.
2020
- [#3792](https://github.com/thanos-io/thanos/pull/3792) Receiver: Added `--tsdb.allow-overlapping-blocks` flag to allow overlapping tsdb blocks and enable vertical compaction
21+
- [#3686](https://github.com/thanos-io/thanos/pull/3686) Query: Added federated metric metadata support.
2122

2223
### Fixed
2324

cmd/thanos/query.go

+36
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ import (
3636
extpromhttp "github.com/thanos-io/thanos/pkg/extprom/http"
3737
"github.com/thanos-io/thanos/pkg/gate"
3838
"github.com/thanos-io/thanos/pkg/logging"
39+
"github.com/thanos-io/thanos/pkg/metadata"
3940
"github.com/thanos-io/thanos/pkg/prober"
4041
"github.com/thanos-io/thanos/pkg/query"
4142
"github.com/thanos-io/thanos/pkg/rules"
@@ -96,6 +97,9 @@ func registerQuery(app *extkingpin.App) {
9697
ruleEndpoints := cmd.Flag("rule", "Experimental: Addresses of statically configured rules API servers (repeatable). The scheme may be prefixed with 'dns+' or 'dnssrv+' to detect rule API servers through respective DNS lookups.").
9798
Hidden().PlaceHolder("<rule>").Strings()
9899

100+
metadataEndpoints := cmd.Flag("metadata", "Experimental: Addresses of statically configured metadata API servers (repeatable). The scheme may be prefixed with 'dns+' or 'dnssrv+' to detect metadata API servers through respective DNS lookups.").
101+
Hidden().PlaceHolder("<metadata>").Strings()
102+
99103
strictStores := cmd.Flag("store-strict", "Addresses of only statically configured store API servers that are always used, even if the health check fails. Useful if you have a caching layer on top.").
100104
PlaceHolder("<staticstore>").Strings()
101105

@@ -123,6 +127,9 @@ func registerQuery(app *extkingpin.App) {
123127
enableRulePartialResponse := cmd.Flag("rule.partial-response", "Enable partial response for rules endpoint. --no-rule.partial-response for disabling.").
124128
Hidden().Default("true").Bool()
125129

130+
enableMetricMetadataPartialResponse := cmd.Flag("metric-metadata.partial-response", "Enable partial response for metric metadata endpoint. --no-metric-metadata.partial-response for disabling.").
131+
Hidden().Default("true").Bool()
132+
126133
defaultEvaluationInterval := extkingpin.ModelDuration(cmd.Flag("query.default-evaluation-interval", "Set default evaluation interval for sub queries.").Default("1m"))
127134

128135
defaultRangeQueryStep := extkingpin.ModelDuration(cmd.Flag("query.default-step", "Set default step for range queries. Default step is only used when step is not set in UI. In such cases, Thanos UI will use default step to calculate resolution (resolution = max(rangeSeconds / 250, defaultStep)). This will not work from Grafana, but Grafana has __step variable which can be used.").
@@ -144,6 +151,10 @@ func registerQuery(app *extkingpin.App) {
144151
return errors.Errorf("Address %s is duplicated for --rule flag.", dup)
145152
}
146153

154+
if dup := firstDuplicate(*metadataEndpoints); dup != "" {
155+
return errors.Errorf("Address %s is duplicated for --metadata flag.", dup)
156+
}
157+
147158
var fileSD *file.Discovery
148159
if len(*fileSDFiles) > 0 {
149160
conf := &file.SDConfig{
@@ -195,9 +206,11 @@ func registerQuery(app *extkingpin.App) {
195206
getFlagsMap(cmd.Flags()),
196207
*stores,
197208
*ruleEndpoints,
209+
*metadataEndpoints,
198210
*enableAutodownsampling,
199211
*enableQueryPartialResponse,
200212
*enableRulePartialResponse,
213+
*enableMetricMetadataPartialResponse,
201214
fileSD,
202215
time.Duration(*dnsSDInterval),
203216
*dnsSDResolver,
@@ -246,9 +259,11 @@ func runQuery(
246259
flagsMap map[string]string,
247260
storeAddrs []string,
248261
ruleAddrs []string,
262+
metadataAddrs []string,
249263
enableAutodownsampling bool,
250264
enableQueryPartialResponse bool,
251265
enableRulePartialResponse bool,
266+
enableMetricMetadataPartialResponse bool,
252267
fileSD *file.Discovery,
253268
dnsSDInterval time.Duration,
254269
dnsSDResolver string,
@@ -288,6 +303,12 @@ func runQuery(
288303
dns.ResolverType(dnsSDResolver),
289304
)
290305

306+
dnsMetadataProvider := dns.NewProvider(
307+
logger,
308+
extprom.WrapRegistererWithPrefix("thanos_query_metadata_apis_", reg),
309+
dns.ResolverType(dnsSDResolver),
310+
)
311+
291312
var (
292313
stores = query.NewStoreSet(
293314
logger,
@@ -314,11 +335,19 @@ func runQuery(
314335

315336
return specs
316337
},
338+
func() (specs []query.MetadataSpec) {
339+
for _, addr := range dnsMetadataProvider.Addresses() {
340+
specs = append(specs, query.NewGRPCStoreSpec(addr, false))
341+
}
342+
343+
return specs
344+
},
317345
dialOpts,
318346
unhealthyStoreTimeout,
319347
)
320348
proxy = store.NewProxyStore(logger, reg, stores.Get, component.Query, selectorLset, storeResponseTimeout)
321349
rulesProxy = rules.NewProxy(logger, stores.GetRulesClients)
350+
metadataProxy = metadata.NewProxy(logger, stores.GetMetadataClients)
322351
queryableCreator = query.NewQueryableCreator(
323352
logger,
324353
extprom.WrapRegistererWithPrefix("thanos_query_", reg),
@@ -381,6 +410,7 @@ func runQuery(
381410
if err := dnsStoreProvider.Resolve(ctxUpdate, append(fileSDCache.Addresses(), storeAddrs...)); err != nil {
382411
level.Error(logger).Log("msg", "failed to resolve addresses for storeAPIs", "err", err)
383412
}
413+
384414
// Rules apis do not support file service discovery as of now.
385415
case <-ctxUpdate.Done():
386416
return nil
@@ -404,6 +434,9 @@ func runQuery(
404434
if err := dnsRuleProvider.Resolve(resolveCtx, ruleAddrs); err != nil {
405435
level.Error(logger).Log("msg", "failed to resolve addresses for rulesAPIs", "err", err)
406436
}
437+
if err := dnsMetadataProvider.Resolve(resolveCtx, metadataAddrs); err != nil {
438+
level.Error(logger).Log("msg", "failed to resolve addresses for metadataAPIs", "err", err)
439+
}
407440
return nil
408441
})
409442
}, func(error) {
@@ -454,9 +487,11 @@ func runQuery(
454487
queryableCreator,
455488
// NOTE: Will share the same replica label as the query for now.
456489
rules.NewGRPCClientWithDedup(rulesProxy, queryReplicaLabels),
490+
metadata.NewGRPCClient(metadataProxy),
457491
enableAutodownsampling,
458492
enableQueryPartialResponse,
459493
enableRulePartialResponse,
494+
enableMetricMetadataPartialResponse,
460495
queryReplicaLabels,
461496
flagsMap,
462497
defaultRangeQueryStep,
@@ -497,6 +532,7 @@ func runQuery(
497532
s := grpcserver.New(logger, reg, tracer, comp, grpcProbe,
498533
grpcserver.WithServer(store.RegisterStoreServer(proxy)),
499534
grpcserver.WithServer(rules.RegisterRulesServer(rulesProxy)),
535+
grpcserver.WithServer(metadata.RegisterMetadataServer(metadataProxy)),
500536
grpcserver.WithListen(grpcBindAddr),
501537
grpcserver.WithGracePeriod(grpcGracePeriod),
502538
grpcserver.WithTLSConfig(tlsCfg),

cmd/thanos/sidecar.go

+2
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
"github.com/thanos-io/thanos/pkg/extkingpin"
2828
"github.com/thanos-io/thanos/pkg/extprom"
2929
thanoshttp "github.com/thanos-io/thanos/pkg/http"
30+
meta "github.com/thanos-io/thanos/pkg/metadata"
3031
thanosmodel "github.com/thanos-io/thanos/pkg/model"
3132
"github.com/thanos-io/thanos/pkg/objstore/client"
3233
"github.com/thanos-io/thanos/pkg/prober"
@@ -218,6 +219,7 @@ func runSidecar(
218219
s := grpcserver.New(logger, reg, tracer, comp, grpcProbe,
219220
grpcserver.WithServer(store.RegisterStoreServer(promStore)),
220221
grpcserver.WithServer(rules.RegisterRulesServer(rules.NewPrometheus(conf.prometheus.url, c, m.Labels))),
222+
grpcserver.WithServer(meta.RegisterMetadataServer(meta.NewPrometheus(conf.prometheus.url, c))),
221223
grpcserver.WithListen(conf.grpc.bindAddress),
222224
grpcserver.WithGracePeriod(time.Duration(conf.grpc.gracePeriod)),
223225
grpcserver.WithTLSConfig(tlsCfg),

pkg/api/query/v1.go

+46-4
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ import (
4343
extpromhttp "github.com/thanos-io/thanos/pkg/extprom/http"
4444
"github.com/thanos-io/thanos/pkg/gate"
4545
"github.com/thanos-io/thanos/pkg/logging"
46+
"github.com/thanos-io/thanos/pkg/metadata"
47+
"github.com/thanos-io/thanos/pkg/metadata/metadatapb"
4648
"github.com/thanos-io/thanos/pkg/query"
4749
"github.com/thanos-io/thanos/pkg/rules"
4850
"github.com/thanos-io/thanos/pkg/rules/rulespb"
@@ -70,10 +72,12 @@ type QueryAPI struct {
7072
// queryEngine returns appropriate promql.Engine for a query with a given step.
7173
queryEngine func(int64) *promql.Engine
7274
ruleGroups rules.UnaryClient
75+
metadatas metadata.UnaryClient
7376

74-
enableAutodownsampling bool
75-
enableQueryPartialResponse bool
76-
enableRulePartialResponse bool
77+
enableAutodownsampling bool
78+
enableQueryPartialResponse bool
79+
enableRulePartialResponse bool
80+
enableMetricMetadataPartialResponse bool
7781

7882
replicaLabels []string
7983
storeSet *query.StoreSet
@@ -90,9 +94,11 @@ func NewQueryAPI(
9094
qe func(int64) *promql.Engine,
9195
c query.QueryableCreator,
9296
ruleGroups rules.UnaryClient,
97+
metadatas metadata.UnaryClient,
9398
enableAutodownsampling bool,
9499
enableQueryPartialResponse bool,
95100
enableRulePartialResponse bool,
101+
enableMetricMetadataPartialResponse bool,
96102
replicaLabels []string,
97103
flagsMap map[string]string,
98104
defaultRangeQueryStep time.Duration,
@@ -107,10 +113,12 @@ func NewQueryAPI(
107113
queryableCreate: c,
108114
gate: gate,
109115
ruleGroups: ruleGroups,
116+
metadatas: metadatas,
110117

111118
enableAutodownsampling: enableAutodownsampling,
112119
enableQueryPartialResponse: enableQueryPartialResponse,
113120
enableRulePartialResponse: enableRulePartialResponse,
121+
enableMetricMetadataPartialResponse: enableMetricMetadataPartialResponse,
114122
replicaLabels: replicaLabels,
115123
storeSet: storeSet,
116124
defaultRangeQueryStep: defaultRangeQueryStep,
@@ -142,6 +150,8 @@ func (qapi *QueryAPI) Register(r *route.Router, tracer opentracing.Tracer, logge
142150
r.Get("/stores", instr("stores", qapi.stores))
143151

144152
r.Get("/rules", instr("rules", NewRulesHandler(qapi.ruleGroups, qapi.enableRulePartialResponse)))
153+
154+
r.Get("/metadata", instr("metadata", NewMetricMetadataHandler(qapi.metadatas, qapi.enableMetricMetadataPartialResponse)))
145155
}
146156

147157
type queryData struct {
@@ -630,7 +640,7 @@ func (qapi *QueryAPI) labelNames(r *http.Request) (interface{}, []error, *api.Ap
630640
return names, warnings, nil
631641
}
632642

633-
func (qapi *QueryAPI) stores(r *http.Request) (interface{}, []error, *api.ApiError) {
643+
func (qapi *QueryAPI) stores(_ *http.Request) (interface{}, []error, *api.ApiError) {
634644
statuses := make(map[string][]query.StoreStatus)
635645
for _, status := range qapi.storeSet.GetStoreStatus() {
636646
statuses[status.StoreType.String()] = append(statuses[status.StoreType.String()], status)
@@ -790,3 +800,35 @@ func labelValuesByMatchers(sets []storage.SeriesSet, name string) ([]string, sto
790800
sort.Strings(labelValues)
791801
return labelValues, warnings, nil
792802
}
803+
804+
func NewMetricMetadataHandler(client metadata.UnaryClient, enablePartialResponse bool) func(*http.Request) (interface{}, []error, *api.ApiError) {
805+
ps := storepb.PartialResponseStrategy_ABORT
806+
if enablePartialResponse {
807+
ps = storepb.PartialResponseStrategy_WARN
808+
}
809+
810+
return func(r *http.Request) (interface{}, []error, *api.ApiError) {
811+
req := &metadatapb.MetadataRequest{
812+
// By default we use -1, which means no limit.
813+
Limit: -1,
814+
Metric: r.URL.Query().Get("metric"),
815+
PartialResponseStrategy: ps,
816+
}
817+
818+
limitStr := r.URL.Query().Get("limit")
819+
if limitStr != "" {
820+
limit, err := strconv.ParseInt(limitStr, 10, 32)
821+
if err != nil {
822+
return nil, nil, &api.ApiError{Typ: api.ErrorBadData, Err: errors.Errorf("invalid metric metadata limit='%v'", limit)}
823+
}
824+
req.Limit = int32(limit)
825+
}
826+
827+
t, warnings, err := client.Metadata(r.Context(), req)
828+
if err != nil {
829+
return nil, nil, &api.ApiError{Typ: api.ErrorInternal, Err: errors.Wrap(err, "retrieving metadata")}
830+
}
831+
832+
return t, warnings, nil
833+
}
834+
}

pkg/metadata/metadata.go

+108
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
// Copyright (c) The Thanos Authors.
2+
// Licensed under the Apache License 2.0.
3+
4+
package metadata
5+
6+
import (
7+
"context"
8+
9+
"github.com/pkg/errors"
10+
"github.com/prometheus/prometheus/storage"
11+
"github.com/thanos-io/thanos/pkg/metadata/metadatapb"
12+
)
13+
14+
// Compile-time check that *GRPCClient satisfies UnaryClient.
var _ UnaryClient = (*GRPCClient)(nil)
15+
16+
// UnaryClient is gRPC metadatapb.Metadata client which expands streaming metadata API. Useful for consumers that does not
17+
// support streaming.
18+
type UnaryClient interface {
19+
Metadata(ctx context.Context, req *metadatapb.MetadataRequest) (map[string][]metadatapb.Meta, storage.Warnings, error)
20+
}
21+
22+
// GRPCClient allows to retrieve metadata from local gRPC streaming server implementation.
23+
// TODO(bwplotka): Switch to native gRPC transparent client->server adapter once available.
24+
type GRPCClient struct {
25+
proxy metadatapb.MetadataServer
26+
}
27+
28+
func NewGRPCClient(ts metadatapb.MetadataServer) *GRPCClient {
29+
return &GRPCClient{
30+
proxy: ts,
31+
}
32+
}
33+
34+
func (rr *GRPCClient) Metadata(ctx context.Context, req *metadatapb.MetadataRequest) (map[string][]metadatapb.Meta, storage.Warnings, error) {
35+
srv := &metadataServer{ctx: ctx, metric: req.Metric, limit: int(req.Limit)}
36+
37+
if req.Limit >= 0 {
38+
if req.Metric != "" {
39+
srv.metadataMap = make(map[string][]metadatapb.Meta, 1)
40+
} else if req.Limit <= 100 {
41+
srv.metadataMap = make(map[string][]metadatapb.Meta, req.Limit)
42+
} else {
43+
srv.metadataMap = make(map[string][]metadatapb.Meta)
44+
}
45+
} else {
46+
srv.metadataMap = make(map[string][]metadatapb.Meta)
47+
}
48+
49+
if err := rr.proxy.Metadata(req, srv); err != nil {
50+
return nil, nil, errors.Wrap(err, "proxy Metadata")
51+
}
52+
53+
return srv.metadataMap, srv.warnings, nil
54+
}
55+
56+
type metadataServer struct {
57+
// This field just exist to pseudo-implement the unused methods of the interface.
58+
metadatapb.Metadata_MetadataServer
59+
ctx context.Context
60+
61+
metric string
62+
limit int
63+
64+
warnings []error
65+
metadataMap map[string][]metadatapb.Meta
66+
}
67+
68+
func (srv *metadataServer) Send(res *metadatapb.MetadataResponse) error {
69+
if res.GetWarning() != "" {
70+
srv.warnings = append(srv.warnings, errors.New(res.GetWarning()))
71+
return nil
72+
}
73+
74+
if res.GetMetadata() == nil {
75+
return errors.New("no metadata")
76+
}
77+
78+
// If limit is set to 0, we don't need to add anything.
79+
if srv.limit == 0 {
80+
return nil
81+
}
82+
83+
for k, v := range res.GetMetadata().Metadata {
84+
if metadata, ok := srv.metadataMap[k]; !ok {
85+
// If limit is set and it is positive, we limit the size of the map.
86+
if srv.limit < 0 || srv.limit > 0 && len(srv.metadataMap) < srv.limit {
87+
srv.metadataMap[k] = v.Metas
88+
}
89+
} else {
90+
// There shouldn't be many metadata for one single metric.
91+
Outer:
92+
for _, meta := range v.Metas {
93+
for _, m := range metadata {
94+
if meta == m {
95+
continue Outer
96+
}
97+
}
98+
srv.metadataMap[k] = append(srv.metadataMap[k], meta)
99+
}
100+
}
101+
}
102+
103+
return nil
104+
}
105+
106+
func (srv *metadataServer) Context() context.Context {
107+
return srv.ctx
108+
}

pkg/metadata/metadatapb/custom.go

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// Copyright (c) The Thanos Authors.
2+
// Licensed under the Apache License 2.0.
3+
4+
package metadatapb
5+
6+
import (
7+
"unsafe"
8+
)
9+
10+
func NewMetadataResponse(metadata *MetricMetadata) *MetadataResponse {
11+
return &MetadataResponse{
12+
Result: &MetadataResponse_Metadata{
13+
Metadata: metadata,
14+
},
15+
}
16+
}
17+
18+
func NewWarningMetadataResponse(warning error) *MetadataResponse {
19+
return &MetadataResponse{
20+
Result: &MetadataResponse_Warning{
21+
Warning: warning.Error(),
22+
},
23+
}
24+
}
25+
26+
func FromMetadataMap(m map[string][]Meta) *MetricMetadata {
27+
return &MetricMetadata{Metadata: *(*map[string]MetricMetadataEntry)(unsafe.Pointer(&m))}
28+
}

0 commit comments

Comments
 (0)