Skip to content

Commit d08a12a

Browse files
Compactor: Add tracing support (#4903)
* first draft for tracing Signed-off-by: metonymic-smokey <[email protected]> * add tracer to context Signed-off-by: metonymic-smokey <[email protected]> * minor fixes Signed-off-by: metonymic-smokey <[email protected]> * create common block spans; log block errors Signed-off-by: metonymic-smokey <[email protected]> * reverted to generic block spans Signed-off-by: metonymic-smokey <[email protected]> * changed block ID tag; add span for block delete Signed-off-by: metonymic-smokey <[email protected]> * minor fix Signed-off-by: metonymic-smokey <[email protected]> * removed extra spans Signed-off-by: metonymic-smokey <[email protected]> * addressed some review comments Signed-off-by: metonymic-smokey <[email protected]> * removed block spans Signed-off-by: metonymic-smokey <[email protected]> * changed group key Signed-off-by: metonymic-smokey <[email protected]> * removed extra var declarations Signed-off-by: metonymic-smokey <[email protected]> * removed comments Signed-off-by: metonymic-smokey <[email protected]> * added changelog entry Signed-off-by: metonymic-smokey <[email protected]> * update healthcheck to healthstats. Co-authored-by: Matej Gera <[email protected]> Signed-off-by: metonymic-smokey <[email protected]> * draft: logging errors Signed-off-by: metonymic-smokey <[email protected]> * removed extra line Signed-off-by: metonymic-smokey <[email protected]> * used alternate function to log errors Signed-off-by: metonymic-smokey <[email protected]> * minor nits Signed-off-by: metonymic-smokey <[email protected]> Co-authored-by: Matej Gera <[email protected]>
1 parent afd23cf commit d08a12a

File tree

4 files changed

+57
-8
lines changed

4 files changed

+57
-8
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re
2929
- [#4874](https://github.com/thanos-io/thanos/pull/4874) Query: Add `--endpoint-strict` flag to statically configure Thanos API server endpoints. It is similar to `--store-strict` but supports passing any Thanos gRPC APIs: StoreAPI, MetadataAPI, RulesAPI, TargetsAPI and ExemplarsAPI.
3030
- [#4868](https://github.com/thanos-io/thanos/pull/4868) Rule: Support ruleGroup limit introduced by Prometheus v2.31.0.
3131
- [#4897](https://github.com/thanos-io/thanos/pull/4897) Querier: Add validation for querier address flags.
32+
- [#4903](https://github.com/thanos-io/thanos/pull/4903) Compactor: Added tracing support for compaction.
3233

3334
### Fixed
3435

cmd/thanos/compact.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ import (
4242
"github.com/thanos-io/thanos/pkg/prober"
4343
"github.com/thanos-io/thanos/pkg/runutil"
4444
httpserver "github.com/thanos-io/thanos/pkg/server/http"
45+
"github.com/thanos-io/thanos/pkg/tracing"
4546
"github.com/thanos-io/thanos/pkg/ui"
4647
)
4748

@@ -300,6 +301,7 @@ func runCompact(
300301
}
301302

302303
ctx, cancel := context.WithCancel(context.Background())
304+
ctx = tracing.ContextWithTracer(ctx, tracer)
303305

304306
defer func() {
305307
if rerr != nil {

pkg/compact/compact.go

Lines changed: 41 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
"github.com/go-kit/log"
1818
"github.com/go-kit/log/level"
1919
"github.com/oklog/ulid"
20+
"github.com/opentracing/opentracing-go"
2021
"github.com/pkg/errors"
2122
"github.com/prometheus/client_golang/prometheus"
2223
"github.com/prometheus/client_golang/prometheus/promauto"
@@ -31,6 +32,7 @@ import (
3132
"github.com/thanos-io/thanos/pkg/extprom"
3233
"github.com/thanos-io/thanos/pkg/objstore"
3334
"github.com/thanos-io/thanos/pkg/runutil"
35+
"github.com/thanos-io/thanos/pkg/tracing"
3436
)
3537

3638
type ResolutionLevel int64
@@ -764,7 +766,11 @@ func (cg *Group) Compact(ctx context.Context, dir string, planner Planner, comp
764766
return false, ulid.ULID{}, errors.Wrap(err, "create compaction group dir")
765767
}
766768

767-
shouldRerun, compID, err := cg.compact(ctx, subDir, planner, comp)
769+
var err error
770+
tracing.DoInSpanWithErr(ctx, "compaction_group", func(ctx context.Context) error {
771+
shouldRerun, compID, err = cg.compact(ctx, subDir, planner, comp)
772+
return err
773+
}, opentracing.Tags{"group.key": cg.Key()})
768774
if err != nil {
769775
cg.compactionFailures.Inc()
770776
return false, ulid.ULID{}, err
@@ -980,7 +986,11 @@ func (cg *Group) compact(ctx context.Context, dir string, planner Planner, comp
980986
overlappingBlocks = true
981987
}
982988

983-
toCompact, err := planner.Plan(ctx, cg.metasByMinTime)
989+
var toCompact []*metadata.Meta
990+
tracing.DoInSpanWithErr(ctx, "compaction_planning", func(ctx context.Context) error {
991+
toCompact, err = planner.Plan(ctx, cg.metasByMinTime)
992+
return err
993+
})
984994
if err != nil {
985995
return false, ulid.ULID{}, errors.Wrap(err, "plan compaction")
986996
}
@@ -1008,12 +1018,20 @@ func (cg *Group) compact(ctx context.Context, dir string, planner Planner, comp
10081018
uniqueSources[s] = struct{}{}
10091019
}
10101020

1011-
if err := block.Download(ctx, cg.logger, cg.bkt, meta.ULID, bdir); err != nil {
1021+
tracing.DoInSpanWithErr(ctx, "compaction_block_download", func(ctx context.Context) error {
1022+
err = block.Download(ctx, cg.logger, cg.bkt, meta.ULID, bdir)
1023+
return err
1024+
}, opentracing.Tags{"block.id": meta.ULID})
1025+
if err != nil {
10121026
return false, ulid.ULID{}, retry(errors.Wrapf(err, "download block %s", meta.ULID))
10131027
}
10141028

10151029
// Ensure all input blocks are valid.
1016-
stats, err := block.GatherIndexHealthStats(cg.logger, filepath.Join(bdir, block.IndexFilename), meta.MinTime, meta.MaxTime)
1030+
var stats block.HealthStats
1031+
tracing.DoInSpanWithErr(ctx, "compaction_block_health_stats", func(ctx context.Context) error {
1032+
stats, err = block.GatherIndexHealthStats(cg.logger, filepath.Join(bdir, block.IndexFilename), meta.MinTime, meta.MaxTime)
1033+
return err
1034+
}, opentracing.Tags{"block.id": meta.ULID})
10171035
if err != nil {
10181036
return false, ulid.ULID{}, errors.Wrapf(err, "gather index issues for block %s", bdir)
10191037
}
@@ -1039,7 +1057,10 @@ func (cg *Group) compact(ctx context.Context, dir string, planner Planner, comp
10391057
level.Info(cg.logger).Log("msg", "downloaded and verified blocks; compacting blocks", "plan", fmt.Sprintf("%v", toCompactDirs), "duration", time.Since(begin), "duration_ms", time.Since(begin).Milliseconds())
10401058

10411059
begin = time.Now()
1042-
compID, err = comp.Compact(dir, toCompactDirs, nil)
1060+
tracing.DoInSpanWithErr(ctx, "compaction", func(ctx context.Context) error {
1061+
compID, err = comp.Compact(dir, toCompactDirs, nil)
1062+
return err
1063+
})
10431064
if err != nil {
10441065
return false, ulid.ULID{}, halt(errors.Wrapf(err, "compact blocks %v", toCompactDirs))
10451066
}
@@ -1081,7 +1102,11 @@ func (cg *Group) compact(ctx context.Context, dir string, planner Planner, comp
10811102
}
10821103

10831104
// Ensure the output block is valid.
1084-
if err := block.VerifyIndex(cg.logger, index, newMeta.MinTime, newMeta.MaxTime); !cg.acceptMalformedIndex && err != nil {
1105+
tracing.DoInSpanWithErr(ctx, "compaction_verify_index", func(ctx context.Context) error {
1106+
err = block.VerifyIndex(cg.logger, index, newMeta.MinTime, newMeta.MaxTime)
1107+
return err
1108+
})
1109+
if !cg.acceptMalformedIndex && err != nil {
10851110
return false, ulid.ULID{}, halt(errors.Wrapf(err, "invalid result block %s", bdir))
10861111
}
10871112

@@ -1095,7 +1120,11 @@ func (cg *Group) compact(ctx context.Context, dir string, planner Planner, comp
10951120

10961121
begin = time.Now()
10971122

1098-
if err := block.Upload(ctx, cg.logger, cg.bkt, bdir, cg.hashFunc); err != nil {
1123+
tracing.DoInSpanWithErr(ctx, "compaction_block_upload", func(ctx context.Context) error {
1124+
err = block.Upload(ctx, cg.logger, cg.bkt, bdir, cg.hashFunc)
1125+
return err
1126+
})
1127+
if err != nil {
10991128
return false, ulid.ULID{}, retry(errors.Wrapf(err, "upload of %s failed", compID))
11001129
}
11011130
level.Info(cg.logger).Log("msg", "uploaded block", "result_block", compID, "duration", time.Since(begin), "duration_ms", time.Since(begin).Milliseconds())
@@ -1104,7 +1133,11 @@ func (cg *Group) compact(ctx context.Context, dir string, planner Planner, comp
11041133
// into the next planning cycle.
11051134
// Eventually the block we just uploaded should get synced into the group again (including sync-delay).
11061135
for _, meta := range toCompact {
1107-
if err := cg.deleteBlock(meta.ULID, filepath.Join(dir, meta.ULID.String())); err != nil {
1136+
tracing.DoInSpanWithErr(ctx, "compaction_block_delete", func(ctx context.Context) error {
1137+
err = cg.deleteBlock(meta.ULID, filepath.Join(dir, meta.ULID.String()))
1138+
return err
1139+
}, opentracing.Tags{"block.id": meta.ULID})
1140+
if err != nil {
11081141
return false, ulid.ULID{}, retry(errors.Wrapf(err, "mark old block for deletion from bucket"))
11091142
}
11101143
cg.groupGarbageCollectedBlocks.Inc()

pkg/tracing/tracing.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"context"
88

99
"github.com/opentracing/opentracing-go"
10+
"github.com/opentracing/opentracing-go/ext"
1011
)
1112

1213
const (
@@ -72,6 +73,18 @@ func StartSpan(ctx context.Context, operationName string, opts ...opentracing.St
7273
return span, opentracing.ContextWithSpan(ctx, span)
7374
}
7475

76+
// DoInSpanWtihErr executes function doFn inside new span with `operationName` name and hooking as child to a span found within given context if any.
77+
// It uses opentracing.Tracer propagated in context. If no found, it uses noop tracer notification.
78+
// It logs the error inside the new span created, which differentiates it from DoInSpan and DoWithSpan.
79+
func DoInSpanWithErr(ctx context.Context, operationName string, doFn func(context.Context) error, opts ...opentracing.StartSpanOption) {
80+
span, newCtx := StartSpan(ctx, operationName, opts...)
81+
defer span.Finish()
82+
err := doFn(newCtx)
83+
if err != nil {
84+
ext.LogError(span, err)
85+
}
86+
}
87+
7588
// DoInSpan executes function doFn inside new span with `operationName` name and hooking as child to a span found within given context if any.
7689
// It uses opentracing.Tracer propagated in context. If no found, it uses noop tracer notification.
7790
func DoInSpan(ctx context.Context, operationName string, doFn func(context.Context), opts ...opentracing.StartSpanOption) {

0 commit comments

Comments
 (0)