Skip to content

Commit 8fbaa97

Browse files
boekkooi-impossibleclouddmathieupellared
authored
Reduce newEvictedQueueLink and newEvictedQueueEvent memory allocations (#5858)
Good day, While doing some profile with pprof on one of our services I notices that `sync.OnceFunc` was allocating a nice amount of objects. These `sync.OnceFunc` calls where done by `newEvictedQueueEvent` and `newEvictedQueueLink`. So to avoid these extra allocation I created this PR which replaces the `sync.OnceFunc` with `sync.Once` which is now part of the evictedQueue. This resulted in the following benchstat result (commit baad07e): ``` goos: linux goarch: amd64 pkg: go.opentelemetry.io/otel/sdk/trace cpu: 11th Gen Intel(R) Core(TM) i5-11400H @ 2.70GHz │ 1727937489-attr-old.txt │ evic-new1728018933.txt │ │ sec/op │ sec/op vs base │ RecordingSpanSetAttributes/WithLimit/false-12 6.579µ ± 9% 5.875µ ± 8% -10.71% (p=0.000 n=10) RecordingSpanSetAttributes/WithLimit/true-12 14.27µ ± 10% 11.82µ ± 16% -17.18% (p=0.009 n=10) SpanEnd-12 63.44n ± 0% 75.97n ± 2% +19.75% (p=0.000 n=10) TraceStart/with_a_simple_span-12 728.0n ± 3% 412.7n ± 1% -43.31% (p=0.000 n=10) TraceStart/with_several_links-12 881.5n ± 1% 553.2n ± 3% -37.25% (p=0.000 n=10) TraceStart/with_attributes-12 916.5n ± 2% 598.4n ± 2% -34.70% (p=0.000 n=10) SpanProcessorOnEnd/batch:_10,_spans:_10-12 151.9n ± 3% 144.1n ± 3% -5.13% (p=0.000 n=10) SpanProcessorOnEnd/batch:_10,_spans:_100-12 1.497µ ± 2% 1.435µ ± 2% -4.14% (p=0.000 n=10) SpanProcessorOnEnd/batch:_100,_spans:_10-12 148.5n ± 2% 143.0n ± 2% -3.67% (p=0.000 n=10) SpanProcessorOnEnd/batch:_100,_spans:_100-12 1.481µ ± 3% 1.442µ ± 4% -2.67% (p=0.022 n=10) SpanProcessorVerboseLogging-12 9.376µ ± 1% 6.662µ ± 2% -28.94% (p=0.000 n=10) SpanLimits/AttributeValueLengthLimit-12 7.221µ ± 4% 6.605µ ± 3% -8.54% (p=0.002 n=10) SpanLimits/AttributeCountLimit-12 6.300µ ± 6% 6.091µ ± 6% -3.30% (p=0.007 n=10) SpanLimits/EventCountLimit-12 5.921µ ± 4% 5.506µ ± 7% -7.02% (p=0.011 n=10) SpanLimits/LinkCountLimit-12 6.073µ ± 6% 5.423µ ± 3% -10.71% (p=0.000 n=10) SpanLimits/AttributePerEventCountLimit-12 6.455µ ± 3% 5.294µ ± 9% -17.98% (p=0.000 n=10) SpanLimits/AttributePerLinkCountLimit-12 6.263µ ± 4% 5.850µ ± 7% -6.59% (p=0.000 n=10) SpanSetAttributesOverCapacity-12 1.683µ ± 1% 1.391µ ± 0% -17.35% (p=0.000 n=10) StartEndSpan/AlwaysSample-12 803.0n ± 4% 531.4n ± 5% -33.82% (p=0.000 n=10) StartEndSpan/NeverSample-12 216.0n ± 2% 212.7n ± 2% ~ (p=0.055 n=10) SpanWithAttributes_4/AlwaysSample-12 1216.5n ± 2% 877.6n ± 9% -27.85% (p=0.000 n=10) SpanWithAttributes_4/NeverSample-12 370.4n ± 5% 374.5n ± 3% ~ (p=0.197 n=10) SpanWithAttributes_8/AlwaysSample-12 1.494µ ± 4% 1.117µ ± 4% -25.27% (p=0.000 n=10) SpanWithAttributes_8/NeverSample-12 477.1n ± 3% 475.1n ± 7% ~ (p=0.739 n=10) SpanWithAttributes_all/AlwaysSample-12 1310.5n ± 5% 971.5n ± 6% -25.87% (p=0.000 n=10) SpanWithAttributes_all/NeverSample-12 388.0n ± 5% 389.1n ± 6% ~ (p=0.699 n=10) SpanWithAttributes_all_2x/AlwaysSample-12 1.664µ ± 2% 1.236µ ± 3% -25.72% (p=0.000 n=10) SpanWithAttributes_all_2x/NeverSample-12 522.7n ± 7% 527.2n ± 9% ~ (p=0.912 n=10) SpanWithEvents_4/AlwaysSample-12 1.411µ ± 4% 1.058µ ± 2% -24.99% (p=0.000 n=10) SpanWithEvents_4/NeverSample-12 218.4n ± 3% 218.9n ± 2% ~ (p=0.971 n=10) SpanWithEvents_8/AlwaysSample-12 1.997µ ± 5% 1.657µ ± 4% -17.03% (p=0.000 n=10) SpanWithEvents_8/NeverSample-12 226.6n ± 3% 221.7n ± 2% -2.14% (p=0.015 n=10) SpanWithEvents_WithStackTrace/AlwaysSample-12 1016.5n ± 2% 712.5n ± 2% -29.91% (p=0.000 n=10) SpanWithEvents_WithStackTrace/NeverSample-12 242.0n ± 3% 241.0n ± 1% ~ (p=0.254 n=10) SpanWithEvents_WithTimestamp/AlwaysSample-12 1019.5n ± 3% 713.2n ± 6% -30.04% (p=0.000 n=10) SpanWithEvents_WithTimestamp/NeverSample-12 276.4n ± 3% 276.6n ± 2% ~ (p=0.723 n=10) TraceID_DotString-12 71.94n ± 3% 75.71n ± 3% +5.23% (p=0.001 n=10) SpanID_DotString-12 50.88n ± 3% 52.35n ± 1% +2.88% (p=0.001 n=10) geomean 952.8n 822.2n -13.71% │ 1727937489-attr-old.txt │ evic-new1728018933.txt │ │ B/op │ B/op vs base │ RecordingSpanSetAttributes/WithLimit/false-12 7.062Ki ± 0% 6.891Ki ± 0% -2.43% (p=0.000 n=10) RecordingSpanSetAttributes/WithLimit/true-12 7.564Ki ± 0% 7.393Ki ± 0% -2.27% (p=0.000 n=10) SpanEnd-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ TraceStart/with_a_simple_span-12 704.0 ± 0% 528.0 ± 0% -25.00% (p=0.000 n=10) TraceStart/with_several_links-12 880.0 ± 0% 704.0 ± 0% -20.00% (p=0.000 n=10) TraceStart/with_attributes-12 960.0 ± 0% 784.0 ± 0% -18.33% (p=0.000 n=10) SpanProcessorOnEnd/batch:_10,_spans:_10-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ SpanProcessorOnEnd/batch:_10,_spans:_100-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ SpanProcessorOnEnd/batch:_100,_spans:_10-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ SpanProcessorOnEnd/batch:_100,_spans:_100-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ SpanProcessorVerboseLogging-12 10.938Ki ± 0% 9.219Ki ± 0% -15.71% (p=0.000 n=10) SpanLimits/AttributeValueLengthLimit-12 10.76Ki ± 0% 10.59Ki ± 0% -1.60% (p=0.000 n=10) SpanLimits/AttributeCountLimit-12 10.016Ki ± 0% 9.844Ki ± 0% -1.72% (p=0.000 n=10) SpanLimits/EventCountLimit-12 9.594Ki ± 0% 9.422Ki ± 0% -1.79% (p=0.000 n=10) SpanLimits/LinkCountLimit-12 9.203Ki ± 0% 9.031Ki ± 0% -1.87% (p=0.000 n=10) SpanLimits/AttributePerEventCountLimit-12 10.64Ki ± 0% 10.47Ki ± 0% -1.62% (p=0.000 n=10) SpanLimits/AttributePerLinkCountLimit-12 10.64Ki ± 0% 10.47Ki ± 0% -1.62% (p=0.000 n=10) SpanSetAttributesOverCapacity-12 768.0 ± 0% 592.0 ± 0% -22.92% (p=0.000 n=10) StartEndSpan/AlwaysSample-12 704.0 ± 0% 528.0 ± 0% -25.00% (p=0.000 n=10) StartEndSpan/NeverSample-12 144.0 ± 0% 144.0 ± 0% ~ (p=1.000 n=10) ¹ SpanWithAttributes_4/AlwaysSample-12 1.188Ki ± 0% 1.016Ki ± 0% -14.47% (p=0.000 n=10) SpanWithAttributes_4/NeverSample-12 400.0 ± 0% 400.0 ± 0% ~ (p=1.000 n=10) ¹ SpanWithAttributes_8/AlwaysSample-12 1.688Ki ± 0% 1.516Ki ± 0% -10.19% (p=0.000 n=10) SpanWithAttributes_8/NeverSample-12 656.0 ± 0% 656.0 ± 0% ~ (p=1.000 n=10) ¹ SpanWithAttributes_all/AlwaysSample-12 1.312Ki ± 0% 1.141Ki ± 0% -13.10% (p=0.000 n=10) SpanWithAttributes_all/NeverSample-12 464.0 ± 0% 464.0 ± 0% ~ (p=1.000 n=10) ¹ SpanWithAttributes_all_2x/AlwaysSample-12 2.062Ki ± 0% 1.891Ki ± 0% -8.33% (p=0.000 n=10) SpanWithAttributes_all_2x/NeverSample-12 848.0 ± 0% 848.0 ± 0% ~ (p=1.000 n=10) ¹ SpanWithEvents_4/AlwaysSample-12 1.188Ki ± 0% 1.016Ki ± 0% -14.47% (p=0.000 n=10) SpanWithEvents_4/NeverSample-12 144.0 ± 0% 144.0 ± 0% ~ (p=1.000 n=10) ¹ SpanWithEvents_8/AlwaysSample-12 1.812Ki ± 0% 1.641Ki ± 0% -9.48% (p=0.000 n=10) SpanWithEvents_8/NeverSample-12 144.0 ± 0% 144.0 ± 0% ~ (p=1.000 n=10) ¹ SpanWithEvents_WithStackTrace/AlwaysSample-12 800.0 ± 0% 624.0 ± 0% -22.00% (p=0.000 n=10) SpanWithEvents_WithStackTrace/NeverSample-12 160.0 ± 0% 160.0 ± 0% ~ (p=1.000 n=10) ¹ SpanWithEvents_WithTimestamp/AlwaysSample-12 824.0 ± 0% 648.0 ± 0% -21.36% (p=0.000 n=10) SpanWithEvents_WithTimestamp/NeverSample-12 184.0 ± 0% 184.0 ± 0% ~ (p=1.000 n=10) ¹ TraceID_DotString-12 64.00 ± 0% 64.00 ± 0% ~ (p=1.000 n=10) ¹ SpanID_DotString-12 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ geomean ² -7.15% ² ¹ all samples are equal ² summaries must be >0 to compute geomean │ 1727937489-attr-old.txt │ evic-new1728018933.txt │ │ allocs/op │ allocs/op vs base │ RecordingSpanSetAttributes/WithLimit/false-12 15.000 ± 0% 3.000 ± 0% -80.00% (p=0.000 n=10) RecordingSpanSetAttributes/WithLimit/true-12 20.000 ± 0% 8.000 ± 0% -60.00% (p=0.000 n=10) SpanEnd-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ TraceStart/with_a_simple_span-12 14.000 ± 0% 2.000 ± 0% -85.71% (p=0.000 n=10) TraceStart/with_several_links-12 15.000 ± 0% 3.000 ± 0% -80.00% (p=0.000 n=10) TraceStart/with_attributes-12 16.000 ± 0% 4.000 ± 0% -75.00% (p=0.000 n=10) SpanProcessorOnEnd/batch:_10,_spans:_10-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ SpanProcessorOnEnd/batch:_10,_spans:_100-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ SpanProcessorOnEnd/batch:_100,_spans:_10-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ SpanProcessorOnEnd/batch:_100,_spans:_100-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ SpanProcessorVerboseLogging-12 155.00 ± 0% 35.00 ± 0% -77.42% (p=0.000 n=10) SpanLimits/AttributeValueLengthLimit-12 54.00 ± 0% 42.00 ± 0% -22.22% (p=0.000 n=10) SpanLimits/AttributeCountLimit-12 50.00 ± 0% 38.00 ± 0% -24.00% (p=0.000 n=10) SpanLimits/EventCountLimit-12 47.00 ± 0% 35.00 ± 0% -25.53% (p=0.000 n=10) SpanLimits/LinkCountLimit-12 47.00 ± 0% 35.00 ± 0% -25.53% (p=0.000 n=10) SpanLimits/AttributePerEventCountLimit-12 50.00 ± 0% 38.00 ± 0% -24.00% (p=0.000 n=10) SpanLimits/AttributePerLinkCountLimit-12 50.00 ± 0% 38.00 ± 0% -24.00% (p=0.000 n=10) SpanSetAttributesOverCapacity-12 15.000 ± 0% 3.000 ± 0% -80.00% (p=0.000 n=10) StartEndSpan/AlwaysSample-12 14.000 ± 0% 2.000 ± 0% -85.71% (p=0.000 n=10) StartEndSpan/NeverSample-12 2.000 ± 0% 2.000 ± 0% ~ (p=1.000 n=10) ¹ SpanWithAttributes_4/AlwaysSample-12 16.000 ± 0% 4.000 ± 0% -75.00% (p=0.000 n=10) SpanWithAttributes_4/NeverSample-12 3.000 ± 0% 3.000 ± 0% ~ (p=1.000 n=10) ¹ SpanWithAttributes_8/AlwaysSample-12 16.000 ± 0% 4.000 ± 0% -75.00% (p=0.000 n=10) SpanWithAttributes_8/NeverSample-12 3.000 ± 0% 3.000 ± 0% ~ (p=1.000 n=10) ¹ SpanWithAttributes_all/AlwaysSample-12 16.000 ± 0% 4.000 ± 0% -75.00% (p=0.000 n=10) SpanWithAttributes_all/NeverSample-12 3.000 ± 0% 3.000 ± 0% ~ (p=1.000 n=10) ¹ SpanWithAttributes_all_2x/AlwaysSample-12 16.000 ± 0% 4.000 ± 0% -75.00% (p=0.000 n=10) SpanWithAttributes_all_2x/NeverSample-12 3.000 ± 0% 3.000 ± 0% ~ (p=1.000 n=10) ¹ SpanWithEvents_4/AlwaysSample-12 17.000 ± 0% 5.000 ± 0% -70.59% (p=0.000 n=10) SpanWithEvents_4/NeverSample-12 2.000 ± 0% 2.000 ± 0% ~ (p=1.000 n=10) ¹ SpanWithEvents_8/AlwaysSample-12 18.000 ± 0% 6.000 ± 0% -66.67% (p=0.000 n=10) SpanWithEvents_8/NeverSample-12 2.000 ± 0% 2.000 ± 0% ~ (p=1.000 n=10) ¹ SpanWithEvents_WithStackTrace/AlwaysSample-12 16.000 ± 0% 4.000 ± 0% -75.00% (p=0.000 n=10) SpanWithEvents_WithStackTrace/NeverSample-12 3.000 ± 0% 3.000 ± 0% ~ (p=1.000 n=10) ¹ SpanWithEvents_WithTimestamp/AlwaysSample-12 17.000 ± 0% 5.000 ± 0% -70.59% (p=0.000 n=10) SpanWithEvents_WithTimestamp/NeverSample-12 4.000 ± 0% 4.000 ± 0% ~ (p=1.000 n=10) ¹ TraceID_DotString-12 2.000 ± 0% 2.000 ± 0% ~ (p=1.000 n=10) ¹ SpanID_DotString-12 2.000 ± 0% 2.000 ± 0% ~ (p=1.000 n=10) ¹ geomean ² -47.73% ² ¹ all samples are equal ² summaries must be >0 to compute geomean ``` --------- Co-authored-by: Damien Mathieu <[email protected]> Co-authored-by: Robert Pająk <[email protected]>
1 parent 4a911f9 commit 8fbaa97

File tree

3 files changed

+31
-14
lines changed

3 files changed

+31
-14
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
1919
- `FilterProcessor.Enabled` in `go.opentelemetry.io/otel/sdk/log/internal/x` now accepts `EnabledParameters` instead of `Record`. (#5791)
2020
- The `Record` type in `go.opentelemetry.io/otel/log` is no longer comparable. (#5847)
2121
- Performance improvements for the trace SDK `SetAttributes` method in `Span`. (#5864)
22+
- Reduce memory allocations for the `Event` and `Link` lists in `Span`. (#5858)
2223

2324
### Deprecated
2425

sdk/trace/evictedqueue.go

+13-8
Original file line numberDiff line numberDiff line change
@@ -12,25 +12,26 @@ import (
1212

1313
// evictedQueue is a FIFO queue with a configurable capacity.
1414
type evictedQueue[T any] struct {
15-
queue []T
16-
capacity int
17-
droppedCount int
18-
logDropped func()
15+
queue []T
16+
capacity int
17+
droppedCount int
18+
logDroppedMsg string
19+
logDroppedOnce sync.Once
1920
}
2021

2122
func newEvictedQueueEvent(capacity int) evictedQueue[Event] {
2223
// Do not pre-allocate queue, do this lazily.
2324
return evictedQueue[Event]{
24-
capacity: capacity,
25-
logDropped: sync.OnceFunc(func() { global.Warn("limit reached: dropping trace trace.Event") }),
25+
capacity: capacity,
26+
logDroppedMsg: "limit reached: dropping trace trace.Event",
2627
}
2728
}
2829

2930
func newEvictedQueueLink(capacity int) evictedQueue[Link] {
3031
// Do not pre-allocate queue, do this lazily.
3132
return evictedQueue[Link]{
32-
capacity: capacity,
33-
logDropped: sync.OnceFunc(func() { global.Warn("limit reached: dropping trace trace.Link") }),
33+
capacity: capacity,
34+
logDroppedMsg: "limit reached: dropping trace trace.Link",
3435
}
3536
}
3637

@@ -53,6 +54,10 @@ func (eq *evictedQueue[T]) add(value T) {
5354
eq.queue = append(eq.queue, value)
5455
}
5556

57+
func (eq *evictedQueue[T]) logDropped() {
58+
eq.logDroppedOnce.Do(func() { global.Warn(eq.logDroppedMsg) })
59+
}
60+
5661
// copy returns a copy of the evictedQueue.
5762
func (eq *evictedQueue[T]) copy() []T {
5863
return slices.Clone(eq.queue)

sdk/trace/evictedqueue_test.go

+17-6
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,11 @@ import (
77
"reflect"
88
"testing"
99

10+
"github.com/go-logr/logr"
11+
"github.com/go-logr/logr/funcr"
1012
"github.com/stretchr/testify/assert"
13+
14+
"go.opentelemetry.io/otel/internal/global"
1115
)
1216

1317
func init() {
@@ -36,18 +40,25 @@ func TestCopy(t *testing.T) {
3640

3741
func TestDropCount(t *testing.T) {
3842
q := newEvictedQueueEvent(3)
39-
var called bool
40-
q.logDropped = func() { called = true }
43+
44+
var called int
45+
t.Cleanup(func(l logr.Logger) func() {
46+
return func() { global.SetLogger(l) }
47+
}(global.GetLogger()))
48+
global.SetLogger(funcr.New(func(prefix, args string) {
49+
called++
50+
}, funcr.Options{Verbosity: 1}))
4151

4252
q.add(Event{Name: "value1"})
43-
assert.False(t, called, `"value1" logged as dropped`)
53+
assert.Equal(t, 0, called, `"value1" logged as dropped`)
4454
q.add(Event{Name: "value2"})
45-
assert.False(t, called, `"value2" logged as dropped`)
55+
assert.Equal(t, 0, called, `"value2" logged as dropped`)
4656
q.add(Event{Name: "value3"})
47-
assert.False(t, called, `"value3" logged as dropped`)
57+
assert.Equal(t, 0, called, `"value3" logged as dropped`)
4858
q.add(Event{Name: "value1"})
49-
assert.True(t, called, `"value2" not logged as dropped`)
59+
assert.Equal(t, 1, called, `"value2" not logged as dropped`)
5060
q.add(Event{Name: "value4"})
61+
assert.Equal(t, 1, called, `"value4" logged as dropped`)
5162
if wantLen, gotLen := 3, len(q.queue); wantLen != gotLen {
5263
t.Errorf("got queue length %d want %d", gotLen, wantLen)
5364
}

0 commit comments

Comments
 (0)