Skip to content

Commit aa95895

Browse files
authored
Fix sdk/log record attr value limit (#6032)
Fix #6004 Copy of #5997 ### Correctness From the [OTel specification](https://github.com/open-telemetry/opentelemetry-specification/blob/88bffeac48aa5eb37ac8044e931af63a0b55fe00/specification/common/README.md#attribute-limits): > - set an attribute value length limit such that for each attribute value: > - if it is a string, if it exceeds that limit (counting any character in it as 1), SDKs MUST truncate that value, so that its length is at most equal to the limit... Our current implementation truncates on number of bytes not characters. Unit tests are added/updated to validate this fix and prevent regressions. ### Performance ``` goos: linux goarch: amd64 pkg: go.opentelemetry.io/otel/sdk/log cpu: Intel(R) Core(TM) i7-8550U CPU @ 1.80GHz โ”‚ commit-e9c7aac2(old).txt โ”‚ commit-878043b9(new).txt โ”‚ โ”‚ sec/op โ”‚ sec/op vs base โ”‚ Truncate/Unlimited-8 0.8323n ยฑ 3% 0.7367n ยฑ 3% -11.49% (p=0.000 n=10) Truncate/Zero-8 1.923n ยฑ 32% 1.359n ยฑ 2% -29.34% (p=0.000 n=10) Truncate/Short-8 14.6050n ยฑ 4% 0.8785n ยฑ 1% -93.98% (p=0.000 n=10) Truncate/ASCII-8 8.205n ยฑ 2% 3.601n ยฑ 7% -56.12% (p=0.000 n=10) Truncate/ValidUTF-8-8 11.335n ยฑ 1% 7.206n ยฑ 1% -36.43% (p=0.000 n=10) Truncate/InvalidUTF-8-8 58.26n ยฑ 1% 36.61n ยฑ 1% -37.17% (p=0.000 n=10) Truncate/MixedUTF-8-8 81.16n ยฑ 1% 52.30n ยฑ 1% -35.56% (p=0.000 n=10) geomean 10.04n 4.601n -54.16% โ”‚ commit-e9c7aac2(old).txt โ”‚ commit-878043b9(new).txt โ”‚ โ”‚ B/op โ”‚ B/op vs base โ”‚ Truncate/Unlimited-8 0.000 ยฑ 0% 0.000 ยฑ 0% ~ (p=1.000 n=10) ยน Truncate/Zero-8 0.000 ยฑ 0% 0.000 ยฑ 0% ~ (p=1.000 n=10) ยน Truncate/Short-8 0.000 ยฑ 0% 0.000 ยฑ 0% ~ (p=1.000 n=10) ยน Truncate/ASCII-8 0.000 ยฑ 0% 0.000 ยฑ 0% ~ (p=1.000 n=10) ยน Truncate/ValidUTF-8-8 0.000 ยฑ 0% 0.000 ยฑ 0% ~ (p=1.000 n=10) ยน Truncate/InvalidUTF-8-8 16.00 ยฑ 0% 16.00 ยฑ 0% ~ (p=1.000 n=10) ยน Truncate/MixedUTF-8-8 32.00 ยฑ 0% 32.00 ยฑ 0% ~ (p=1.000 n=10) ยน geomean ยฒ +0.00% ยฒ ยน all samples are equal ยฒ summaries must be >0 to compute geomean โ”‚ commit-e9c7aac2(old).txt โ”‚ commit-878043b9(new).txt โ”‚ โ”‚ allocs/op โ”‚ allocs/op vs base โ”‚ Truncate/Unlimited-8 0.000 ยฑ 0% 0.000 ยฑ 0% ~ (p=1.000 n=10) ยน Truncate/Zero-8 0.000 ยฑ 0% 0.000 ยฑ 0% ~ (p=1.000 n=10) ยน Truncate/Short-8 0.000 ยฑ 0% 0.000 ยฑ 0% ~ (p=1.000 n=10) ยน Truncate/ASCII-8 0.000 ยฑ 0% 0.000 ยฑ 0% ~ (p=1.000 n=10) ยน Truncate/ValidUTF-8-8 0.000 ยฑ 0% 0.000 ยฑ 0% ~ (p=1.000 n=10) ยน Truncate/InvalidUTF-8-8 1.000 ยฑ 0% 1.000 ยฑ 0% ~ (p=1.000 n=10) ยน Truncate/MixedUTF-8-8 1.000 ยฑ 0% 1.000 ยฑ 0% ~ (p=1.000 n=10) ยน geomean ยฒ +0.00% ยฒ ยน all samples are equal ยฒ summaries must be >0 to compute geomean ```
1 parent 58fdf2a commit aa95895

File tree

3 files changed

+188
-76
lines changed

3 files changed

+188
-76
lines changed

โ€ŽCHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
3333
- Fix inconsistent request body closing in `go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp`. (#5954)
3434
- Fix invalid exemplar keys in `go.opentelemetry.io/otel/exporters/prometheus`. (#5995)
3535
- Fix attribute value truncation in `go.opentelemetry.io/otel/sdk/trace`. (#5997)
36+
- Fix attribute value truncation in `go.opentelemetry.io/otel/sdk/log`. (#6032)
3637

3738
<!-- Released section -->
3839
<!-- Don't change this section unless doing release -->

โ€Žsdk/log/record.go

+66-28
Original file line numberDiff line numberDiff line change
@@ -406,7 +406,7 @@ func (r *Record) applyValueLimits(val log.Value) log.Value {
406406
case log.KindString:
407407
s := val.AsString()
408408
if len(s) > r.attributeValueLengthLimit {
409-
val = log.StringValue(truncate(s, r.attributeValueLengthLimit))
409+
val = log.StringValue(truncate(r.attributeValueLengthLimit, s))
410410
}
411411
case log.KindSlice:
412412
sl := val.AsSlice()
@@ -427,40 +427,78 @@ func (r *Record) applyValueLimits(val log.Value) log.Value {
427427
return val
428428
}
429429

430-
// truncate returns a copy of str truncated to have a length of at most n
431-
// characters. If the length of str is less than n, str itself is returned.
430+
// truncate returns a truncated version of s such that it contains less than
431+
// the limit number of characters. Truncation is applied by returning the limit
432+
// number of valid characters contained in s.
432433
//
433-
// The truncate of str ensures that no valid UTF-8 code point is split. The
434-
// copy returned will be less than n if a characters straddles the length
435-
// limit.
434+
// If limit is negative, it returns the original string.
436435
//
437-
// No truncation is performed if n is less than zero.
438-
func truncate(str string, n int) string {
439-
if n < 0 {
440-
return str
436+
// UTF-8 is supported. When truncating, all invalid characters are dropped
437+
// before applying truncation.
438+
//
439+
// If s already contains less than the limit number of bytes, it is returned
440+
// unchanged. No invalid characters are removed.
441+
func truncate(limit int, s string) string {
442+
// This prioritize performance in the following order based on the most
443+
// common expected use-cases.
444+
//
445+
// - Short values less than the default limit (128).
446+
// - Strings with valid encodings that exceed the limit.
447+
// - No limit.
448+
// - Strings with invalid encodings that exceed the limit.
449+
if limit < 0 || len(s) <= limit {
450+
return s
441451
}
442452

443-
// cut returns a copy of the s truncated to not exceed a length of n. If
444-
// invalid UTF-8 is encountered, s is returned with false. Otherwise, the
445-
// truncated copy will be returned with true.
446-
cut := func(s string) (string, bool) {
447-
var i int
448-
for i = 0; i < n; {
449-
r, size := utf8.DecodeRuneInString(s[i:])
450-
if r == utf8.RuneError {
451-
return s, false
453+
// Optimistically, assume all valid UTF-8.
454+
var b strings.Builder
455+
count := 0
456+
for i, c := range s {
457+
if c != utf8.RuneError {
458+
count++
459+
if count > limit {
460+
return s[:i]
452461
}
453-
if i+size > n {
454-
break
455-
}
456-
i += size
462+
continue
463+
}
464+
465+
_, size := utf8.DecodeRuneInString(s[i:])
466+
if size == 1 {
467+
// Invalid encoding.
468+
b.Grow(len(s) - 1)
469+
_, _ = b.WriteString(s[:i])
470+
s = s[i:]
471+
break
457472
}
458-
return s[:i], true
459473
}
460474

461-
cp, ok := cut(str)
462-
if !ok {
463-
cp, _ = cut(strings.ToValidUTF8(str, ""))
475+
// Fast-path, no invalid input.
476+
if b.Cap() == 0 {
477+
return s
464478
}
465-
return cp
479+
480+
// Truncate while validating UTF-8.
481+
for i := 0; i < len(s) && count < limit; {
482+
c := s[i]
483+
if c < utf8.RuneSelf {
484+
// Optimization for single byte runes (common case).
485+
_ = b.WriteByte(c)
486+
i++
487+
count++
488+
continue
489+
}
490+
491+
_, size := utf8.DecodeRuneInString(s[i:])
492+
if size == 1 {
493+
// We checked for all 1-byte runes above, this is a RuneError.
494+
i++
495+
continue
496+
}
497+
498+
_, _ = b.WriteString(s[i : i+size])
499+
i += size
500+
count++
501+
}
502+
503+
return b.String()
466504
}

โ€Žsdk/log/record_test.go

+121-48
Original file line numberDiff line numberDiff line change
@@ -570,74 +570,147 @@ func assertKV(t *testing.T, r Record, kv log.KeyValue) {
570570
}
571571

572572
func TestTruncate(t *testing.T) {
573-
testcases := []struct {
574-
input, want string
575-
limit int
573+
type group struct {
574+
limit int
575+
input string
576+
expected string
577+
}
578+
579+
tests := []struct {
580+
name string
581+
groups []group
576582
}{
583+
// Edge case: limit is negative, no truncation should occur
577584
{
578-
input: "value",
579-
want: "value",
580-
limit: -1,
581-
},
582-
{
583-
input: "value",
584-
want: "",
585-
limit: 0,
586-
},
587-
{
588-
input: "value",
589-
want: "v",
590-
limit: 1,
591-
},
592-
{
593-
input: "value",
594-
want: "va",
595-
limit: 2,
585+
name: "NoTruncation",
586+
groups: []group{
587+
{-1, "No truncation!", "No truncation!"},
588+
},
596589
},
590+
591+
// Edge case: string is already shorter than the limit, no truncation
592+
// should occur
597593
{
598-
input: "value",
599-
want: "val",
600-
limit: 3,
594+
name: "ShortText",
595+
groups: []group{
596+
{10, "Short text", "Short text"},
597+
{15, "Short text", "Short text"},
598+
{100, "Short text", "Short text"},
599+
},
601600
},
601+
602+
// Edge case: truncation happens with ASCII characters only
602603
{
603-
input: "value",
604-
want: "valu",
605-
limit: 4,
604+
name: "ASCIIOnly",
605+
groups: []group{
606+
{1, "Hello World!", "H"},
607+
{5, "Hello World!", "Hello"},
608+
{12, "Hello World!", "Hello World!"},
609+
},
606610
},
611+
612+
// Truncation including multi-byte characters (UTF-8)
607613
{
608-
input: "value",
609-
want: "value",
610-
limit: 5,
614+
name: "ValidUTF-8",
615+
groups: []group{
616+
{7, "Hello, ไธ–็•Œ", "Hello, "},
617+
{8, "Hello, ไธ–็•Œ", "Hello, ไธ–"},
618+
{2, "ใ“ใ‚“ใซใกใฏ", "ใ“ใ‚“"},
619+
{3, "ใ“ใ‚“ใซใกใฏ", "ใ“ใ‚“ใซ"},
620+
{5, "ใ“ใ‚“ใซใกใฏ", "ใ“ใ‚“ใซใกใฏ"},
621+
{12, "ใ“ใ‚“ใซใกใฏ", "ใ“ใ‚“ใซใกใฏ"},
622+
},
611623
},
624+
625+
// Truncation with invalid UTF-8 characters
612626
{
613-
input: "value",
614-
want: "value",
615-
limit: 6,
627+
name: "InvalidUTF-8",
628+
groups: []group{
629+
{11, "Invalid\x80text", "Invalidtext"},
630+
// Do not modify invalid text if equal to limit.
631+
{11, "Valid text\x80", "Valid text\x80"},
632+
// Do not modify invalid text if under limit.
633+
{15, "Valid text\x80", "Valid text\x80"},
634+
{5, "Hello\x80World", "Hello"},
635+
{11, "Hello\x80World\x80!", "HelloWorld!"},
636+
{15, "Hello\x80World\x80Test", "HelloWorldTest"},
637+
{15, "Hello\x80\x80\x80World\x80Test", "HelloWorldTest"},
638+
{15, "\x80\x80\x80Hello\x80\x80\x80World\x80Test\x80\x80", "HelloWorldTest"},
639+
},
616640
},
641+
642+
// Truncation with mixed validn and invalid UTF-8 characters
617643
{
618-
input: "โ‚ฌโ‚ฌโ‚ฌโ‚ฌ", // 3 bytes each
619-
want: "โ‚ฌโ‚ฌโ‚ฌ",
620-
limit: 10,
644+
name: "MixedUTF-8",
645+
groups: []group{
646+
{6, "โ‚ฌ"[0:2] + "helloโ‚ฌโ‚ฌ", "helloโ‚ฌ"},
647+
{6, "โ‚ฌ" + "โ‚ฌ"[0:2] + "hello", "โ‚ฌhello"},
648+
{11, "Valid text\x80๐Ÿ“œ", "Valid text๐Ÿ“œ"},
649+
{11, "Valid text๐Ÿ“œ\x80", "Valid text๐Ÿ“œ"},
650+
{14, "๐Ÿ˜Š Hello\x80World๐ŸŒ๐Ÿš€", "๐Ÿ˜Š HelloWorld๐ŸŒ๐Ÿš€"},
651+
{14, "๐Ÿ˜Š\x80 Hello\x80World๐ŸŒ๐Ÿš€", "๐Ÿ˜Š HelloWorld๐ŸŒ๐Ÿš€"},
652+
{14, "๐Ÿ˜Š\x80 Hello\x80World๐ŸŒ\x80๐Ÿš€", "๐Ÿ˜Š HelloWorld๐ŸŒ๐Ÿš€"},
653+
{14, "๐Ÿ˜Š\x80 Hello\x80World๐ŸŒ\x80๐Ÿš€\x80", "๐Ÿ˜Š HelloWorld๐ŸŒ๐Ÿš€"},
654+
{14, "\x80๐Ÿ˜Š\x80 Hello\x80World๐ŸŒ\x80๐Ÿš€\x80", "๐Ÿ˜Š HelloWorld๐ŸŒ๐Ÿš€"},
655+
},
621656
},
657+
658+
// Edge case: empty string, should return empty string
622659
{
623-
input: "โ‚ฌ"[0:2] + "helloโ‚ฌโ‚ฌ", // corrupted first rune, then over limit
624-
want: "helloโ‚ฌ",
625-
limit: 10,
660+
name: "Empty",
661+
groups: []group{
662+
{5, "", ""},
663+
},
626664
},
665+
666+
// Edge case: limit is 0, should return an empty string
627667
{
628-
input: "โ‚ฌ"[0:2] + "hello", // corrupted first rune, then not over limit
629-
want: "hello",
630-
limit: 10,
668+
name: "Zero",
669+
groups: []group{
670+
{0, "Some text", ""},
671+
{0, "", ""},
672+
},
631673
},
632674
}
633675

634-
for _, tc := range testcases {
635-
name := fmt.Sprintf("%s/%d", tc.input, tc.limit)
636-
t.Run(name, func(t *testing.T) {
637-
t.Log(tc.input, len(tc.input), tc.limit)
638-
assert.Equal(t, tc.want, truncate(tc.input, tc.limit))
639-
})
676+
for _, tt := range tests {
677+
for _, g := range tt.groups {
678+
t.Run(tt.name, func(t *testing.T) {
679+
t.Parallel()
680+
681+
got := truncate(g.limit, g.input)
682+
assert.Equalf(
683+
t, g.expected, got,
684+
"input: %q([]rune%v))\ngot: %q([]rune%v)\nwant %q([]rune%v)",
685+
g.input, []rune(g.input),
686+
got, []rune(got),
687+
g.expected, []rune(g.expected),
688+
)
689+
})
690+
}
691+
}
692+
}
693+
694+
func BenchmarkTruncate(b *testing.B) {
695+
run := func(limit int, input string) func(b *testing.B) {
696+
return func(b *testing.B) {
697+
b.ReportAllocs()
698+
b.RunParallel(func(pb *testing.PB) {
699+
var out string
700+
for pb.Next() {
701+
out = truncate(limit, input)
702+
}
703+
_ = out
704+
})
705+
}
640706
}
707+
b.Run("Unlimited", run(-1, "hello ๐Ÿ˜Š world ๐ŸŒ๐Ÿš€"))
708+
b.Run("Zero", run(0, "Some text"))
709+
b.Run("Short", run(10, "Short Text"))
710+
b.Run("ASCII", run(5, "Hello, World!"))
711+
b.Run("ValidUTF-8", run(10, "hello ๐Ÿ˜Š world ๐ŸŒ๐Ÿš€"))
712+
b.Run("InvalidUTF-8", run(6, "โ‚ฌ"[0:2]+"helloโ‚ฌโ‚ฌ"))
713+
b.Run("MixedUTF-8", run(14, "\x80๐Ÿ˜Š\x80 Hello\x80World๐ŸŒ\x80๐Ÿš€\x80"))
641714
}
642715

643716
func BenchmarkWalkAttributes(b *testing.B) {

0 commit comments

Comments
ย (0)