Skip to content

Commit b8efeeb

Browse files
authored
[pkg/stanza/fileconsumer] Reduce noise in benchmark (#31516)
This PR reworks the end-to-end benchmark in `pkg/stanza/fileconsumer`. The primary goal is to surface a more accurate representation of performance within the benchmark. Several factors were leading to artificial delays which I believe may have drowned out actual impacts. Three significant sources of noise were mitigated: 1. Every log read was passed to a channel. This caused us to measure a lot of time waiting for channels. Now, we only signal when the file is fully consumed. 2. Every line was generated on the fly. Now we generate a set of lines ahead of time and write these over and over. 3. Each line was written to file independently. Now we group the pre-generated lines and write them all at once. We also write longer lines so that much more content is written per call. The overall result is a substantially clearer CPU profile and a slightly clearer memory profile. CPU Profile Before <img width="860" alt="image" src="https://github.com/open-telemetry/opentelemetry-collector-contrib/assets/5255616/a8bcd396-8c4a-4dcb-8c0b-0a3cbc852a86"> CPU Profile After <img width="859" alt="image" src="https://github.com/open-telemetry/opentelemetry-collector-contrib/assets/5255616/14ee9a7b-af2f-4df5-a8a5-10cb8830c9b6"> Memory Profile Before <img width="859" alt="image" src="https://github.com/open-telemetry/opentelemetry-collector-contrib/assets/5255616/86604420-754b-43f2-a143-39f60faed9d8"> Memory Profile After <img width="860" alt="image" src="https://github.com/open-telemetry/opentelemetry-collector-contrib/assets/5255616/6752be62-2d73-4f9d-bcea-aa5835cce601"> Additionally, this adds a new benchmark to cover a scenario with many (100) files. Finally, it reduces the time to run the benchmark from ~30s to ~20s.
1 parent c3eb407 commit b8efeeb

File tree

1 file changed

+64
-43
lines changed

1 file changed

+64
-43
lines changed

pkg/stanza/fileconsumer/benchmark_test.go

Lines changed: 64 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,12 @@ package fileconsumer
55

66
import (
77
"context"
8+
"fmt"
89
"os"
910
"path/filepath"
1011
"sync"
1112
"testing"
13+
"time"
1214

1315
"github.com/stretchr/testify/require"
1416

@@ -23,22 +25,6 @@ type fileInputBenchmark struct {
2325
config func() *Config
2426
}
2527

26-
type benchFile struct {
27-
*os.File
28-
log func(int)
29-
}
30-
31-
func simpleTextFile(b *testing.B, file *os.File) *benchFile {
32-
line := string(filetest.TokenWithLength(49)) + "\n"
33-
return &benchFile{
34-
File: file,
35-
log: func(_ int) {
36-
_, err := file.WriteString(line)
37-
require.NoError(b, err)
38-
},
39-
}
40-
}
41-
4228
func BenchmarkFileInput(b *testing.B) {
4329
cases := []fileInputBenchmark{
4430
{
@@ -144,63 +130,98 @@ func BenchmarkFileInput(b *testing.B) {
144130
return cfg
145131
},
146132
},
133+
{
134+
name: "Many",
135+
paths: func() []string {
136+
paths := make([]string, 100)
137+
for i := range paths {
138+
paths[i] = fmt.Sprintf("file%d.log", i)
139+
}
140+
return paths
141+
}(),
142+
config: func() *Config {
143+
cfg := NewConfig()
144+
cfg.Include = []string{"file*.log"}
145+
cfg.MaxConcurrentFiles = 100
146+
return cfg
147+
},
148+
},
149+
}
150+
151+
// Pregenerate some lines which we can write to the files
152+
// to avoid measuring the time it takes to generate them
153+
// and to reduce the amount of syscalls in the benchmark.
154+
uniqueLines := 10
155+
severalLines := ""
156+
for i := 0; i < uniqueLines; i++ {
157+
severalLines += string(filetest.TokenWithLength(999)) + "\n"
147158
}
148159

149160
for _, bench := range cases {
150161
b.Run(bench.name, func(b *testing.B) {
151162
rootDir := b.TempDir()
152163

153-
var files []*benchFile
164+
var files []*os.File
154165
for _, path := range bench.paths {
155-
file := filetest.OpenFile(b, filepath.Join(rootDir, path))
156-
files = append(files, simpleTextFile(b, file))
166+
f := filetest.OpenFile(b, filepath.Join(rootDir, path))
167+
// Initialize the file to ensure a unique fingerprint
168+
_, err := f.WriteString(f.Name() + "\n")
169+
require.NoError(b, err)
170+
// Write half the content before starting the benchmark
171+
for i := 0; i < b.N/2; i++ {
172+
_, err := f.WriteString(severalLines)
173+
require.NoError(b, err)
174+
}
175+
require.NoError(b, f.Sync())
176+
files = append(files, f)
157177
}
158178

159179
cfg := bench.config()
160180
for i, inc := range cfg.Include {
161181
cfg.Include[i] = filepath.Join(rootDir, inc)
162182
}
163183
cfg.StartAt = "beginning"
184+
// Use aggressive poll interval so we're not measuring excess sleep time
185+
cfg.PollInterval = time.Microsecond
164186

165-
received := make(chan []byte)
187+
doneChan := make(chan bool, len(files))
166188
callback := func(_ context.Context, token []byte, _ map[string]any) error {
167-
received <- token
189+
if len(token) == 0 {
190+
doneChan <- true
191+
}
168192
return nil
169193
}
170194
op, err := cfg.Build(testutil.Logger(b), callback)
171195
require.NoError(b, err)
172196

173-
// write half the lines before starting
174-
mid := b.N / 2
175-
for i := 0; i < mid; i++ {
176-
for _, file := range files {
177-
file.log(i)
178-
}
179-
}
180-
181197
b.ResetTimer()
182-
err = op.Start(testutil.NewUnscopedMockPersister())
198+
require.NoError(b, op.Start(testutil.NewUnscopedMockPersister()))
183199
defer func() {
184200
require.NoError(b, op.Stop())
185201
}()
186-
require.NoError(b, err)
187202

188-
// write the remainder of lines while running
189203
var wg sync.WaitGroup
190-
wg.Add(1)
191-
go func() {
192-
for i := mid; i < b.N; i++ {
193-
for _, file := range files {
194-
file.log(i)
204+
for _, file := range files {
205+
wg.Add(1)
206+
go func(f *os.File) {
207+
defer wg.Done()
208+
// Write the other half of the content while running
209+
for i := 0; i < b.N/2; i++ {
210+
_, err := f.WriteString(severalLines)
211+
require.NoError(b, err)
195212
}
196-
}
197-
wg.Done()
198-
}()
199-
wg.Wait()
213+
// Signal end of file
214+
_, err := f.WriteString("\n")
215+
require.NoError(b, err)
216+
require.NoError(b, f.Sync())
217+
}(file)
218+
}
200219

201-
for i := 0; i < b.N*len(files); i++ {
202-
<-received
220+
// Timer continues to run until all files have been read
221+
for dones := 0; dones < len(files); dones++ {
222+
<-doneChan
203223
}
224+
wg.Wait()
204225
})
205226
}
206227
}

0 commit comments

Comments
 (0)