Skip to content

Commit 8e48635

Browse files
authored
Merge pull request opensearch-project#354 from kakkoyun/keep_track_of_stack_unwinding
profiler: Keep track of stack unwinding failures
2 parents 64b003e + 14fc8ba commit 8e48635

File tree

3 files changed

+227
-58
lines changed

3 files changed

+227
-58
lines changed

parca-agent.bpf.c

+29-7
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
#define KBUILD_MODNAME "parca-agent"
1818

1919
#undef container_of
20-
//#include "bpf_core_read.h"
2120
#include <bpf_core_read.h>
2221
#include <bpf_endian.h>
2322
#include <bpf_helpers.h>
@@ -37,6 +36,8 @@ volatile const char bpf_metadata_name[] SEC(".rodata") =
3736
// Max depth of each stack trace to track
3837
#define MAX_STACK_DEPTH 127
3938

39+
/*================================ eBPF MAPS =================================*/
40+
4041
#define BPF_MAP(_name, _type, _key_type, _value_type, _max_entries) \
4142
struct { \
4243
__uint(type, _type); \
@@ -87,6 +88,8 @@ bpf_map_lookup_or_try_init(void *map, const void *key, const void *init) {
8788
return bpf_map_lookup_elem(map, key);
8889
}
8990

91+
/*================================= HOOKS ==================================*/
92+
9093
// This code gets a bit complex. Probably not suitable for casual hacking.
9194
SEC("perf_event")
9295
int do_sample(struct bpf_perf_event_data *ctx) {
@@ -98,11 +101,31 @@ int do_sample(struct bpf_perf_event_data *ctx) {
98101
return 0;
99102

100103
// create map key
101-
stack_count_key_t key = {.pid = tgid};
102-
103-
// get stacks
104-
key.user_stack_id = bpf_get_stackid(ctx, &stack_traces, BPF_F_USER_STACK);
105-
key.kernel_stack_id = bpf_get_stackid(ctx, &stack_traces, 0);
104+
stack_count_key_t key = {
105+
.pid = tgid,
106+
.user_stack_id = 0,
107+
.kernel_stack_id = 0,
108+
};
109+
110+
// get user stack id
111+
int stack_id = bpf_get_stackid(ctx, &stack_traces, BPF_F_USER_STACK);
112+
if (stack_id >= 0)
113+
key.user_stack_id = stack_id;
114+
115+
// get kernel stack id
116+
int kernel_stack_id = bpf_get_stackid(ctx, &stack_traces, 0);
117+
if (kernel_stack_id >= 0)
118+
key.kernel_stack_id = kernel_stack_id;
119+
120+
// TODO(kakkoyun): failed bpf_get_stackid() could indicate stack unwinding
121+
// issues; this could be a useful place to hook eh_frame-based stack
122+
// unwinding.
123+
// TODO(kakkoyun): Does returned error code help?
124+
// if (key.user_stack_id == 0 && key.kernel_stack_id == 0)
125+
// Both user and kernel stacks are empty.
126+
// However, for now, we still want to count the event, to keep track of the
127+
// number of the failed stack unwinding attempts.
128+
// return 0;
106129

107130
u64 zero = 0;
108131
u64 *count;
@@ -111,7 +134,6 @@ int do_sample(struct bpf_perf_event_data *ctx) {
111134
return 0;
112135

113136
__sync_fetch_and_add(count, 1);
114-
115137
return 0;
116138
}
117139

pkg/profiler/profiler.go

+139-51
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ import (
5656
//go:embed parca-agent.bpf.o
5757
var bpfObj []byte
5858

59+
var errUnrecoverable = errors.New("unrecoverable error")
60+
5961
const (
6062
stackDepth = 127 // Always needs to be sync with MAX_STACK_DEPTH in parca-agent.bpf.c
6163
doubleStackDepth = 254
@@ -118,9 +120,51 @@ func (m bpfMaps) clean() error {
118120
return nil
119121
}
120122

123+
type metrics struct {
124+
reg prometheus.Registerer
125+
126+
missingStacks *prometheus.CounterVec
127+
missingPIDs prometheus.Counter
128+
failedStackUnwindingAttempts *prometheus.CounterVec
129+
}
130+
131+
func (m metrics) unregister() bool {
132+
return m.reg.Unregister(m.missingStacks) &&
133+
m.reg.Unregister(m.missingPIDs) &&
134+
m.reg.Unregister(m.failedStackUnwindingAttempts)
135+
}
136+
137+
func newMetrics(reg prometheus.Registerer, target model.LabelSet) *metrics {
138+
return &metrics{
139+
reg: reg,
140+
missingStacks: promauto.With(reg).NewCounterVec(
141+
prometheus.CounterOpts{
142+
Name: "parca_agent_profiler_missing_stacks_total",
143+
Help: "Number of missing profile stacks",
144+
ConstLabels: map[string]string{"target": target.String()},
145+
},
146+
[]string{"type"},
147+
),
148+
missingPIDs: promauto.With(reg).NewCounter(
149+
prometheus.CounterOpts{
150+
Name: "parca_agent_profiler_missing_pid_total",
151+
Help: "Number of missing PIDs",
152+
ConstLabels: map[string]string{"target": target.String()},
153+
},
154+
),
155+
failedStackUnwindingAttempts: promauto.With(reg).NewCounterVec(
156+
prometheus.CounterOpts{
157+
Name: "parca_agent_profiler_failed_stack_unwinding_attempts_total",
158+
Help: "Number of failed stack unwinding attempts",
159+
ConstLabels: map[string]string{"target": target.String()},
160+
},
161+
[]string{"type"},
162+
),
163+
}
164+
}
165+
121166
type CgroupProfiler struct {
122167
logger log.Logger
123-
reg prometheus.Registerer
124168

125169
mtx *sync.RWMutex
126170
cancel func()
@@ -130,9 +174,9 @@ type CgroupProfiler struct {
130174
ksymCache *ksym.Cache
131175
objCache objectfile.Cache
132176

133-
bpfMaps *bpfMaps
177+
bpfMaps *bpfMaps
178+
byteOrder binary.ByteOrder
134179

135-
missingStacks *prometheus.CounterVec
136180
lastError error
137181
lastProfileTakenAt time.Time
138182

@@ -143,6 +187,8 @@ type CgroupProfiler struct {
143187
profilingDuration time.Duration
144188

145189
profileBufferPool sync.Pool
190+
191+
metrics *metrics
146192
}
147193

148194
func NewCgroupProfiler(
@@ -158,7 +204,6 @@ func NewCgroupProfiler(
158204
) *CgroupProfiler {
159205
return &CgroupProfiler{
160206
logger: log.With(logger, "labels", target.String()),
161-
reg: reg,
162207
mtx: &sync.RWMutex{},
163208
target: target,
164209
profilingDuration: profilingDuration,
@@ -172,19 +217,13 @@ func NewCgroupProfiler(
172217
debugInfoClient,
173218
tmp,
174219
),
175-
missingStacks: promauto.With(reg).NewCounterVec(
176-
prometheus.CounterOpts{
177-
Name: "parca_agent_profiler_missing_stacks_total",
178-
Help: "Number of missing profile stacks",
179-
ConstLabels: map[string]string{"target": target.String()},
180-
},
181-
[]string{"type"},
182-
),
183220
profileBufferPool: sync.Pool{
184221
New: func() interface{} {
185222
return bytes.NewBuffer(nil)
186223
},
187224
},
225+
byteOrder: byteorder.GetHostByteOrder(),
226+
metrics: newMetrics(reg, target),
188227
}
189228
}
190229

@@ -212,8 +251,8 @@ func (p *CgroupProfiler) Stop() {
212251
p.mtx.Lock()
213252
defer p.mtx.Unlock()
214253
level.Debug(p.logger).Log("msg", "stopping cgroup profiler")
215-
if !p.reg.Unregister(p.missingStacks) {
216-
level.Debug(p.logger).Log("msg", "cannot unregister metric")
254+
if !p.metrics.unregister() {
255+
level.Debug(p.logger).Log("msg", "cannot unregister metrics")
217256
}
218257
if p.cancel != nil {
219258
p.cancel()
@@ -366,64 +405,55 @@ func (p *CgroupProfiler) profileLoop(ctx context.Context, captureTime time.Time)
366405
samples := map[[doubleStackDepth]uint64]*profile.Sample{}
367406

368407
it := p.bpfMaps.counts.Iterator()
369-
byteOrder := byteorder.GetHostByteOrder()
370408

371409
// TODO(brancz): Use libbpf batch functions.
372410
for it.Next() {
373411
// This byte slice is only valid for this iteration, so it must be
374412
// copied if we want to do anything with it outside of this loop.
375413
keyBytes := it.Key()
376-
377414
r := bytes.NewBuffer(keyBytes)
378415

379416
pidBytes := make([]byte, 4)
380417
if _, err := io.ReadFull(r, pidBytes); err != nil {
381418
return fmt.Errorf("read pid bytes: %w", err)
382419
}
383-
pid := byteOrder.Uint32(pidBytes)
384-
385-
userStackIDBytes := make([]byte, 4)
386-
if _, err := io.ReadFull(r, userStackIDBytes); err != nil {
387-
return fmt.Errorf("read user stack ID bytes: %w", err)
420+
pid := p.byteOrder.Uint32(pidBytes)
421+
if pid == 0 {
422+
level.Debug(p.logger).Log("msg", "missing pid")
423+
p.metrics.missingPIDs.Inc()
424+
continue
388425
}
389-
userStackID := int32(byteOrder.Uint32(userStackIDBytes))
390426

391-
kernelStackIDBytes := make([]byte, 4)
392-
if _, err := io.ReadFull(r, kernelStackIDBytes); err != nil {
393-
return fmt.Errorf("read kernel stack ID bytes: %w", err)
427+
// Twice the stack depth because we have a user and a potential Kernel stack.
428+
// Read order matters, since we read from the key buffer.
429+
stack := [doubleStackDepth]uint64{}
430+
userErr := p.readUserStack(r, &stack)
431+
if userErr != nil {
432+
if errors.Is(userErr, errUnrecoverable) {
433+
return userErr
434+
}
435+
level.Debug(p.logger).Log("msg", "failed to read user stack", "err", userErr)
394436
}
395-
kernelStackID := int32(byteOrder.Uint32(kernelStackIDBytes))
396-
397-
valueBytes, err := p.bpfMaps.counts.GetValue(unsafe.Pointer(&keyBytes[0]))
398-
if err != nil {
399-
return fmt.Errorf("get count value: %w", err)
437+
kernelErr := p.readKernelStack(r, &stack)
438+
if kernelErr != nil {
439+
if errors.Is(kernelErr, errUnrecoverable) {
440+
return kernelErr
441+
}
442+
level.Debug(p.logger).Log("msg", "failed to read kernel stack", "err", kernelErr)
400443
}
401-
value := byteOrder.Uint64(valueBytes)
402-
403-
stackBytes, err := p.bpfMaps.stackTraces.GetValue(unsafe.Pointer(&userStackID))
404-
if err != nil {
405-
p.missingStacks.WithLabelValues("user").Inc()
444+
if userErr != nil && kernelErr != nil {
445+
// Both stacks are missing. Nothing to do.
406446
continue
407447
}
408448

409-
// Twice the stack depth because we have a user and a potential Kernel stack.
410-
stack := [doubleStackDepth]uint64{}
411-
err = binary.Read(bytes.NewBuffer(stackBytes), byteOrder, stack[:stackDepth])
449+
value, err := p.readValue(keyBytes)
412450
if err != nil {
413-
return fmt.Errorf("read user stack trace: %w", err)
451+
return fmt.Errorf("read value: %w", err)
414452
}
415-
416-
if kernelStackID >= 0 {
417-
stackBytes, err = p.bpfMaps.stackTraces.GetValue(unsafe.Pointer(&kernelStackID))
418-
if err != nil {
419-
p.missingStacks.WithLabelValues("kernel").Inc()
420-
continue
421-
}
422-
423-
err = binary.Read(bytes.NewBuffer(stackBytes), byteOrder, stack[stackDepth:])
424-
if err != nil {
425-
return fmt.Errorf("read kernel stack trace: %w", err)
426-
}
453+
if value == 0 {
454+
// This should never happen, but it's here just in case.
455+
// If we have a zero value, we don't want to add it to the profile.
456+
continue
427457
}
428458

429459
sample, ok := samples[stack]
@@ -587,6 +617,64 @@ func (p *CgroupProfiler) profileLoop(ctx context.Context, captureTime time.Time)
587617
return nil
588618
}
589619

620+
func (p *CgroupProfiler) readUserStack(r *bytes.Buffer, stack *[254]uint64) error {
621+
userStackIDBytes := make([]byte, 4)
622+
if _, err := io.ReadFull(r, userStackIDBytes); err != nil {
623+
return fmt.Errorf("read user stack bytes, %s: %w", err, errUnrecoverable)
624+
}
625+
626+
userStackID := int32(p.byteOrder.Uint32(userStackIDBytes))
627+
if userStackID == 0 {
628+
p.metrics.failedStackUnwindingAttempts.WithLabelValues("user").Inc()
629+
return errors.New("user stack ID is 0, probably stack unwinding failed")
630+
}
631+
632+
stackBytes, err := p.bpfMaps.stackTraces.GetValue(unsafe.Pointer(&userStackID))
633+
if err != nil {
634+
p.metrics.missingStacks.WithLabelValues("user").Inc()
635+
return fmt.Errorf("read user stack trace: %w", err)
636+
}
637+
638+
if err := binary.Read(bytes.NewBuffer(stackBytes), p.byteOrder, stack[:stackDepth]); err != nil {
639+
return fmt.Errorf("read user stack bytes, %s: %w", err, errUnrecoverable)
640+
}
641+
642+
return nil
643+
}
644+
645+
func (p *CgroupProfiler) readKernelStack(r *bytes.Buffer, stack *[254]uint64) error {
646+
kernelStackIDBytes := make([]byte, 4)
647+
if _, err := io.ReadFull(r, kernelStackIDBytes); err != nil {
648+
return fmt.Errorf("read kernel stack bytes, %s: %w", err, errUnrecoverable)
649+
}
650+
651+
kernelStackID := int32(p.byteOrder.Uint32(kernelStackIDBytes))
652+
if kernelStackID == 0 {
653+
p.metrics.failedStackUnwindingAttempts.WithLabelValues("kernel").Inc()
654+
return errors.New("kernel stack ID is 0, probably stack unwinding failed")
655+
}
656+
657+
stackBytes, err := p.bpfMaps.stackTraces.GetValue(unsafe.Pointer(&kernelStackID))
658+
if err != nil {
659+
p.metrics.missingStacks.WithLabelValues("kernel").Inc()
660+
return fmt.Errorf("read kernel stack trace: %w", err)
661+
}
662+
663+
if err := binary.Read(bytes.NewBuffer(stackBytes), p.byteOrder, stack[stackDepth:]); err != nil {
664+
return fmt.Errorf("read kernel stack bytes, %s: %w", err, errUnrecoverable)
665+
}
666+
667+
return nil
668+
}
669+
670+
func (p *CgroupProfiler) readValue(keyBytes []byte) (uint64, error) {
671+
valueBytes, err := p.bpfMaps.counts.GetValue(unsafe.Pointer(&keyBytes[0]))
672+
if err != nil {
673+
return 0, fmt.Errorf("get count value: %w", err)
674+
}
675+
return p.byteOrder.Uint64(valueBytes), nil
676+
}
677+
590678
func (p *CgroupProfiler) normalizeAddress(m *profile.Mapping, pid uint32, addr uint64) uint64 {
591679
if m == nil {
592680
return addr

0 commit comments

Comments
 (0)