Skip to content

Commit e6959a4

Browse files
fix: de-duplicate php package data (#871)
adds a filter for package data already sent during the current connection instance to prevent sending duplicate data between harvests.
1 parent 8f0ae2c commit e6959a4

File tree

6 files changed

+161
-8
lines changed

6 files changed

+161
-8
lines changed

daemon/internal/newrelic/app.go

+81-4
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ type App struct {
146146
HarvestTrigger HarvestTriggerFunc
147147
LastActivity time.Time
148148
Rules MetricRules
149+
PhpPackages map[PhpPackagesKey]struct{}
149150
}
150151

151152
func (app *App) String() string {
@@ -180,6 +181,7 @@ func NewApp(info *AppInfo) *App {
180181
info: info,
181182
HarvestTrigger: nil,
182183
LastActivity: now,
184+
PhpPackages: make(map[PhpPackagesKey]struct{}),
183185
}
184186
}
185187

@@ -303,10 +305,10 @@ func (app *App) NeedsConnectAttempt(now time.Time, backoff time.Duration) bool {
303305
return false
304306
}
305307

306-
//Since span events are not included in Faster Event Harvest due to concerns
307-
//about downsampling within a distributed trace, the report period and harvest
308-
//limit are reported separately in span_event_harvest_config instead of
309-
//event_harvest_config. Combine them both into EventHarvestConfig here.
308+
// Since span events are not included in Faster Event Harvest due to concerns
309+
// about downsampling within a distributed trace, the report period and harvest
310+
// limit are reported separately in span_event_harvest_config instead of
311+
// event_harvest_config. Combine them both into EventHarvestConfig here.
310312
func combineEventConfig(ehc collector.EventHarvestConfig, sehc collector.SpanEventHarvestConfig) collector.EventHarvestConfig {
311313
ehc.EventConfigs.SpanEventConfig.Limit = sehc.SpanEventConfig.Limit
312314
ehc.EventConfigs.SpanEventConfig.ReportPeriod = sehc.SpanEventConfig.ReportPeriod
@@ -338,3 +340,78 @@ func (app *App) Inactive(threshold time.Duration) bool {
338340
}
339341
return time.Since(app.LastActivity) > threshold
340342
}
343+
344+
// filter seen php packages data to avoid sending duplicates
345+
//
346+
// the `App` structure contains a map of PHP Packages the reporting
347+
// application has encountered.
348+
//
349+
// the map of packages should persist for the duration of the
350+
// current connection
351+
//
352+
// takes the `PhpPackages.data` byte array as input and unmarshals
353+
// into an anonymous interface array
354+
//
355+
// the JSON format received from the agent is:
356+
//
357+
// [["package_name","version",{}],...]
358+
//
359+
// for each entry, assign the package name and version to the `PhpPackagesKey`
360+
// struct and use the key to verify data does not exist in the map. If the
361+
// key does not exist, add it to the map and the array of 'new' packages.
362+
//
363+
// convert the array of 'new' packages into a byte array representing
364+
// the expected data that should match input, minus the duplicates.
365+
func (app *App) filterPhpPackages(data []byte) []byte {
366+
if data == nil {
367+
return nil
368+
}
369+
370+
var pkgKey PhpPackagesKey
371+
var newPkgs []PhpPackagesKey
372+
var x []interface{}
373+
374+
err := json.Unmarshal(data, &x)
375+
if nil != err {
376+
log.Errorf("failed to unmarshal php package json: %s", err)
377+
return nil
378+
}
379+
380+
for _, pkgJson := range x {
381+
pkg, _ := pkgJson.([]interface{})
382+
if len(pkg) != 3 {
383+
log.Errorf("invalid php package json structure: %+v", pkg)
384+
return nil
385+
}
386+
name, ok := pkg[0].(string)
387+
version, ok := pkg[1].(string)
388+
pkgKey = PhpPackagesKey{name, version}
389+
_, ok = app.PhpPackages[pkgKey]
390+
if !ok {
391+
app.PhpPackages[pkgKey] = struct{}{}
392+
newPkgs = append(newPkgs, pkgKey)
393+
}
394+
}
395+
396+
if newPkgs == nil {
397+
return nil
398+
}
399+
400+
buf := &bytes.Buffer{}
401+
buf.WriteString(`[`)
402+
for _, pkg := range newPkgs {
403+
buf.WriteString(`["`)
404+
buf.WriteString(pkg.Name)
405+
buf.WriteString(`","`)
406+
buf.WriteString(pkg.Version)
407+
buf.WriteString(`",{}],`)
408+
}
409+
410+
resJson := buf.Bytes()
411+
412+
// swap last ',' character with ']'
413+
resJson = resJson[:len(resJson)-1]
414+
resJson = append(resJson, ']')
415+
416+
return resJson
417+
}

daemon/internal/newrelic/app_test.go

+51
Original file line numberDiff line numberDiff line change
@@ -613,3 +613,54 @@ func TestMaxPayloadSizeInBytesFromConnectReply(t *testing.T) {
613613
t.Errorf("parseConnectReply(something), got [%v], expected [%v]", c.MaxPayloadSizeInBytes, expectedMaxPayloadSizeInBytes)
614614
}
615615
}
616+
617+
func TestFilterPhpPackages(t *testing.T) {
618+
app := App{
619+
PhpPackages: make(map[PhpPackagesKey]struct{}),
620+
}
621+
var nilData []byte = nil
622+
emptyData := []byte(`[[{}]]`)
623+
validData := []byte(`[["drupal","6.0",{}]]`)
624+
moreValidData := []byte(`[["wordpress","7.0",{}],["symfony","5.1",{}]]`)
625+
duplicateData := []byte(`[["drupal","6.0",{}]]`)
626+
versionData := []byte(`[["drupal","9.0",{}]]`)
627+
invalidData := []byte(`[[["1","2","3"],["4","5"]{}]]`)
628+
629+
filteredData := app.filterPhpPackages(nilData)
630+
if filteredData != nil {
631+
t.Errorf("expected 'nil' result on 'nil' input, got [%v]", filteredData)
632+
}
633+
634+
filteredData = app.filterPhpPackages(emptyData)
635+
if filteredData != nil {
636+
t.Errorf("expected 'nil' result on empty data input, got [%v]", filteredData)
637+
}
638+
639+
expect := []byte(`[["drupal","6.0",{}]]`)
640+
filteredData = app.filterPhpPackages(validData)
641+
if string(filteredData) != string(expect) {
642+
t.Errorf("expected [%v], got [%v]", string(expect), string(filteredData))
643+
}
644+
645+
expect = []byte(`[["wordpress","7.0",{}],["symfony","5.1",{}]]`)
646+
filteredData = app.filterPhpPackages(moreValidData)
647+
if string(filteredData) != string(expect) {
648+
t.Errorf("expected [%v], got [%v]", string(expect), string(filteredData))
649+
}
650+
651+
filteredData = app.filterPhpPackages(duplicateData)
652+
if filteredData != nil {
653+
t.Errorf("expected 'nil', got [%v]", filteredData)
654+
}
655+
656+
expect = []byte(`[["drupal","9.0",{}]]`)
657+
filteredData = app.filterPhpPackages(versionData)
658+
if string(filteredData) != string(expect) {
659+
t.Errorf("expected [%v], got [%v]", string(expect), string(filteredData))
660+
}
661+
662+
filteredData = app.filterPhpPackages(invalidData)
663+
if filteredData != nil {
664+
t.Errorf("expected 'nil', go [%v]", filteredData)
665+
}
666+
}

daemon/internal/newrelic/harvest_test.go

+15
Original file line numberDiff line numberDiff line change
@@ -234,4 +234,19 @@ func TestHarvestEmpty(t *testing.T) {
234234
if h.empty() {
235235
t.Errorf("Harvest.empty() = true, want false")
236236
}
237+
238+
// verify that php packages does not send harvest when data is nil
239+
h = NewHarvest(startTime, collector.NewHarvestLimits(nil))
240+
h.PhpPackages.AddPhpPackagesFromData(nil)
241+
if !h.empty() {
242+
t.Errorf("Harvest.empty = false, want true")
243+
}
244+
245+
// verify that valid php package data sends a harvest
246+
h = NewHarvest(startTime, collector.NewHarvestLimits(nil))
247+
h.PhpPackages.AddPhpPackagesFromData([]byte(`[["testpackage","testversion",{}]]`))
248+
if h.empty() {
249+
t.Errorf("Harvest.empty = true, want false")
250+
}
251+
237252
}

daemon/internal/newrelic/php_packages.go

+5
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,11 @@ import (
1313
"github.com/newrelic/newrelic-php-agent/daemon/internal/newrelic/log"
1414
)
1515

16+
type PhpPackagesKey struct {
17+
Name string
18+
Version string
19+
}
20+
1621
// phpPackages represents all detected packages reported by an agent.
1722
type PhpPackages struct {
1823
numSeen int

daemon/internal/newrelic/processor.go

+3
Original file line numberDiff line numberDiff line change
@@ -673,6 +673,8 @@ func harvestByType(ah *AppHarvest, args *harvestArgs, ht HarvestType, du_chan ch
673673
// In such cases, harvest all types and return.
674674
if ht&HarvestAll == HarvestAll {
675675
ah.Harvest = NewHarvest(time.Now(), ah.App.connectReply.EventHarvestConfig.EventConfigs)
676+
// filter already seen php packages
677+
harvest.PhpPackages.data = ah.App.filterPhpPackages(harvest.PhpPackages.data)
676678
if args.blocking {
677679
// Invoked primarily by CleanExit
678680
harvestAll(harvest, args, ah.connectReply.EventHarvestConfig, ah.TraceObserver, du_chan)
@@ -698,6 +700,7 @@ func harvestByType(ah *AppHarvest, args *harvestArgs, ht HarvestType, du_chan ch
698700
slowSQLs := harvest.SlowSQLs
699701
txnTraces := harvest.TxnTraces
700702
phpPackages := harvest.PhpPackages
703+
phpPackages.data = ah.App.filterPhpPackages(phpPackages.data)
701704

702705
harvest.Metrics = NewMetricTable(limits.MaxMetrics, time.Now())
703706
harvest.Errors = NewErrorHeap(limits.MaxErrors)

daemon/internal/newrelic/processor_test.go

+6-4
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ var (
6464
sampleSpanEvent = []byte("belated birthday")
6565
sampleLogEvent = []byte("log event test birthday")
6666
sampleErrorEvent = []byte("forgotten birthday")
67-
samplePhpPackages = []byte(`["package", "1.2.3",{}]`)
67+
samplePhpPackages = []byte(`[["package","1.2.3",{}]]`)
6868
)
6969

7070
type ClientReturn struct {
@@ -297,9 +297,11 @@ func TestProcessorHarvestDefaultDataPhpPackages(t *testing.T) {
297297
// collect php packages
298298
m.clientReturn <- ClientReturn{nil, nil, 202}
299299
cp_pkgs := <-m.clientParams
300+
300301
// collect metrics
301302
m.clientReturn <- ClientReturn{nil, nil, 202}
302303
cp_metrics := <-m.clientParams
304+
303305
// collect usage metrics
304306
m.clientReturn <- ClientReturn{nil, nil, 202}
305307
cp_usage := <-m.clientParams
@@ -308,7 +310,7 @@ func TestProcessorHarvestDefaultDataPhpPackages(t *testing.T) {
308310

309311
// check pkgs and metric data - it appears these can
310312
// come in different orders so check both
311-
toTestPkgs := `["Jars",["package", "1.2.3",{}]]`
313+
toTestPkgs := `["Jars",[["package","1.2.3",{}]]]`
312314
if toTestPkgs != string(cp_pkgs.data) {
313315
if toTestPkgs != string(cp_metrics.data) {
314316
t.Fatalf("packages data: expected '%s', got '%s'", toTestPkgs, string(cp_pkgs.data))
@@ -318,9 +320,9 @@ func TestProcessorHarvestDefaultDataPhpPackages(t *testing.T) {
318320
time1 := strings.Split(string(cp_usage.data), ",")[1]
319321
time2 := strings.Split(string(cp_usage.data), ",")[2]
320322
usageMetrics := `["one",` + time1 + `,` + time2 + `,` +
321-
`[[{"name":"Supportability/C/Collector/Output/Bytes"},[2,1285,0,0,0,0]],` +
323+
`[[{"name":"Supportability/C/Collector/Output/Bytes"},[2,1286,0,0,0,0]],` +
322324
`[{"name":"Supportability/C/Collector/metric_data/Output/Bytes"},[1,1253,0,0,0,0]],` +
323-
`[{"name":"Supportability/C/Collector/update_loaded_modules/Output/Bytes"},[1,32,0,0,0,0]]]]`
325+
`[{"name":"Supportability/C/Collector/update_loaded_modules/Output/Bytes"},[1,33,0,0,0,0]]]]`
324326
if got, _ := OrderScrubMetrics(cp_usage.data, nil); string(got) != usageMetrics {
325327
t.Fatalf("metrics data: expected '%s', got '%s'", string(usageMetrics), string(got))
326328
}

0 commit comments

Comments
 (0)