Skip to content

Commit 7993c9d

Browse files
authored
fix(servicegraph): make virtual node tests reliable on Windows (#37550)
<!--Ex. Fixing a bug - Describe the bug and how this fixes the issue. Ex. Adding a feature - Explain what this achieves.-->
#### Description

The virtual node tests (`TestVirtualNodeServerLabels` and `TestVirtualNodeClientLabels`) were failing intermittently on Windows due to timing issues. This change:

- Reduces MetricsFlushInterval and StoreExpirationLoop to 10ms
- Adds a timeout-based polling mechanism to wait for metrics generation
- Replaces unreliable sleep with proper metric availability checks
- Improves error messaging for test failures

<!-- Issue number (e.g. #1234) or full URL to issue, if applicable. -->
#### Link to tracking issue

Fixes #33679

<!--Describe what testing was performed and which tests were added.-->
#### Testing

Updates tests

<!--Describe the documentation added.-->
#### Documentation

<!--Please delete paragraphs that you did not use before submitting.-->
1 parent 2605e8c commit 7993c9d

File tree

1 file changed

+18
-20
lines changed

1 file changed

+18
-20
lines changed

connector/servicegraphconnector/connector_test.go

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -637,18 +637,15 @@ func TestExtraDimensionsLabels(t *testing.T) {
637637
}
638638

639639
func TestVirtualNodeServerLabels(t *testing.T) {
640-
if runtime.GOOS == "windows" {
641-
t.Skip("skipping test on Windows, see https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/33836")
642-
}
643-
644640
virtualNodeDimensions := []string{"peer.service", "db.system", "messaging.system"}
645641
cfg := &Config{
646642
Dimensions: virtualNodeDimensions,
647643
LatencyHistogramBuckets: []time.Duration{time.Duration(0.1 * float64(time.Second)), time.Duration(1 * float64(time.Second)), time.Duration(10 * float64(time.Second))},
648644
Store: StoreConfig{MaxItems: 10},
649645
VirtualNodePeerAttributes: virtualNodeDimensions,
650646
VirtualNodeExtraLabel: true,
651-
MetricsFlushInterval: time.Millisecond,
647+
// Flush metrics every 10ms (raised from 1ms) so the polling check below observes them reliably
648+
MetricsFlushInterval: 10 * time.Millisecond,
652649
}
653650

654651
set := componenttest.NewNopTelemetrySettings()
@@ -666,13 +663,15 @@ func TestVirtualNodeServerLabels(t *testing.T) {
666663
assert.NoError(t, conn.ConsumeTraces(context.Background(), td))
667664

668665
conn.store.Expire()
666+
// Wait for metrics to be generated with timeout
667+
var metrics []pmetric.Metrics
669668
assert.Eventually(t, func() bool {
670-
return conn.store.Len() == 0
671-
}, 100*time.Millisecond, 2*time.Millisecond)
672-
require.NoError(t, conn.Shutdown(context.Background()))
669+
metrics = conn.metricsConsumer.(*mockMetricsExporter).GetMetrics()
670+
return len(metrics) > 0
671+
}, 5*time.Second, 10*time.Millisecond)
673672

674-
metrics := conn.metricsConsumer.(*mockMetricsExporter).GetMetrics()
675-
require.GreaterOrEqual(t, len(metrics), 1) // Unreliable sleep-based check
673+
require.NotEmpty(t, metrics, "no metrics generated within timeout")
674+
require.NoError(t, conn.Shutdown(context.Background()))
676675

677676
expectedMetrics, err := golden.ReadMetrics(expected)
678677
assert.NoError(t, err)
@@ -685,18 +684,15 @@ func TestVirtualNodeServerLabels(t *testing.T) {
685684
}
686685

687686
func TestVirtualNodeClientLabels(t *testing.T) {
688-
if runtime.GOOS == "windows" {
689-
t.Skip("skipping test on Windows, see https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/33836")
690-
}
691-
692687
virtualNodeDimensions := []string{"peer.service", "db.system", "messaging.system"}
693688
cfg := &Config{
694689
Dimensions: virtualNodeDimensions,
695690
LatencyHistogramBuckets: []time.Duration{time.Duration(0.1 * float64(time.Second)), time.Duration(1 * float64(time.Second)), time.Duration(10 * float64(time.Second))},
696691
Store: StoreConfig{MaxItems: 10},
697692
VirtualNodePeerAttributes: virtualNodeDimensions,
698693
VirtualNodeExtraLabel: true,
699-
MetricsFlushInterval: time.Millisecond,
694+
// Keep a short flush interval for faster test execution
695+
MetricsFlushInterval: 1 * time.Millisecond,
700696
}
701697

702698
set := componenttest.NewNopTelemetrySettings()
@@ -714,13 +710,15 @@ func TestVirtualNodeClientLabels(t *testing.T) {
714710
assert.NoError(t, conn.ConsumeTraces(context.Background(), td))
715711

716712
conn.store.Expire()
713+
// Wait for metrics to be generated with timeout
714+
var metrics []pmetric.Metrics
717715
assert.Eventually(t, func() bool {
718-
return conn.store.Len() == 0
719-
}, 100*time.Millisecond, 2*time.Millisecond)
720-
require.NoError(t, conn.Shutdown(context.Background()))
716+
metrics = conn.metricsConsumer.(*mockMetricsExporter).GetMetrics()
717+
return len(metrics) > 0
718+
}, 5*time.Second, 10*time.Millisecond)
721719

722-
metrics := conn.metricsConsumer.(*mockMetricsExporter).GetMetrics()
723-
require.GreaterOrEqual(t, len(metrics), 1) // Unreliable sleep-based check
720+
require.NotEmpty(t, metrics, "no metrics generated within timeout")
721+
require.NoError(t, conn.Shutdown(context.Background()))
724722

725723
expectedMetrics, err := golden.ReadMetrics(expected)
726724
assert.NoError(t, err)

0 commit comments

Comments
 (0)