-
Notifications
You must be signed in to change notification settings - Fork 74
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Using new otlpfile pipeline for self metrics #1922
Changes from 5 commits
843a16f
3c459b2
7a90e76
6b5aedb
cf393d6
9ccd96c
4cf50d9
ed7ee16
3f788ef
730e968
0fff72d
edfeac6
238c4a6
88e0c05
2b0476b
5657c66
5efb332
e440609
7cae18f
3365aac
77c1cd2
762a64e
1ae2cbc
3adc7d9
f09c2a9
07c68bc
71d134e
f94eff6
256bad3
a26f1ec
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,20 +24,19 @@ import ( | |
"syscall" | ||
|
||
"github.com/GoogleCloudPlatform/ops-agent/cmd/google_cloud_ops_agent_diagnostics/utils" | ||
"github.com/GoogleCloudPlatform/ops-agent/internal/self_metrics" | ||
) | ||
|
||
var ( | ||
config = flag.String("config", "/etc/google-cloud-ops-agent/config.yaml", "path to the user specified agent config") | ||
) | ||
|
||
func run(ctx context.Context) error { | ||
userUc, mergedUc, err := utils.GetUserAndMergedConfigs(ctx, *config) | ||
_, _, err := utils.GetUserAndMergedConfigs(ctx, *config) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
ctx, cancel := context.WithCancel(ctx) | ||
_, cancel := context.WithCancel(ctx) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we need this anymore then? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This will all go away in the next PR that removes the diagnostics binary. I'm trying to change as little as I can while maintaining functionality, to make this easier to review. |
||
defer cancel() | ||
|
||
go func() { | ||
|
@@ -54,10 +53,5 @@ func run(ctx context.Context) error { | |
} | ||
}() | ||
|
||
err = self_metrics.CollectOpsAgentSelfMetrics(ctx, userUc, mergedUc) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
return nil | ||
} |
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -185,13 +185,6 @@ func runSubagents(ctx context.Context, cancel context.CancelFunc, pluginInstallD | |||
}) | ||||
|
||||
var wg sync.WaitGroup | ||||
// Starting the diagnostics service | ||||
runDiagnosticsCmd := exec.CommandContext(ctx, | ||||
path.Join(pluginInstallDirectory, DiagnosticsBinary), | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please also remove the
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please fix There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||||
"-config", OpsAgentConfigLocationLinux, | ||||
) | ||||
wg.Add(1) | ||||
go runSubAgentCommand(ctx, cancel, runDiagnosticsCmd, runCommand, &wg) | ||||
|
||||
// Starting Otel | ||||
runOtelCmd := exec.CommandContext(ctx, | ||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,13 +29,10 @@ import ( | |
"unsafe" | ||
|
||
"github.com/GoogleCloudPlatform/ops-agent/apps" | ||
"github.com/GoogleCloudPlatform/ops-agent/cmd/google_cloud_ops_agent_diagnostics/utils" | ||
"github.com/GoogleCloudPlatform/ops-agent/confgenerator" | ||
"github.com/GoogleCloudPlatform/ops-agent/internal/healthchecks" | ||
"github.com/GoogleCloudPlatform/ops-agent/internal/logs" | ||
"github.com/GoogleCloudPlatform/ops-agent/internal/self_metrics" | ||
"github.com/kardianos/osext" | ||
"go.opentelemetry.io/otel" | ||
"golang.org/x/sys/windows" | ||
"golang.org/x/sys/windows/svc/debug" | ||
"golang.org/x/sys/windows/svc/eventlog" | ||
|
@@ -160,8 +157,7 @@ func (ps *OpsAgentPluginServer) Start(ctx context.Context, msg *pb.StartRequest) | |
windowsEventLogger.Close() | ||
} | ||
|
||
otelErrorHandler := &otelErrorHandler{windowsEventLogger: windowsEventLogger, windowsEventId: OpsAgentUAPPluginEventID} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you also remove the definition of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please fix. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
go runSubagents(pContext, cancelFunc, pluginInstallDir, pluginStateDir, runSubAgentCommand, ps.runCommand, otelErrorHandler) | ||
go runSubagents(pContext, cancelFunc, pluginInstallDir, pluginStateDir, runSubAgentCommand, ps.runCommand) | ||
|
||
return &pb.StartResponse{}, nil | ||
} | ||
|
@@ -302,11 +298,6 @@ func generateSubAgentConfigs(ctx context.Context, userConfigPath string, pluginS | |
windowsEventLogger.Info(OpsAgentUAPPluginEventID, fmt.Sprintf("Built-in config:\n%s\n", apps.BuiltInConfStructs["windows"])) | ||
windowsEventLogger.Info(OpsAgentUAPPluginEventID, fmt.Sprintf("Merged config:\n%s\n", uc)) | ||
|
||
// The generated otlp metric json files are used only by the otel service. | ||
if err = self_metrics.GenerateOpsAgentSelfMetricsOTLPJSON(ctx, userConfigPath, filepath.Join(pluginStateDir, GeneratedConfigsOutDir, "otel")); err != nil { | ||
return err | ||
} | ||
|
||
for _, subagent := range []string{ | ||
"otel", | ||
"fluentbit", | ||
|
@@ -378,14 +369,9 @@ func createWindowsJobHandle() (windows.Handle, error) { | |
// | ||
// cancel: the cancel function for the parent context. By calling this function, the parent context is canceled, | ||
// and GetStatus() returns a non-healthy status, signaling UAP to re-trigger Start(). | ||
// | ||
// otelErrorHandler: an implementation of otel.ErrorHandler that is used in the diagnostics service to log otel errors to the Windows event log. | ||
func runSubagents(ctx context.Context, cancel context.CancelFunc, pluginInstallDirectory string, pluginStateDirectory string, runSubAgentCommand RunSubAgentCommandFunc, runCommand RunCommandFunc, otelErrorHandler otel.ErrorHandler) { | ||
func runSubagents(ctx context.Context, cancel context.CancelFunc, pluginInstallDirectory string, pluginStateDirectory string, runSubAgentCommand RunSubAgentCommandFunc, runCommand RunCommandFunc) { | ||
|
||
var wg sync.WaitGroup | ||
// Starting the diagnostics service | ||
wg.Add(1) | ||
go runDiagnosticsService(ctx, cancel, otelErrorHandler, &wg) | ||
|
||
// Starting Otel | ||
runOtelCmd := exec.CommandContext(ctx, | ||
|
@@ -411,27 +397,6 @@ func runSubagents(ctx context.Context, cancel context.CancelFunc, pluginInstallD | |
wg.Wait() | ||
} | ||
|
||
func runDiagnosticsService(ctx context.Context, cancel context.CancelFunc, otelErrorHandler otel.ErrorHandler, wg *sync.WaitGroup) { | ||
defer wg.Done() | ||
|
||
userUc, mergedUc, err := utils.GetUserAndMergedConfigs(ctx, OpsAgentConfigLocationWindows) | ||
if err != nil { | ||
log.Printf("Failed to run the diagnostics service: %v", err) | ||
cancel() | ||
return | ||
} | ||
|
||
// Set otel error handler | ||
otel.SetErrorHandler(otelErrorHandler) | ||
|
||
err = self_metrics.CollectOpsAgentSelfMetrics(ctx, userUc, mergedUc) | ||
if err != nil { | ||
log.Printf("Failed to run the diagnostics service: %v", err) | ||
cancel() | ||
return | ||
} | ||
} | ||
|
||
func runCommand(cmd *exec.Cmd) (string, error) { | ||
if cmd == nil { | ||
return "", nil | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -80,7 +80,7 @@ func (uc *UnifiedConfig) getOTelLogLevel() string { | |
return logLevel | ||
} | ||
|
||
func (uc *UnifiedConfig) GenerateOtelConfig(ctx context.Context) (string, error) { | ||
func (uc *UnifiedConfig) GenerateOtelConfig(ctx context.Context, outDir string) (string, error) { | ||
p := platform.FromContext(ctx) | ||
userAgent, _ := p.UserAgent("Google-Cloud-Ops-Agent-Metrics") | ||
metricVersionLabel, _ := p.VersionLabel("google-cloud-ops-agent-metrics") | ||
|
@@ -100,6 +100,12 @@ func (uc *UnifiedConfig) GenerateOtelConfig(ctx context.Context) (string, error) | |
ReceiverPipelineName: "otel", | ||
} | ||
|
||
receiverPipelines["enabled_receivers_feature_tracking"] = EnabledReceiversFeatureTrackingMetricsPipeline(ctx, outDir) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I suggest a more descriptive name for the pipeline instead of Another alternative is to call the pipeline |
||
pipelines["enabled_receivers_feature_tracking"] = otel.Pipeline{ | ||
Type: "metrics", | ||
ReceiverPipelineName: "enabled_receivers_feature_tracking", | ||
} | ||
|
||
receiverPipelines["fluentbit"] = AgentSelfMetrics{ | ||
Version: loggingVersionLabel, | ||
Port: fluentbit.MetricsPort, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -59,7 +59,7 @@ func (uc *UnifiedConfig) GenerateFilesFromConfig(ctx context.Context, service, l | |
} | ||
} | ||
case "otel": | ||
otelConfig, err := uc.GenerateOtelConfig(ctx) | ||
otelConfig, err := uc.GenerateOtelConfig(ctx, outDir) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [nit] In the fluent bit case, the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can do on a follow up There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, we don't write the otel generated configs inside In the PR, @rafaelwestphal is passing |
||
if err != nil { | ||
return fmt.Errorf("can't parse configuration: %w", err) | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This method can just return an error now; I don't think we ever use the first two return values.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This will all go away in the next PR that removes the diagnostics binary. I'm trying to change as little as I can while maintaining functionality, to make this easier to review.