-
Notifications
You must be signed in to change notification settings - Fork 73
/
Copy pathagents.go
1201 lines (1100 loc) · 51.8 KB
/
agents.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build integration_test
/*
Package agents holds various helpers for interacting with the agents on GCE
instances.
Note: This file contains various things that appear unused, but in the future
we would like them to be used for a few tests inside Google's code silo. So
please don't delete things just because they are unused within the Ops Agent
repo.
*/
package agents
import (
"context"
"embed"
"fmt"
"log"
"os"
"path"
"regexp"
"strings"
"testing"
"time"
"github.com/GoogleCloudPlatform/ops-agent/integration_test/gce"
"github.com/GoogleCloudPlatform/ops-agent/integration_test/logging"
"github.com/GoogleCloudPlatform/ops-agent/integration_test/util"
"github.com/blang/semver"
"github.com/cenkalti/backoff/v4"
"go.uber.org/multierr"
)
// TrailingQueryWindow represents how far into the past to look when querying
// for uptime metrics.
const TrailingQueryWindow = 2 * time.Minute

// OpsAgentPluginServerPort defines the port on which the Ops Agent UAP Plugin gRPC server runs.
// It is a string because it is interpolated directly into the
// "localhost:<port>" target of grpcurl command lines in this file.
const OpsAgentPluginServerPort = "1234"

// scriptsDir holds the contents of the testdata directory, embedded at build
// time via the go:embed directive below.
//
//go:embed testdata
var scriptsDir embed.FS
// AgentPackage represents a thing that we ask OS Config to install for us.
// One AgentPackage can expand out and result in multiple AgentServices
// being installed on the VM (see AgentServices for the expansion).
type AgentPackage struct {
	// Passed to the --agent-rules flag of "gcloud ... policies create|update".
	// Compared against LoggingAgentType, MetricsAgentType and OpsAgentType
	// when the package is expanded into services.
	Type string
	// The name of the package that actually gets installed on the VM.
	PackageName string
}
// AgentService represents an agent service/process that will end up running
// on the VM. Several services may share the same PackageName when a single
// package installs more than one service.
type AgentService struct {
	// The name of the systemctl service for this agent. On Windows this is
	// the name passed to Get-Service.
	ServiceName string
	// The name of the agent according to package managers. The PackageName
	// is what is passed to dpkg/apt/yum/zypper/etc commands.
	PackageName string
	// The name of the agent according to the uptime metric that it uploads to
	// the monitoring backend.
	// If this is "", it means that this service doesn't upload an uptime metric
	// and the uptime lookup is skipped for it.
	UptimeMetricName string
}
// TestCase holds the name of a test scenario and the packages being tested.
type TestCase struct {
	// Name identifies the scenario.
	Name string
	// AgentPackages lists the packages exercised by the scenario.
	AgentPackages []AgentPackage
}
// AgentServices maps the requested packages onto the agent services that are
// expected to be running once those packages are installed. One package may
// yield several services; the ops agent is a single package that runs more
// than one service.
//
// On Windows the only supported request is exactly one ops agent package; any
// other request fails the test. On other platforms, a package whose Type is
// not one of LoggingAgentType, MetricsAgentType or OpsAgentType fails the test.
func AgentServices(t *testing.T, imageSpec string, pkgs []AgentPackage) []AgentService {
	t.Helper()

	if !gce.IsWindows(imageSpec) {
		var expanded []AgentService
		for _, p := range pkgs {
			if p.Type == LoggingAgentType {
				expanded = append(expanded, AgentService{
					ServiceName:      "google-fluentd",
					PackageName:      "google-fluentd",
					UptimeMetricName: "google-fluentd",
				})
			} else if p.Type == MetricsAgentType {
				expanded = append(expanded, AgentService{
					ServiceName:      "stackdriver-agent",
					PackageName:      "stackdriver-agent",
					UptimeMetricName: "stackdriver_agent",
				})
			} else if p.Type == OpsAgentType {
				// Both subagent services ship in the same package.
				expanded = append(expanded, AgentService{
					ServiceName: "google-cloud-ops-agent-fluent-bit",
					PackageName: "google-cloud-ops-agent",
					// TODO(b/170138116): Enable this check once the uptime metric is
					// being uploaded for Fluent-Bit.
					UptimeMetricName: "",
				})
				expanded = append(expanded, AgentService{
					ServiceName:      "google-cloud-ops-agent-opentelemetry-collector",
					PackageName:      "google-cloud-ops-agent",
					UptimeMetricName: "google-cloud-ops-agent-metrics",
				})
			} else {
				t.Fatalf("Package %#v has a Type that is not supported by this test", p)
			}
		}
		return expanded
	}

	// Windows: the service list is fixed, so the request must be exactly the
	// ops agent package.
	opsAgentOnly := len(pkgs) == 1 && pkgs[0].Type == OpsAgentType
	if !opsAgentOnly {
		t.Fatalf("AgentServices() assumes that the only package we want to install on Windows is the ops agent. Requested packages: %v", pkgs)
	}

	mainService := AgentService{
		ServiceName: "google-cloud-ops-agent",
		PackageName: "google-cloud-ops-agent",
		// This service doesn't currently have an uptime metric.
		UptimeMetricName: "",
	}
	fluentBit := AgentService{
		ServiceName: "google-cloud-ops-agent-fluent-bit",
		PackageName: "google-cloud-ops-agent-fluent-bit",
		// TODO(b/170138116): Enable this metric once it is being uploaded for
		// Fluent-Bit.
		UptimeMetricName: "",
	}
	otelCollector := AgentService{
		ServiceName:      "google-cloud-ops-agent-opentelemetry-collector",
		PackageName:      "google-cloud-ops-agent-opentelemetry-collector",
		UptimeMetricName: "google-cloud-ops-agent-metrics",
	}
	return []AgentService{mainService, fluentBit, otelCollector}
}
// Agent type identifiers, the matching package descriptors, and a file-name
// suffix used when saving diagnostics.
// NOTE(review): these are declared as variables rather than constants; nothing
// visible in this file reassigns them — confirm before relying on that.
var (
	// LoggingAgentType represents the type var for the Logging Agent.
	LoggingAgentType = "logging"
	// MetricsAgentType represents the type var for the Monitoring Agent.
	MetricsAgentType = "metrics"
	// OpsAgentType represents the type var for the Ops Agent.
	OpsAgentType = "ops-agent"

	// LoggingPackage holds the type var and package name of the Logging Agent.
	LoggingPackage = AgentPackage{Type: LoggingAgentType, PackageName: "google-fluentd"}
	// MetricsPackage holds the type var and package name of the Metrics Agent.
	MetricsPackage = AgentPackage{Type: MetricsAgentType, PackageName: "stackdriver-agent"}
	// OpsPackage holds the type var and package name of the Ops Agent.
	OpsPackage = AgentPackage{Type: OpsAgentType, PackageName: "google-cloud-ops-agent"}

	// txtSuffix is appended to the names of diagnostic files derived from
	// remote file names.
	// This suffix helps Kokoro set the right Content-type for log files. See b/202432085.
	txtSuffix = ".txt"
)
// RunOpsAgentDiagnostics collects as much debugging information as it can
// from the given VM, which is assumed to be running the ops agent. Each
// command's output is written to its own file in the directory managed by
// logger. Command failures are ignored on purpose: this is best-effort
// collection. Windows VMs are handled by runOpsAgentDiagnosticsWindows.
func RunOpsAgentDiagnostics(ctx context.Context, logger *logging.DirectoryLogger, vm *gce.VM) {
	logger.ToMainLog().Printf("Starting RunOpsAgentDiagnostics()...")

	if gce.IsWindows(vm.ImageSpec) {
		runOpsAgentDiagnosticsWindows(ctx, logger, vm)
		return
	}

	// Tests like TestPortsAndAPIHealthChecks will make these curl operations
	// hang, so give them a shorter timeout to avoid hanging the whole test.
	// Both curl commands share this one 30 second budget.
	curlCtx, cancelCurl := context.WithTimeout(ctx, 30*time.Second)
	defer cancelCurl()
	for _, scrape := range []struct{ file, cmd string }{
		{"fluent_bit_metrics.txt", "sudo curl -s localhost:20202/metrics"},
		{"otel_metrics.txt", "sudo curl -s localhost:20201/metrics"},
	} {
		gce.RunRemotely(curlCtx, logger.ToFile(scrape.file), vm, scrape.cmd)
	}

	// The agent status comes from systemd, or from the plugin's gRPC endpoint
	// when the agent runs as a UAP plugin.
	statusFile := "systemctl_status_for_ops_agent.txt"
	statusCmd := "sudo systemctl status google-cloud-ops-agent*"
	if gce.IsOpsAgentUAPPlugin() {
		statusFile = "status_for_ops_agent_uap_plugin.txt"
		statusCmd = fmt.Sprintf("grpcurl -plaintext -d '{}' localhost:%s plugin_comm.GuestAgentPlugin/GetStatus", OpsAgentPluginServerPort)
	}
	gce.RunRemotely(ctx, logger.ToFile(statusFile), vm, statusCmd)

	gce.RunRemotely(ctx, logger.ToFile("journalctl_output.txt"), vm, "sudo journalctl -xe")

	// Dump each log/config file into a local file named after its basename.
	for _, remoteFile := range getOpsAgentLogFilesList(vm.ImageSpec) {
		_, basename := path.Split(remoteFile)
		gce.RunRemotely(ctx, logger.ToFile(basename+txtSuffix), vm, "sudo cat "+remoteFile)
	}
}
// getOpsAgentLogFilesList returns the remote paths of the log and
// configuration files to capture from a non-Windows VM. The same files are
// listed in both modes; only the log and run directory roots differ when the
// ops agent runs as a UAP plugin.
func getOpsAgentLogFilesList(imageSpec string) []string {
	logRoot, runRoot := "/var/log", "/run"
	if gce.IsOpsAgentUAPPlugin() {
		const pluginStateDir = "/var/lib/google-guest-agent/agent_state/plugins/ops-agent-plugin"
		logRoot = pluginStateDir + "/log"
		runRoot = pluginStateDir + "/run"
	}

	agentLogDir := logRoot + "/google-cloud-ops-agent"
	fluentBitRunDir := runRoot + "/google-cloud-ops-agent-fluent-bit"
	otelRunDir := runRoot + "/google-cloud-ops-agent-opentelemetry-collector"

	return []string{
		gce.SyslogLocation(imageSpec),
		agentLogDir + "/health-checks.log",
		// The user-facing config lives in the same place in both modes.
		"/etc/google-cloud-ops-agent/config.yaml",
		agentLogDir + "/subagents/logging-module.log",
		agentLogDir + "/subagents/metrics-module.log",
		logRoot + "/nvidia-installer.log",
		fluentBitRunDir + "/fluent_bit_main.conf",
		fluentBitRunDir + "/fluent_bit_parser.conf",
		otelRunDir + "/otel.yaml",
		otelRunDir + "/feature_tracking_otlp.json",
		otelRunDir + "/enabled_receivers_otlp.json",
	}
}
// runOpsAgentDiagnosticsWindows is the Windows counterpart of
// RunOpsAgentDiagnostics. It runs a series of PowerShell commands on the VM
// and stores each command's output in its own file under logger's directory.
// Command failures are ignored: collection is best-effort.
func runOpsAgentDiagnosticsWindows(ctx context.Context, logger *logging.DirectoryLogger, vm *gce.VM) {
	// capture pairs a local output file name with the remote command that fills it.
	type capture struct{ file, cmd string }

	readFile := func(remotePath string) string {
		return fmt.Sprintf("Get-Content -Path '%s' -Raw", remotePath)
	}

	var captures []capture
	stateDir := `C:\ProgramData\Google\Cloud Operations\Ops Agent\`
	if gce.IsOpsAgentUAPPlugin() {
		stateDir = `C:\ProgramData\Google\Compute Engine\google-guest-agent\agent_state\plugins\ops-agent-plugin\`
		captures = []capture{
			{"ops_agent_uap_plugin_logs.txt", "Get-WinEvent -FilterHashtable @{ Logname='Application'; ProviderName='google-cloud-ops-agent-uap-plugin' } | Format-Table -AutoSize -Wrap"},
			{"status_for_ops_agent_uap_plugin.txt", fmt.Sprintf(`C:\grpcurl.exe -plaintext -d '{}' localhost:%s plugin_comm.GuestAgentPlugin/GetStatus`, OpsAgentPluginServerPort)},
			{"active_processes_in_vm.txt", `Get-WmiObject -Class Win32_Process | Select-Object Name, ProcessId, ParentProcessId`},
		}
	} else {
		captures = []capture{
			{"windows_System_log.txt", "Get-WinEvent -LogName System | Format-Table -AutoSize -Wrap"},
			{"Get-Service_output.txt", "Get-Service google-cloud-ops-agent* | Format-Table -AutoSize -Wrap"},
			{"ops_agent_logs.txt", "Get-WinEvent -FilterHashtable @{ Logname='Application'; ProviderName='google-cloud-ops-agent' } | Format-Table -AutoSize -Wrap"},
			{"open_telemetry_agent_logs.txt", "Get-WinEvent -FilterHashtable @{ Logname='Application'; ProviderName='google-cloud-ops-agent-opentelemetry-collector' } | Format-Table -AutoSize -Wrap"},
		}
	}

	// Fluent-Bit has not implemented exporting logs to the Windows event log yet.
	captures = append(captures,
		capture{"fluent_bit_agent_logs.txt", readFile(stateDir + `log\logging-module.log`)},
		capture{"health-checks.txt", readFile(stateDir + `log\health-checks.log`)},
	)

	configFiles := []string{
		`C:\Program Files\Google\Cloud Operations\Ops Agent\config\config.yaml`,
		stateDir + `generated_configs\fluentbit\fluent_bit_main.conf`,
		stateDir + `generated_configs\fluentbit\fluent_bit_parser.conf`,
		stateDir + `generated_configs\otel\otel.yaml`,
		stateDir + `generated_configs\otel\feature_tracking_otlp.json`,
		stateDir + `generated_configs\otel\enabled_receivers_otlp.json`,
	}
	for _, conf := range configFiles {
		// The basename is whatever follows the last backslash.
		basename := conf[strings.LastIndex(conf, `\`)+1:]
		captures = append(captures, capture{basename + txtSuffix, readFile(conf)})
	}

	for _, c := range captures {
		gce.RunRemotely(ctx, logger.ToFile(c.file), vm, c.cmd)
	}
}
// WaitForUptimeMetrics waits, for every given service concurrently, until
// that service's uptime metric is visible in the monitoring backend. Services
// with an empty UptimeMetricName are skipped. All failures are combined and
// returned as a single error; nil means every lookup succeeded.
func WaitForUptimeMetrics(ctx context.Context, logger *log.Logger, vm *gce.VM, services []AgentService) error {
	// Buffered so that no lookup goroutine blocks when reporting its result.
	results := make(chan error, len(services))

	lookup := func(svc AgentService) error {
		if svc.UptimeMetricName == "" {
			logger.Printf("Skipping lookup of uptime metric for %v", svc.ServiceName)
			return nil
		}
		versionFilter := fmt.Sprintf("metric.labels.version = starts_with(%q)", svc.UptimeMetricName)
		_, lookupErr := gce.WaitForMetric(
			ctx, logger, vm, "agent.googleapis.com/agent/uptime", TrailingQueryWindow,
			[]string{versionFilter}, false)
		return lookupErr
	}

	// Run gce.WaitForMetric in parallel for each agent. The service is passed
	// as an argument so each goroutine works on its own copy.
	for i := range services {
		go func(svc AgentService) {
			results <- lookup(svc)
		}(services[i])
	}

	var combined error
	for i := 0; i < len(services); i++ {
		combined = multierr.Append(combined, <-results)
	}
	if combined == nil {
		return nil
	}
	return fmt.Errorf("WaitForUptimeMetrics() error: %v", combined)
}
// CheckServicesRunning verifies that each of the given services is currently
// running on the VM. Every service is checked even when an earlier one fails;
// the failures are combined into one returned error. A failure to determine a
// service's status counts as an error for that service.
func CheckServicesRunning(ctx context.Context, logger *log.Logger, vm *gce.VM, services []AgentService) error {
	// checkOne returns nil when the named service is running.
	checkOne := func(name string) error {
		if !gce.IsWindows(vm.ImageSpec) {
			if _, runErr := gce.RunRemotely(ctx, logger, vm, fmt.Sprintf("sudo service %s status", name)); runErr != nil {
				return fmt.Errorf("RunRemotely(): status of %s was not OK. error was: %v", name, runErr)
			}
			return nil
		}

		output, runErr := gce.RunRemotely(ctx, logger, vm, fmt.Sprintf("(Get-Service -Name '%s').Status", name))
		if runErr != nil {
			return fmt.Errorf("no service named %q could be found: %v", name, runErr)
		}
		if status := strings.TrimSpace(output.Stdout); status != "Running" {
			return fmt.Errorf(`for service %q, got status=%q, want "Running"`, name, status)
		}
		return nil
	}

	var combined error
	for _, svc := range services {
		// Appending a nil error leaves the accumulated error unchanged.
		combined = multierr.Append(combined, checkOne(svc.ServiceName))
	}
	if combined == nil {
		return nil
	}
	return fmt.Errorf("CheckServicesRunning() error: %v", combined)
}
// CheckServicesRunningAndWaitForMetrics first verifies that the agent
// services are running on the VM and then waits for their uptime metrics to
// become visible. It stops at, and returns, the first failing step.
func CheckServicesRunningAndWaitForMetrics(ctx context.Context, logger *log.Logger, vm *gce.VM, services []AgentService) error {
	steps := []struct {
		announce string
		run      func(context.Context, *log.Logger, *gce.VM, []AgentService) error
	}{
		{"Checking that services are running.", CheckServicesRunning},
		{"Waiting for uptime metrics to be visible.", WaitForUptimeMetrics},
	}
	for _, step := range steps {
		logger.Print(step.announce)
		if err := step.run(ctx, logger, vm, services); err != nil {
			return err
		}
	}
	logger.Print("Agents are successfully running.")
	return nil
}
// CheckServicesNotRunning verifies that none of the given services is running
// on the VM. It returns on the first service that is found running, or whose
// status could not be determined.
func CheckServicesNotRunning(ctx context.Context, logger *log.Logger, vm *gce.VM, services []AgentService) error {
	logger.Print("Checking that agents are NOT running")
	for _, svc := range services {
		name := svc.ServiceName
		switch {
		case gce.IsWindows(vm.ImageSpec):
			// There are two possible cases that are acceptable here:
			// 1) the service has been deleted
			// 2) the service still exists but is in some state other than "Running",
			//    such as "Stopped".
			// We currently only see case #1, but let's permit case #2 as well.
			// The following command should output nothing, or maybe just a blank
			// line, in either case.
			query := fmt.Sprintf("Get-Service | Where-Object {$_.Name -eq '%s' -and $_.Status -eq 'Running'}", name)
			output, err := gce.RunRemotely(ctx, logger, vm, query)
			if err != nil {
				return fmt.Errorf("CheckServicesNotRunning(): error looking up running services named %v: %v", name, err)
			}
			if strings.TrimSpace(output.Stdout) != "" {
				return fmt.Errorf("CheckServicesNotRunning(): service %v was unexpectedly 'Running'. output: %s", name, output.Stdout)
			}
		case name == "google-fluentd":
			// TODO(b/159817885): Assert that google-fluentd is not running.
		default:
			// The leading "!" inverts the exit status, so the remote command
			// succeeds only when "service ... status" fails.
			if _, err := gce.RunRemotely(ctx, logger, vm, fmt.Sprintf("! sudo service %s status", name)); err != nil {
				return fmt.Errorf("CheckServicesNotRunning(): command could not be run or status of %s was unexpectedly OK. err: %v", name, err)
			}
		}
	}
	return nil
}
// packageManagerCmd returns the command prefix used to invoke the VM's
// package manager non-interactively. Windows VMs use googet; other VMs are
// matched on vm.OS.ID. An unrecognized OS ID yields an error.
func packageManagerCmd(vm *gce.VM) (string, error) {
	if gce.IsWindows(vm.ImageSpec) {
		return "googet -noconfirm", nil
	}

	const (
		yumCmd    = "sudo yum -y"
		zypperCmd = "sudo zypper --non-interactive"
		// apt refreshes its package index before every operation.
		aptCmd = "sudo apt-get update; sudo apt-get -y"
	)
	cmdByOSID := map[string]string{
		"centos":        yumCmd,
		"rhel":          yumCmd,
		"rocky":         yumCmd,
		"opensuse-leap": zypperCmd,
		"sles":          zypperCmd,
		"sles-sap":      zypperCmd,
		"debian":        aptCmd,
		"ubuntu":        aptCmd,
	}
	if cmd, known := cmdByOSID[vm.OS.ID]; known {
		return cmd, nil
	}
	return "", fmt.Errorf("packageManagerCmd() doesn't support image spec %s with value '%s'", vm.ImageSpec, vm.OS.ID)
}
// managePackages runs the VM's package manager with the given instruction
// (e.g. "install" or "remove") applied to the given packages. It returns an
// error if no package manager is known for the VM or if the remote command
// fails.
func managePackages(ctx context.Context, logger *log.Logger, vm *gce.VM, instruction string, pkgs []string) error {
	managerPrefix, err := packageManagerCmd(vm)
	if err != nil {
		return err
	}
	// Full command line: "<package manager> <instruction> <pkg> <pkg> ...".
	fullCmd := managerPrefix + " " + instruction + " " + strings.Join(pkgs, " ")
	_, err = gce.RunRemotely(ctx, logger, vm, fullCmd)
	return err
}
// tryInstallPackages attempts once to install the given packages.
// It has no retry logic of its own; InstallPackages wraps it with retries.
func tryInstallPackages(ctx context.Context, logger *log.Logger, vm *gce.VM, pkgs []string) error {
	return managePackages(ctx, logger, vm, "install", pkgs)
}
// InstallPackages installs the given packages on the given VM, assuming the
// package repo is already configured. Attempts go through
// RunInstallFuncWithRetry to paper over the transient issues that often
// happen when installing packages.
func InstallPackages(ctx context.Context, logger *log.Logger, vm *gce.VM, pkgs []string) error {
	err := RunInstallFuncWithRetry(ctx, logger, vm, func() error {
		return tryInstallPackages(ctx, logger, vm, pkgs)
	})
	if err == nil {
		return nil
	}
	return fmt.Errorf("could not install %v. err: %v", pkgs, err)
}
// UninstallPackages removes the given packages from the given VM.
// Unlike InstallPackages, this makes a single attempt with no retries.
func UninstallPackages(ctx context.Context, logger *log.Logger, vm *gce.VM, pkgs []string) error {
	return managePackages(ctx, logger, vm, "remove", pkgs)
}
// checkPackages asserts that the given packages on the given VM are in a state
// matching the installed argument: all installed when installed is true, none
// installed when it is false. It returns an error for the first package found
// in the wrong state, or if the state could not be determined.
//
// Windows VMs are checked against the output of "googet installed". Other VMs
// use rpm or dpkg-query, chosen from the image spec; an image spec that is
// neither RPM-based nor from debian-cloud/ubuntu-os-cloud yields an error.
func checkPackages(ctx context.Context, logger *log.Logger, vm *gce.VM, pkgs []string, installed bool) error {
	// errPrefix makes the error read "unexpectedly not installed" when the
	// packages were supposed to be installed, and "unexpectedly installed"
	// otherwise.
	errPrefix := ""
	if installed {
		errPrefix = "not "
	}
	if gce.IsWindows(vm.ImageSpec) {
		output, err := gce.RunRemotely(ctx, logger, vm, "googet installed")
		if err != nil {
			return err
		}
		for _, pkg := range pkgs {
			// Look for the package name in the output. \b means word boundary.
			// The name is escaped so that characters such as "." or "+" match
			// literally instead of being interpreted as regexp syntax.
			expr := `\b` + regexp.QuoteMeta(pkg) + `\b`
			re, err := regexp.Compile(expr)
			if err != nil {
				return fmt.Errorf("regexp %q failed to compile: %v", expr, err)
			}
			// An empty match means the package is absent from the output.
			if m := re.FindString(output.Stdout); (m == "") == installed {
				return fmt.Errorf("package %q was unexpectedly %sinstalled", pkg, errPrefix)
			}
		}
		// Success: the expected set of packages was found in the output of "googet installed".
		return nil
	}
	for _, pkg := range pkgs {
		cmd := ""
		if IsRPMBased(vm.ImageSpec) {
			// "rpm --query" succeeds only for installed packages; negate it
			// when the package is expected to be absent.
			cmd = fmt.Sprintf("rpm --query %s", pkg)
			if !installed {
				cmd = "! " + cmd
			}
		} else if strings.HasPrefix(vm.ImageSpec, "debian-cloud") ||
			strings.HasPrefix(vm.ImageSpec, "ubuntu-os-cloud") {
			// dpkg's package states are documented in "man 1 dpkg".
			// "config-files" means that the package is not installed but its config files
			// are still on the system. Accepting both that and "not-installed" is more
			// future-proof than just accepting "config-files".
			// The base command succeeds when the package is NOT installed, so
			// it is negated when the package is expected to be present.
			cmd = fmt.Sprintf(
				`(dpkg-query --show '--showformat=${db:Status-Status}' %s || echo 'not-installed') | grep --extended-regexp '(not-installed)|(config-files)'`, pkg)
			if installed {
				cmd = "! (" + cmd + ")"
			}
		} else {
			return fmt.Errorf("checkPackages() does not support image spec: %s", vm.ImageSpec)
		}
		if _, err := gce.RunRemotely(ctx, logger, vm, cmd); err != nil {
			return fmt.Errorf("command could not be run or %q was unexpectedly %sinstalled. err: %v", pkg, errPrefix, err)
		}
	}
	return nil
}
// CheckPackagesNotInstalled asserts that the given packages are not installed on the given VM.
// It returns an error for the first package that is installed, or if the
// check itself could not be carried out.
func CheckPackagesNotInstalled(ctx context.Context, logger *log.Logger, vm *gce.VM, pkgs []string) error {
	return checkPackages(ctx, logger, vm, pkgs, false)
}
// CheckPackagesInstalled asserts that the given packages are installed on the given VM.
// It returns an error for the first package that is missing, or if the check
// itself could not be carried out.
func CheckPackagesInstalled(ctx context.Context, logger *log.Logger, vm *gce.VM, pkgs []string) error {
	return checkPackages(ctx, logger, vm, pkgs, true)
}
// CheckAgentsUninstalled verifies that none of the given agent services is
// running on the VM and that the packages they belong to are not installed.
func CheckAgentsUninstalled(ctx context.Context, logger *log.Logger, vm *gce.VM, services []AgentService) error {
	if err := CheckServicesNotRunning(ctx, logger, vm, services); err != nil {
		return err
	}

	// One package name per service; duplicates are kept as-is.
	var pkgNames []string
	for i := range services {
		pkgNames = append(pkgNames, services[i].PackageName)
	}

	// TODO(martijnvs): Also check that the ops agent package itself is uninstalled.
	return CheckPackagesNotInstalled(ctx, logger, vm, pkgNames)
}
// IsRPMBased reports whether the image spec names an RPM-based distribution.
// It is true when the spec starts with one of the known RPM image project
// prefixes, or when it contains "sles-" anywhere.
func IsRPMBased(imageSpec string) bool {
	rpmPrefixes := []string{
		"centos-cloud",
		"rhel-",
		"rocky-linux-cloud",
		"suse-cloud",
		"suse-sap-cloud",
		"opensuse-cloud",
	}
	for _, prefix := range rpmPrefixes {
		if strings.HasPrefix(imageSpec, prefix) {
			return true
		}
	}
	// SLES images are matched by substring rather than by prefix.
	return strings.Contains(imageSpec, "sles-")
}
// StripTildeSuffix returns the part of version that precedes the first '~'
// character, or version unchanged when it contains no '~'. Debian version
// numbers can carry a tilde suffix (e.g. 1.0.1~debian10) that the semver
// library doesn't parse properly.
func StripTildeSuffix(version string) string {
	// SplitN always yields at least one element, which is everything before
	// the first separator (or the whole string when there is none).
	return strings.SplitN(version, "~", 2)[0]
}
// fetchPackageVersionWindows asks googet on the VM for information about pkg
// and parses the installed version out of the output. Only the leading
// "major.minor.patch" portion of the reported version is used. It returns an
// error if the command fails or no version line can be found.
func fetchPackageVersionWindows(ctx context.Context, logger *log.Logger, vm *gce.VM, pkg string) (semver.Version, error) {
	output, err := gce.RunRemotely(ctx, logger, vm, "googet installed -info "+pkg)
	if err != nil {
		return semver.Version{}, err
	}

	// Look for a line in the output with a version number. Some real examples:
	// " Version : 20201229.01.0+win@1"
	// " Version : 1.0.6@1"
	versionRE, err := regexp.Compile(`\s*Version\s*:\s*([0-9]+\.[0-9]+\.[0-9]+)[^0-9]+.*`)
	if err != nil {
		return semver.Version{}, fmt.Errorf("Could not compile regular expression: %v", err)
	}

	// The first matching line wins.
	for _, line := range strings.Split(output.Stdout, "\r\n") {
		if groups := versionRE.FindStringSubmatch(line); len(groups) > 1 {
			return semver.Make(groups[1])
		}
	}
	return semver.Version{}, fmt.Errorf("Could not parse version from googet output: %v", output.Stdout)
}
// fetchPackageVersion runs a googet/dpkg/rpm command remotely to get the
// installed version of a package. On non-Windows VMs the lookup is retried at
// a constant 20 second interval, up to 10 retries, because it sometimes takes
// ~1 minute between when the agents are up and running and the time that the
// package shows up as installed with rpm --query (b/178096139).
// TODO(martijnvs): Understand why b/178096139 happens and remove the retries.
func fetchPackageVersion(ctx context.Context, logger *log.Logger, vm *gce.VM, pkg string) (semver.Version, error) {
	logger.Printf("Getting %s version", pkg)
	if gce.IsWindows(vm.ImageSpec) {
		return fetchPackageVersionWindows(ctx, logger, vm, pkg)
	}

	rpmBased := IsRPMBased(vm.ImageSpec)
	query := `dpkg-query --show --showformat=\$\{Version} ` + pkg
	if rpmBased {
		query = `rpm --query --queryformat=%\{VERSION} ` + pkg
	}

	var found semver.Version
	attempt := func() error {
		output, err := gce.RunRemotely(ctx, logger, vm, query)
		if err != nil {
			return err
		}
		raw := strings.TrimSpace(output.Stdout)
		if !rpmBased {
			// dpkg versions may carry a "~distro" suffix that semver rejects.
			raw = StripTildeSuffix(raw)
		}
		parsed, err := semver.Make(raw)
		if err != nil {
			wrapped := fmt.Errorf("semver.Make() could not convert %q to a Version: %v", raw, err)
			logger.Print(wrapped)
			return wrapped
		}
		found = parsed
		return nil
	}

	retryPolicy := backoff.WithContext(backoff.WithMaxRetries(backoff.NewConstantBackOff(20*time.Second), 10), ctx)
	if err := backoff.Retry(attempt, retryPolicy); err != nil {
		return semver.Version{}, err
	}
	return found, nil
}
// FetchPackageVersions looks up the installed semver.Version on the given VM
// for every package in packages. Lookups that fail are left out of the
// returned map and their errors are combined into the returned error, so the
// map may be partially filled when the error is non-nil.
func FetchPackageVersions(ctx context.Context, logger *log.Logger, vm *gce.VM, packages []AgentPackage) (map[AgentPackage]semver.Version, error) {
	var combined error
	found := make(map[AgentPackage]semver.Version)
	for i := range packages {
		agentPkg := packages[i]
		name := agentPkg.PackageName

		version, fetchErr := fetchPackageVersion(ctx, logger, vm, name)
		logger.Printf("fetchPackageVersion() returned version=%v, err=%v", version, fetchErr)
		if fetchErr == nil {
			found[agentPkg] = version
			continue
		}
		combined = multierr.Append(combined, fmt.Errorf("fetchPackageVersion(%q) failed: %v", name, fetchErr))
	}
	return found, combined
}
// isRetriableInstallError reports whether err looks like a transient package
// installation failure that is worth retrying. The decision is based on
// substrings of err's message, some of which only count for particular image
// specs. A nil err is never retriable.
func isRetriableInstallError(imageSpec string, err error) bool {
	if err == nil {
		// Nothing failed, so there is nothing to retry. This also avoids
		// calling Error() on a nil error, which would panic.
		return false
	}
	msg := err.Error()
	// has reports whether the error message contains any of the given fragments.
	has := func(fragments ...string) bool {
		for _, fragment := range fragments {
			if strings.Contains(msg, fragment) {
				return true
			}
		}
		return false
	}

	// Failures treated as transient on every platform.
	if has(
		"Could not refresh zypper repositories.",
		"Credentials are invalid",
		"Resource temporarily unavailable",
		"System management is locked by the application",
	) {
		return true
	}
	if gce.IsWindows(imageSpec) && has("context deadline exceeded") {
		return true // See b/197127877 for history.
	}
	if strings.Contains(imageSpec, "rhel-8-") && strings.HasSuffix(imageSpec, "-sap-ha") && has(
		// See b/174039270 for history.
		"Could not refresh the google-cloud-ops-agent yum repositories",
		// This happens when the RHEL servers are down. See b/189950957.
		"Failed to download metadata for repo 'rhui-rhel-8-",
	) {
		return true
	}
	if strings.HasPrefix(imageSpec, "rhel-") && has(
		// b/187661497. Example: screen/3PMwAvhNBKWVYub
		"SSL_ERROR_SYSCALL",
		// b/184729120#comment31. Example: screen/4yK9evoY68LiaLr
		"Encountered end of file",
	) {
		return true
	}
	if strings.HasPrefix(imageSpec, "ubuntu-os-cloud") && has(
		// The upstream repo was in an inconsistent state. The error looks like:
		// screen/7U24zrRwADyYKqb
		"Clearsigned file isn't valid",
		// The mirror repo was in an inconsistent state. The error looks like:
		// http://screen/Ai2CHc7fcRosHJu
		"Mirror sync in progress?",
		// The mirror repo was in an inconsistent state. The error looks like:
		// http://screen/8HjakedwVnXZvw6
		"Hash Sum mismatch",
	) {
		return true
	}
	return false
}
// RunInstallFuncWithRetry calls installFunc, retrying at a constant 30 second
// interval (up to 5 retries) while the returned error looks transient
// according to isRetriableInstallError. Errors that do not look transient
// stop the retries immediately. Between attempts on RHEL 8 SAP HA images it
// also runs a best-effort recovery command on the VM.
//
// This function is intended to paper over various problems that arise when
// running apt/zypper/yum. installFunc can be any function that is invoking
// apt/zypper/yum, directly or indirectly.
func RunInstallFuncWithRetry(ctx context.Context, logger *log.Logger, vm *gce.VM, installFunc func() error) error {
	attempt := func() error {
		err := installFunc()
		if err == nil {
			return nil
		}
		if !isRetriableInstallError(vm.ImageSpec, err) {
			// A permanent error tells the backoff library to stop retrying.
			return backoff.Permanent(err)
		}
		if strings.Contains(vm.ImageSpec, "rhel-8-") && strings.HasSuffix(vm.ImageSpec, "-sap-ha") {
			logger.Println("attempting recovery steps from https://access.redhat.com/discussions/4656371 so that subsequent attempts are more likely to succeed... see b/189950957")
			// The outcome of the recovery command is intentionally not checked.
			gce.RunRemotely(ctx, logger, vm, "sudo dnf clean all && sudo rm -r /var/cache/dnf && sudo dnf upgrade")
		}
		// Returning a non-permanent error triggers retries.
		return err
	}

	retryPolicy := backoff.WithContext(backoff.WithMaxRetries(backoff.NewConstantBackOff(30*time.Second), 5), ctx)
	return backoff.Retry(attempt, retryPolicy)
}
// InstallStandaloneWindowsLoggingAgent downloads the Stackdriver Logging
// agent installer (StackdriverLogging-v1-16.exe) into the user profile
// directory of a Windows VM and runs it with the "/S" argument.
func InstallStandaloneWindowsLoggingAgent(ctx context.Context, logger *log.Logger, vm *gce.VM) error {
	// Based on
	// https://cloud.google.com/logging/docs/agent/installation#joint-install
	// with adjustments so that the commands work in a non-GUI context.
	const (
		downloadInstaller = `(New-Object Net.WebClient).DownloadFile("https://dl.google.com/cloudagents/windows/StackdriverLogging-v1-16.exe", "${env:UserProfile}\StackdriverLogging-v1-16.exe")`
		runInstaller      = `Start-Process -FilePath "${env:UserProfile}\StackdriverLogging-v1-16.exe" -ArgumentList "/S" -Wait -NoNewWindow`
	)
	// Both statements are sent as one two-line script.
	_, err := gce.RunRemotely(ctx, logger, vm, downloadInstaller+"\n"+runInstaller)
	return err
}
// InstallStandaloneWindowsMonitoringAgent downloads the Stackdriver
// Monitoring agent installer (StackdriverMonitoring-GCM-46.exe) into the user
// profile directory of a Windows VM and runs it with the "/S" argument.
func InstallStandaloneWindowsMonitoringAgent(ctx context.Context, logger *log.Logger, vm *gce.VM) error {
	// Based on
	// https://cloud.google.com/monitoring/agent/installation#joint-install
	// with adjustments so that the commands work in a non-GUI context.
	const (
		downloadInstaller = `(New-Object Net.WebClient).DownloadFile("https://repo.stackdriver.com/windows/StackdriverMonitoring-GCM-46.exe", "${env:UserProfile}\StackdriverMonitoring-GCM-46.exe")`
		runInstaller      = `Start-Process -FilePath "${env:UserProfile}\StackdriverMonitoring-GCM-46.exe" -ArgumentList "/S" -Wait -NoNewWindow`
	)
	// Both statements are sent as one two-line script.
	_, err := gce.RunRemotely(ctx, logger, vm, downloadInstaller+"\n"+runInstaller)
	return err
}
// getRestartOpsAgentCmd returns the command line that restarts the Ops Agent
// on a VM created from imageSpec.
//
// In UAP plugin mode the restart is a grpcurl Stop call, a 5 second pause and
// a grpcurl Start call against localhost:OpsAgentPluginServerPort, using
// PowerShell syntax on Windows and shell syntax elsewhere. Otherwise the
// agent's service is restarted.
func getRestartOpsAgentCmd(imageSpec string) string {
	if !gce.IsOpsAgentUAPPlugin() {
		if gce.IsWindows(imageSpec) {
			return "Restart-Service google-cloud-ops-agent -Force"
		}
		// Return a command that works for both < 2.0.0 and >= 2.0.0 agents.
		return "sudo service google-cloud-ops-agent restart || sudo systemctl restart google-cloud-ops-agent"
	}

	// pluginCall renders one grpcurl invocation of the given plugin method.
	pluginCall := func(executable, method string) string {
		return fmt.Sprintf("%s -plaintext -d '{}' localhost:%s plugin_comm.GuestAgentPlugin/%s", executable, OpsAgentPluginServerPort, method)
	}
	if gce.IsWindows(imageSpec) {
		const executable = `C:\grpcurl.exe`
		return pluginCall(executable, "Stop") + "; Start-Sleep -Seconds 5; " + pluginCall(executable, "Start")
	}
	const executable = "grpcurl"
	return pluginCall(executable, "Stop") + " && sleep 5 && " + pluginCall(executable, "Start")
}
// PackageLocation identifies where packages (currently only the Ops Agent
// packages) are installed from.
type PackageLocation struct {
	// packagesInGCS, if set, is a URL of a GCS directory holding the
	// .deb/.rpm/.goo files to install on the testing VMs. It is mutually
	// exclusive with repoSuffix.
	packagesInGCS string

	// repoSuffix is the package repository suffix to install from. Leaving
	// both repoSuffix and packagesInGCS as "" means "install the latest
	// stable release".
	repoSuffix string

	// repoCodename overrides the codename of the agent repository. At the
	// moment it is only used for ARM builds, and it is ignored when
	// installing from Artifact Registry.
	repoCodename string

	// artifactRegistryProject is the GCP project of the package repository
	// to install from. Requires repoSuffix to be nonempty.
	artifactRegistryProject string

	// artifactRegistryRegion is the Artifact Registry region the packages
	// live in. Requires repoSuffix to be nonempty.
	artifactRegistryRegion string
}
// LocationFromEnvVars builds a PackageLocation whose fields are read from the
// AGENT_PACKAGES_IN_GCS, REPO_SUFFIX, REPO_CODENAME,
// ARTIFACT_REGISTRY_PROJECT and ARTIFACT_REGISTRY_REGION environment
// variables. Unset variables yield empty fields.
func LocationFromEnvVars() PackageLocation {
	var location PackageLocation
	location.packagesInGCS = os.Getenv("AGENT_PACKAGES_IN_GCS")
	location.repoSuffix = os.Getenv("REPO_SUFFIX")
	location.repoCodename = os.Getenv("REPO_CODENAME")
	location.artifactRegistryProject = os.Getenv("ARTIFACT_REGISTRY_PROJECT")
	location.artifactRegistryRegion = os.Getenv("ARTIFACT_REGISTRY_REGION")
	return location
}
// toEnvironment renders every entry of environment that has a nonempty value
// through format (called with the key and then the value) and joins the
// results with separator. Entries with an empty value are skipped. The order
// of the rendered entries follows map iteration order and is therefore not
// stable between calls.
func toEnvironment(environment map[string]string, format string, separator string) string {
	var rendered strings.Builder
	wroteAny := false
	for name, value := range environment {
		if value == "" {
			continue
		}
		if wroteAny {
			rendered.WriteString(separator)
		}
		wroteAny = true
		fmt.Fprintf(&rendered, format, name, value)
	}
	return rendered.String()
}
// linuxEnvironment renders the nonempty entries of environment as
// space-separated NAME='value' assignments.
func linuxEnvironment(environment map[string]string) string {
	const (
		assignment = "%s='%s'"
		separator  = " "
	)
	return toEnvironment(environment, assignment, separator)
}
// windowsEnvironment renders the nonempty entries of environment as
// $env:NAME='value' assignments, one per line.
func windowsEnvironment(environment map[string]string) string {
	const (
		assignment = `$env:%s='%s'`
		separator  = "\n"
	)
	return toEnvironment(environment, assignment, separator)
}
// InstallOpsAgent installs the Ops Agent on vm from the place described by
// location; see the PackageLocation struct for the meaning of each field.
//
// An error is returned when location sets both packagesInGCS and repoSuffix,
// or sets artifactRegistryRegion without repoSuffix. In UAP plugin mode the
// work is delegated to InstallOpsAgentUAPPlugin, and when packagesInGCS is
// set it is delegated to InstallPackageFromGCS. Otherwise the
// add-google-cloud-ops-agent-repo script is downloaded onto the VM and run
// with its also-install flag through RunInstallFuncWithRetry.
func InstallOpsAgent(ctx context.Context, logger *log.Logger, vm *gce.VM, location PackageLocation) error {
	switch {
	case location.packagesInGCS != "" && location.repoSuffix != "":
		return fmt.Errorf("invalid PackageLocation: cannot provide both location.packagesInGCS and location.repoSuffix. location=%#v", location)
	case location.artifactRegistryRegion != "" && location.repoSuffix == "":
		return fmt.Errorf("invalid PackageLocation: location.artifactRegistryRegion was nonempty yet location.repoSuffix was empty. location=%#v", location)
	case gce.IsOpsAgentUAPPlugin():
		return InstallOpsAgentUAPPlugin(ctx, logger, vm, location)
	case location.packagesInGCS != "":
		return InstallPackageFromGCS(ctx, logger, vm, location.packagesInGCS)
	}

	// Settings handed to the repo script as environment variables. Entries
	// with an empty value are dropped when the environment is rendered.
	scriptEnv := map[string]string{
		"REPO_SUFFIX":               location.repoSuffix,
		"REPO_CODENAME":             location.repoCodename,
		"ARTIFACT_REGISTRY_PROJECT": location.artifactRegistryProject,
		"ARTIFACT_REGISTRY_REGION":  location.artifactRegistryRegion,
	}

	if !gce.IsWindows(vm.ImageSpec) {
		const fetchScript = "curl -sSO https://dl.google.com/cloudagents/add-google-cloud-ops-agent-repo.sh"
		if _, err := gce.RunRemotely(ctx, logger, vm, fetchScript); err != nil {
			return fmt.Errorf("InstallOpsAgent() failed to download repo script: %w", err)
		}
		install := func() error {
			// The environment is rendered on every attempt.
			command := "sudo " + linuxEnvironment(scriptEnv) + " bash -x add-google-cloud-ops-agent-repo.sh --also-install"
			_, err := gce.RunRemotely(ctx, logger, vm, command)
			return err
		}
		if err := RunInstallFuncWithRetry(ctx, logger, vm, install); err != nil {
			return fmt.Errorf("InstallOpsAgent() error running repo script: %w", err)
		}
		return nil
	}

	// Note that these commands match the ones from our public install docs
	// (https://cloud.google.com/stackdriver/docs/solutions/agents/ops-agent/installation)
	// and keeping them in sync is encouraged so that we are testing the
	// same commands that our customers are running.
	const (
		fetchScript  = `(New-Object Net.WebClient).DownloadFile("https://dl.google.com/cloudagents/add-google-cloud-ops-agent-repo.ps1", "${env:UserProfile}\add-google-cloud-ops-agent-repo.ps1")`
		invokeScript = `& "${env:UserProfile}\add-google-cloud-ops-agent-repo.ps1" -AlsoInstall`
	)
	if _, err := gce.RunRemotely(ctx, logger, vm, fetchScript); err != nil {
		return fmt.Errorf("InstallOpsAgent() failed to download repo script: %w", err)
	}
	install := func() error {
		// The environment assignments go first, then the script invocation
		// on its own line. The environment is rendered on every attempt.
		_, err := gce.RunRemotely(ctx, logger, vm, windowsEnvironment(scriptEnv)+"\n"+invokeScript)
		return err
	}
	// TODO: b/202526819 - Remove retries once the script does retries internally.
	if err := RunInstallFuncWithRetry(ctx, logger, vm, install); err != nil {
		return fmt.Errorf("InstallOpsAgent() failed to run repo script: %w", err)
	}
	return nil
}
// SetupOpsAgent installs the Ops Agent on vm and applies config, taking the
// package location from environment variables (see LocationFromEnvVars). It
// is a thin wrapper around SetupOpsAgentFrom.
func SetupOpsAgent(ctx context.Context, logger *log.Logger, vm *gce.VM, config string) error {
	location := LocationFromEnvVars()
	return SetupOpsAgentFrom(ctx, logger, vm, config, location)
}
// RestartOpsAgent runs the restart command for the Ops Agent on vm (see
// getRestartOpsAgentCmd) and then pauses for a fixed 10 seconds. It does not
// probe the agent afterwards; the pause is the only wait performed.
//
// The error from the remote command, if any, is wrapped so that callers can
// still inspect the underlying cause with errors.Is / errors.As.
func RestartOpsAgent(ctx context.Context, logger *log.Logger, vm *gce.VM) error {
	if _, err := gce.RunRemotely(ctx, logger, vm, getRestartOpsAgentCmd(vm.ImageSpec)); err != nil {
		return fmt.Errorf("RestartOpsAgent() failed to restart ops agent: %w", err)
	}
	// Give agents time to shut down. Fluent-Bit's default shutdown grace period
	// is 5 seconds, so we should probably give it at least that long.
	time.Sleep(10 * time.Second)
	return nil
}
// StartOpsAgentPluginServer starts the Ops Agent Plugin gRPC server on the
// testing VM in the background.
//
// On Windows the embedded testdata/start-uap-plugin-server.ps1 script is run
// on the VM as-is; the port argument is not used on that path. On other
// images ~/plugin is launched under sudo nohup, listening on localhost:port
// over TCP, with its standard output and standard error discarded.
//
// Errors are wrapped so that callers can still inspect the underlying cause
// with errors.Is / errors.As.
func StartOpsAgentPluginServer(ctx context.Context, logger *log.Logger, vm *gce.VM, port string) error {
	if gce.IsWindows(vm.ImageSpec) {
		startUAPWindowsPlugin, err := scriptsDir.ReadFile(path.Join("testdata", "start-uap-plugin-server.ps1"))
		if err != nil {
			return fmt.Errorf("StartOpsAgentPluginServer() failed to read start-uap-plugin-server.ps1: %w", err)
		}
		if _, err := gce.RunScriptRemotely(ctx, logger, vm, string(startUAPWindowsPlugin), nil, nil); err != nil {
			return fmt.Errorf("StartOpsAgentPluginServer() failed to start the ops agent plugin: %w", err)
		}
		return nil
	}

	startCmd := fmt.Sprintf("sudo nohup ~/plugin --address=localhost:%s --errorlogfile=errorlog.txt --protocol=tcp 1>/dev/null 2>/dev/null &", port)
	if _, err := gce.RunRemotely(ctx, logger, vm, startCmd); err != nil {
		return fmt.Errorf("StartOpsAgentPluginServer() failed to start the ops agent plugin: %w", err)
	}
	return nil
}
// StartCommandForImage returns the command line that starts the Ops Agent on
// a VM created from imageSpec: a grpcurl Start call against the plugin server
// in UAP plugin mode, otherwise a service start command.
//
// NOTE(review): the plugin port is hard-coded as 1234 here, whereas
// getRestartOpsAgentCmd uses OpsAgentPluginServerPort; confirm the two agree.
func StartCommandForImage(imageSpec string) string {
	switch {
	case gce.IsOpsAgentUAPPlugin():
		executable := "grpcurl"
		if gce.IsWindows(imageSpec) {
			executable = `C:\grpcurl.exe`
		}
		return executable + " -plaintext -d '{}' localhost:1234 plugin_comm.GuestAgentPlugin/Start"
	case gce.IsWindows(imageSpec):
		return "Start-Service google-cloud-ops-agent"
	default:
		// Return a command that works for both < 2.0.0 and >= 2.0.0 agents.
		return "sudo service google-cloud-ops-agent start || sudo systemctl start google-cloud-ops-agent"
	}
}
// StartOpsAgentViaUAPCommand returns the grpcurl command line that calls the
// plugin's Start method on localhost:1234. config is placed verbatim between
// the braces of the request payload, so an empty config produces the payload
// '{}'.
func StartOpsAgentViaUAPCommand(imageSpec string, config string) string {
	executable := "grpcurl"
	if gce.IsWindows(imageSpec) {
		executable = `C:\grpcurl.exe`
	}
	// With an empty config this yields exactly '{}', so no separate branch is
	// needed for that case.
	payload := "'{" + config + "}'"
	return executable + " -plaintext -d " + payload + " localhost:1234 plugin_comm.GuestAgentPlugin/Start"
}
// StopCommandForImage returns the command line that stops the Ops Agent on a
// VM created from imageSpec: a grpcurl Stop call against the plugin server in
// UAP plugin mode, otherwise a service stop command.
//
// NOTE(review): the plugin port is hard-coded as 1234 here, whereas
// getRestartOpsAgentCmd uses OpsAgentPluginServerPort; confirm the two agree.
func StopCommandForImage(imageSpec string) string {
	switch {
	case gce.IsOpsAgentUAPPlugin():
		executable := "grpcurl"
		if gce.IsWindows(imageSpec) {
			executable = `C:\grpcurl.exe`
		}
		return executable + " -plaintext -d '{}' localhost:1234 plugin_comm.GuestAgentPlugin/Stop"
	case gce.IsWindows(imageSpec):
		return "Stop-Service google-cloud-ops-agent -Force"
	default:
		// Return a command that works for both < 2.0.0 and >= 2.0.0 agents.
		return "sudo service google-cloud-ops-agent stop || sudo systemctl stop google-cloud-ops-agent"
	}
}
// GetUAPPluginStatusForImage returns the grpcurl command line that calls the
// plugin's GetStatus method on localhost:1234, using C:\grpcurl.exe on
// Windows images and grpcurl elsewhere.
func GetUAPPluginStatusForImage(imageSpec string) string {
	const request = " -plaintext -d '{}' localhost:1234 plugin_comm.GuestAgentPlugin/GetStatus"
	if gce.IsWindows(imageSpec) {
		return `C:\grpcurl.exe` + request
	}
	return "grpcurl" + request
}
// StartOpsAgentPluginWithBackoff starts the Ops Agent on vm and verifies that
// it reports itself as running, retrying the whole sequence on failure.
//
// One attempt runs the start command (StartCommandForImage), sleeps for 8
// seconds, runs the status command (GetUAPPluginStatusForImage) and succeeds
// only if the status output on stdout contains "is running ok". Attempts are
// separated by a constant 20 second delay, with at most 10 retries, and the
// retry loop is bound to ctx.
func StartOpsAgentPluginWithBackoff(ctx context.Context, logger *log.Logger, vm *gce.VM) error {
	attempt := func() error {
		_, startErr := gce.RunRemotely(ctx, logger, vm, StartCommandForImage(vm.ImageSpec))
		if startErr != nil {
			return fmt.Errorf("failed to start Ops Agent: %v", startErr)
		}

		// Pause before asking the plugin for its status.
		time.Sleep(8 * time.Second)

		status, statusErr := gce.RunRemotely(ctx, logger, vm, GetUAPPluginStatusForImage(vm.ImageSpec))
		if statusErr != nil {
			return fmt.Errorf("failed to retrieve Ops Agent status: %v", statusErr)
		}
		if strings.Contains(status.Stdout, "is running ok") {
			return nil
		}
		return fmt.Errorf("Ops Agent is not started successfully: stdout: %v, stderr: %v", status.Stdout, status.Stderr)
	}

	limited := backoff.WithMaxRetries(backoff.NewConstantBackOff(20*time.Second), 10)
	return backoff.Retry(attempt, backoff.WithContext(limited, ctx))
}
// SetupOpsAgentFrom is a variant of SetupOpsAgent that lets the call site
// decide, through location, which version of the agent gets installed.
//
// After installing the agent it uploads config to the agent's config path
// when config is nonempty. What happens next depends on the mode:
//   - In UAP plugin mode the plugin is always started (with retries), since
//     it is not started automatically on install.
//   - Otherwise, a nonempty config triggers a restart of the agent, and an
//     empty config just waits 20 seconds for the agent to start up.
//
// Errors from the config upload are wrapped so that callers can still
// inspect the underlying cause with errors.Is / errors.As.
func SetupOpsAgentFrom(ctx context.Context, logger *log.Logger, vm *gce.VM, config string, location PackageLocation) error {
	if err := InstallOpsAgent(ctx, logger, vm, location); err != nil {
		return err
	}
	startupDelay := 20 * time.Second
	if len(config) > 0 {
		if gce.IsWindows(vm.ImageSpec) {
			// Sleep to avoid some flaky errors when restarting the agent because the
			// services have not fully started up yet.
			time.Sleep(startupDelay)
		}
		if err := gce.UploadContent(ctx, logger, vm, strings.NewReader(config), util.GetConfigPath(vm.ImageSpec)); err != nil {
			return fmt.Errorf("SetupOpsAgentFrom() failed to upload config file: %w", err)
		}
	}
	// UAP Plugin needs to be started regardless, since it's not automatically started upon install.
	if gce.IsOpsAgentUAPPlugin() {
		return StartOpsAgentPluginWithBackoff(ctx, logger, vm)
	}
	// Regular Ops Agent only needs a restart if the config is not empty.
	if len(config) > 0 {
		return RestartOpsAgent(ctx, logger, vm)
	}
	// Give agents time to start up.
	time.Sleep(startupDelay)
	return nil
}
// RecommendedMachineType picks a reasonable machine type
// (https://cloud.google.com/compute/docs/machine-types) for a VM created from
// imageSpec. Windows images get a larger machine because they need more CPUs
// to start up in a reasonable amount of time; the Windows check takes
// precedence over the ARM check.
func RecommendedMachineType(imageSpec string) string {
	switch {
	case gce.IsWindows(imageSpec):
		return "e2-standard-4"
	case gce.IsARM(imageSpec):
		return "t2a-standard-2"
	default:
		return "e2-standard-2"
	}
}
// CommonSetup sets up the VM for testing. It is shorthand for
// CommonSetupWithExtraCreateArguments with no extra create arguments.
func CommonSetup(t *testing.T, imageSpec string) (context.Context, *logging.DirectoryLogger, *gce.VM) {
	ctx, dirLogger, vm := CommonSetupWithExtraCreateArguments(t, imageSpec, nil)
	return ctx, dirLogger, vm
}