Skip to content

Commit 22e4625

Browse files
Merge pull request #9146 from andfasano/day2-create-command-report
AGENT-965: improve node-joiner create command troubleshooting
2 parents 689918e + b57423a commit 22e4625

File tree

16 files changed

+1117
-6
lines changed

16 files changed

+1117
-6
lines changed
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
2+
# Verify the report generation for the add-nodes command.
3+
4+
exec node-joiner add-nodes --kubeconfig=$WORK/kubeconfig --log-level=debug --dir=$WORK
5+
6+
grep '"id": "report-addnodes-.*' $WORK/report.json
7+
grep '"id": "add-nodes-cluster-inspection"' $WORK/report.json
8+
grep '"id": "create-manifest"' $WORK/report.json
9+
grep '"id": "ignition"' $WORK/report.json
10+
grep '"id": "fetch-base-iso"' $WORK/report.json
11+
grep '"id": "fetch-base-iso.extract-image"' $WORK/report.json
12+
grep '"id": "fetch-base-iso.verify-version"' $WORK/report.json
13+
grep '"id": "create-agent-artifacts"' $WORK/report.json
14+
grep '"id": "create-agent-artifacts.agent-tui"' $WORK/report.json
15+
grep '"id": "create-agent-artifacts.prepare"' $WORK/report.json
16+
grep '"id": "generate-iso"' $WORK/report.json
17+
grep '"exit_code": 0' $WORK/report.json
18+
19+
-- nodes-config.yaml --
20+
hosts:
21+
- hostname: extra-worker-0
22+
interfaces:
23+
- name: eth0
24+
macAddress: 00:f4:3d:a0:0e:2b

docs/dev/workflow_report.md

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
# Workflow report
2+
3+
## Introduction
4+
Every installer command is internally based on the [assets framework](../design/assetgeneration.md), which allows defining a directed acyclic graph of assets (a generic work item).
5+
A workflow identifies a set of asset graph paths executed when running a specific command (note that the same asset may behave differently when triggered under different workflows).
6+
7+
A report collects the main relevant workflow events - as well as any errors and detailed results - in a human-readable format, providing an abstraction over the basic asset logging system.
8+
A report also immediately serializes to disk every update it receives, thus providing a simple mechanism for streaming the progress of the current command execution (particularly
9+
useful in case of remote execution).
10+
11+
## Stages
12+
A report is composed of a number of sequential _stages_. A stage represents a specific workflow phase (given by one or more assets) that could be relevant and/or informative from the user's point of view.
13+
A stage is composed of a _stage identifier_ (or briefly, a stage id) and a number of substages:
14+
15+
* The stage id is defined by a short internal identifier and a longer human-readable description, meant to be shown to the user.
16+
* Substages are a sequence of zero or more stages, and they are meant to provide a further level of detail for the owner stage, if required.
17+
18+
### Stage ids
19+
A stage id can be defined via the `NewStageID` method, for example:
20+
```
21+
StageFetchBaseISO wr.StageID = wr.NewStageID("fetch-base-iso", "Retrieving the base ISO image")
22+
```
23+
24+
A substage id can be defined by prefixing the internal id with the owner stage id, in the format `<owner stage>.<substage>`, for example:
25+
```
26+
StageFetchBaseISOExtract wr.StageID = wr.NewStageID("fetch-base-iso.extract-image", "Extracting base image from release payload")
27+
StageFetchBaseISOVerify wr.StageID = wr.NewStageID("fetch-base-iso.verify-version", "Verifying base image version")
28+
StageFetchBaseISODownload wr.StageID = wr.NewStageID("fetch-base-iso.download-image", "Downloading base ISO image")
29+
```
30+
31+
### Substages
32+
Substages are completely optional, and they may be useful for detailing a particularly big or lengthy stage. So, it's perfectly fine to define stages without any substage.
33+
The current framework does not support more than two levels of stages, essentially both to keep the interface simple and to produce an easy-to-read output for the
34+
final user.
35+
36+
### (Sub)Stage result
37+
The workflow report framework allows optionally attaching an artifact to each stage (or substage), in order to capture the end result of the stage execution. The result field is
38+
a free-text string, and its format depends on the specific stage (even though a JSON format is recommended).
39+
40+
## Populating a report
41+
Every asset can access the current report from the `Generate()` context object (for the installer commands where reporting has been enabled), using the `GetReport()` method.
42+
Once retrieved, the report `Stage` method can be used to add a new stage to the current report:
43+
```
44+
workflowreport.GetReport(ctx).Stage(workflow.StageFetchBaseISO)
45+
```
46+
47+
A similar approach can be followed to add a new substage to the report:
48+
```
49+
workflowreport.GetReport(ctx).SubStage(workflow.StageFetchBaseISOExtract)
50+
```
51+
52+
_Note: a substage cannot be added before adding the related owner stage._
53+
54+
## Enabling the report for a command
55+
If a report was not previously enabled for a specific command, the previously shown commands will have no effect. To activate the reporting,
56+
it is sufficient to use the `workflowreport.Context()` method which will create a dedicated context to be used in the assets generation:
57+
58+
```
59+
func NewAddNodesCommand(directory string, kubeConfig string) error {
60+
61+
ctx := workflowreport.Context(string(workflow.AgentWorkflowTypeAddNodes), directory)
62+
63+
fetcher := store.NewAssetsFetcher(directory)
64+
err = fetcher.FetchAndPersist(ctx, ...) //
65+
66+
workflowreport.GetReport(ctx).Complete(err)
67+
```
68+
69+
At the end, the `Complete(err)` method must be invoked to close the report (and to report the error, if any).
70+
71+
## Appendix
72+
73+
### Sample of report.json file (without any result)
74+
```
75+
{
76+
"id": "report-addnodes-202410301546",
77+
"start_time": "2024-10-30T15:46:36.646915757Z",
78+
"end_time": "2024-10-30T15:47:23.689529009Z",
79+
"stages": [
80+
{
81+
"id": "add-nodes-cluster-inspection",
82+
"description": "Gathering additional information from the target cluster",
83+
"start_time": "2024-10-30T15:46:36.646915757Z",
84+
"end_time": "2024-10-30T15:46:36.814616705Z"
85+
},
86+
{
87+
"id": "create-manifest",
88+
"description": "Creating internal configuration manifests",
89+
"start_time": "2024-10-30T15:46:36.8146174Z",
90+
"end_time": "2024-10-30T15:46:37.890492356Z"
91+
},
92+
{
93+
"id": "ignition",
94+
"description": "Rendering ISO ignition",
95+
"start_time": "2024-10-30T15:46:37.890493244Z",
96+
"end_time": "2024-10-30T15:46:38.227771699Z"
97+
},
98+
{
99+
"id": "fetch-base-iso",
100+
"description": "Retrieving the base ISO image",
101+
"start_time": "2024-10-30T15:46:38.227772149Z",
102+
"end_time": "2024-10-30T15:47:00.63994573Z",
103+
"sub_stages": [
104+
{
105+
"id": "fetch-base-iso.extract-image",
106+
"description": "Extracting base image from release payload",
107+
"start_time": "2024-10-30T15:46:38.228032465Z",
108+
"end_time": "2024-10-30T15:46:51.428272041Z"
109+
},
110+
{
111+
"id": "fetch-base-iso.verify-version",
112+
"description": "Verifying base image version",
113+
"start_time": "2024-10-30T15:46:51.428273074Z",
114+
"end_time": "2024-10-30T15:47:00.639945356Z"
115+
}
116+
]
117+
},
118+
{
119+
"id": "create-agent-artifacts",
120+
"description": "Creating agent artifacts for the final image",
121+
"start_time": "2024-10-30T15:47:00.639946343Z",
122+
"end_time": "2024-10-30T15:47:20.70873527Z",
123+
"sub_stages": [
124+
{
125+
"id": "create-agent-artifacts.agent-tui",
126+
"description": "Extracting required artifacts from release payload",
127+
"start_time": "2024-10-30T15:47:00.641937138Z",
128+
"end_time": "2024-10-30T15:47:10.419651275Z"
129+
},
130+
{
131+
"id": "create-agent-artifacts.prepare",
132+
"description": "Preparing artifacts",
133+
"start_time": "2024-10-30T15:47:10.419652528Z",
134+
"end_time": "2024-10-30T15:47:20.708735111Z"
135+
}
136+
]
137+
},
138+
{
139+
"id": "generate-iso",
140+
"description": "Assembling ISO image",
141+
"start_time": "2024-10-30T15:47:20.708735596Z",
142+
"end_time": "2024-10-30T15:47:23.689529009Z"
143+
}
144+
],
145+
"result": {
146+
"exit_code": 0
147+
}
148+
}
149+
```

pkg/asset/agent/image/agentartifacts.go

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"github.com/openshift/installer/pkg/asset/agent/manifests"
2020
"github.com/openshift/installer/pkg/asset/agent/mirror"
2121
"github.com/openshift/installer/pkg/asset/agent/workflow"
22+
workflowreport "github.com/openshift/installer/pkg/asset/agent/workflow/report"
2223
)
2324

2425
const (
@@ -57,7 +58,7 @@ func (a *AgentArtifacts) Dependencies() []asset.Asset {
5758
}
5859

5960
// Generate generates the configurations for the agent ISO image and PXE assets.
60-
func (a *AgentArtifacts) Generate(_ context.Context, dependencies asset.Parents) error {
61+
func (a *AgentArtifacts) Generate(ctx context.Context, dependencies asset.Parents) error {
6162
ignition := &Ignition{}
6263
kargs := &Kargs{}
6364
baseIso := &BaseIso{}
@@ -68,6 +69,10 @@ func (a *AgentArtifacts) Generate(_ context.Context, dependencies asset.Parents)
6869
agentWorkflow := &workflow.AgentWorkflow{}
6970
dependencies.Get(ignition, kargs, baseIso, agentManifests, agentClusterInstall, registriesConf, agentconfig, agentWorkflow)
7071

72+
if err := workflowreport.GetReport(ctx).Stage(workflow.StageAgentArtifacts); err != nil {
73+
return err
74+
}
75+
7176
ignitionByte, err := json.Marshal(ignition.Config)
7277
if err != nil {
7378
return err
@@ -103,11 +108,18 @@ func (a *AgentArtifacts) Generate(_ context.Context, dependencies asset.Parents)
103108

104109
var agentTuiFiles []string
105110
if agentClusterInstall.GetExternalPlatformName() != agent.ExternalPlatformNameOci {
111+
if err := workflowreport.GetReport(ctx).SubStage(workflow.StageAgentArtifactsAgentTUI); err != nil {
112+
return err
113+
}
106114
agentTuiFiles, err = a.fetchAgentTuiFiles(agentManifests.ClusterImageSet.Spec.ReleaseImage, agentManifests.GetPullSecretData(), registriesConf.MirrorConfig)
107115
if err != nil {
108116
return err
109117
}
110118
}
119+
120+
if err := workflowreport.GetReport(ctx).SubStage(workflow.StageAgentArtifactsPrepare); err != nil {
121+
return err
122+
}
111123
err = a.prepareAgentArtifacts(a.ISOPath, agentTuiFiles)
112124
if err != nil {
113125
return err

pkg/asset/agent/image/agentimage.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"github.com/openshift/installer/pkg/asset/agent/joiner"
1919
"github.com/openshift/installer/pkg/asset/agent/manifests"
2020
"github.com/openshift/installer/pkg/asset/agent/workflow"
21+
workflowreport "github.com/openshift/installer/pkg/asset/agent/workflow/report"
2122
)
2223

2324
const (
@@ -64,6 +65,10 @@ func (a *AgentImage) Generate(ctx context.Context, dependencies asset.Parents) e
6465
baseIso := &BaseIso{}
6566
dependencies.Get(agentArtifacts, agentManifests, baseIso, agentWorkflow, clusterInfo)
6667

68+
if err := workflowreport.GetReport(ctx).Stage(workflow.StageGenerateISO); err != nil {
69+
return err
70+
}
71+
6772
switch agentWorkflow.Workflow {
6873
case workflow.AgentWorkflowTypeInstall:
6974
a.platform = agentManifests.AgentClusterInstall.Spec.PlatformType

pkg/asset/agent/image/baseiso.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"github.com/openshift/installer/pkg/asset/agent/manifests"
2020
"github.com/openshift/installer/pkg/asset/agent/mirror"
2121
"github.com/openshift/installer/pkg/asset/agent/workflow"
22+
workflowreport "github.com/openshift/installer/pkg/asset/agent/workflow/report"
2223
"github.com/openshift/installer/pkg/rhcos"
2324
"github.com/openshift/installer/pkg/rhcos/cache"
2425
"github.com/openshift/installer/pkg/types"
@@ -146,6 +147,10 @@ func (i *BaseIso) Generate(ctx context.Context, dependencies asset.Parents) erro
146147
var err error
147148
var baseIsoFileName string
148149

150+
if err := workflowreport.GetReport(ctx).Stage(workflow.StageFetchBaseISO); err != nil {
151+
return err
152+
}
153+
149154
if urlOverride, ok := os.LookupEnv("OPENSHIFT_INSTALL_OS_IMAGE_OVERRIDE"); ok && urlOverride != "" {
150155
logrus.Warn("Found override for OS Image. Please be warned, this is not advised")
151156
baseIsoFileName, err = cache.DownloadImageFile(urlOverride, cache.AgentApplicationName)
@@ -214,8 +219,15 @@ func (i *BaseIso) retrieveBaseIso(ctx context.Context, dependencies asset.Parent
214219
// If we have the image registry location and 'oc' command is available then get from release payload
215220
ocRelease := i.getRelease(agentManifests, registriesConf)
216221
logrus.Info("Extracting base ISO from release payload")
222+
223+
if err := workflowreport.GetReport(ctx).SubStage(workflow.StageFetchBaseISOExtract); err != nil {
224+
return "", err
225+
}
217226
baseIsoFileName, err := ocRelease.GetBaseIso(archName)
218227
if err == nil {
228+
if err := workflowreport.GetReport(ctx).SubStage(workflow.StageFetchBaseISOVerify); err != nil {
229+
return "", err
230+
}
219231
i.checkReleasePayloadBaseISOVersion(ctx, ocRelease, archName)
220232

221233
logrus.Debugf("Extracted base ISO image %s from release payload", baseIsoFileName)
@@ -233,6 +245,9 @@ func (i *BaseIso) retrieveBaseIso(ctx context.Context, dependencies asset.Parent
233245
}
234246

235247
logrus.Info("Downloading base ISO")
248+
if err := workflowreport.GetReport(ctx).SubStage(workflow.StageFetchBaseISODownload); err != nil {
249+
return "", err
250+
}
236251
return i.downloadIso(ctx, archName)
237252
}
238253

pkg/asset/agent/image/ignition.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import (
3131
"github.com/openshift/installer/pkg/asset/agent/manifests"
3232
"github.com/openshift/installer/pkg/asset/agent/mirror"
3333
"github.com/openshift/installer/pkg/asset/agent/workflow"
34+
workflowreport "github.com/openshift/installer/pkg/asset/agent/workflow/report"
3435
"github.com/openshift/installer/pkg/asset/ignition"
3536
"github.com/openshift/installer/pkg/asset/ignition/bootstrap"
3637
"github.com/openshift/installer/pkg/asset/password"
@@ -113,7 +114,7 @@ func (a *Ignition) Dependencies() []asset.Asset {
113114
}
114115

115116
// Generate generates the agent installer ignition.
116-
func (a *Ignition) Generate(_ context.Context, dependencies asset.Parents) error {
117+
func (a *Ignition) Generate(ctx context.Context, dependencies asset.Parents) error {
117118
agentWorkflow := &workflow.AgentWorkflow{}
118119
agentManifests := &manifests.AgentManifests{}
119120
agentConfigAsset := &agentconfig.AgentConfig{}
@@ -123,6 +124,10 @@ func (a *Ignition) Generate(_ context.Context, dependencies asset.Parents) error
123124
infraEnvAsset := &common.InfraEnvID{}
124125
dependencies.Get(agentManifests, agentConfigAsset, agentHostsAsset, extraManifests, authConfig, agentWorkflow, infraEnvAsset)
125126

127+
if err := workflowreport.GetReport(ctx).Stage(workflow.StageIgnition); err != nil {
128+
return err
129+
}
130+
126131
pwd := &password.KubeadminPassword{}
127132
dependencies.Get(pwd)
128133
pwdHash := string(pwd.PasswordHash)

pkg/asset/agent/joiner/clusterinfo.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
"github.com/openshift/installer/pkg/asset"
3030
"github.com/openshift/installer/pkg/asset/agent"
3131
"github.com/openshift/installer/pkg/asset/agent/workflow"
32+
workflowreport "github.com/openshift/installer/pkg/asset/agent/workflow/report"
3233
"github.com/openshift/installer/pkg/types"
3334
"github.com/openshift/installer/pkg/types/aws"
3435
"github.com/openshift/installer/pkg/types/azure"
@@ -93,14 +94,18 @@ func (*ClusterInfo) Dependencies() []asset.Asset {
9394
}
9495

9596
// Generate generates the ClusterInfo.
96-
func (ci *ClusterInfo) Generate(_ context.Context, dependencies asset.Parents) error {
97+
func (ci *ClusterInfo) Generate(ctx context.Context, dependencies asset.Parents) error {
9798
agentWorkflow := &workflow.AgentWorkflow{}
9899
dependencies.Get(agentWorkflow, &ci.addNodesConfig)
99100

100101
if agentWorkflow.Workflow != workflow.AgentWorkflowTypeAddNodes {
101102
return nil
102103
}
103104

105+
if err := workflowreport.GetReport(ctx).Stage(workflow.StageClusterInspection); err != nil {
106+
return err
107+
}
108+
104109
err := ci.initClients()
105110
if err != nil {
106111
return err

pkg/asset/agent/manifests/agent.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ import (
1414
"github.com/openshift/assisted-service/models"
1515
hivev1 "github.com/openshift/hive/apis/hive/v1"
1616
"github.com/openshift/installer/pkg/asset"
17+
"github.com/openshift/installer/pkg/asset/agent/workflow"
18+
workflowreport "github.com/openshift/installer/pkg/asset/agent/workflow/report"
1719
)
1820

1921
const (
@@ -57,7 +59,11 @@ func (m *AgentManifests) Dependencies() []asset.Asset {
5759
}
5860

5961
// Generate generates the respective manifest files.
60-
func (m *AgentManifests) Generate(_ context.Context, dependencies asset.Parents) error {
62+
func (m *AgentManifests) Generate(ctx context.Context, dependencies asset.Parents) error {
63+
if err := workflowreport.GetReport(ctx).Stage(workflow.StageCreateManifests); err != nil {
64+
return err
65+
}
66+
6167
for _, a := range []asset.WritableAsset{
6268
&AgentPullSecret{},
6369
&InfraEnv{},

0 commit comments

Comments
 (0)