From 1fad8d0258b124a13fa821a451b673bf9408f14b Mon Sep 17 00:00:00 2001 From: Jerry Gong Date: Sun, 29 Sep 2024 19:42:09 -0400 Subject: [PATCH 01/27] WIP --- e2e_tests/tests/cluster/test_log_policies.py | 82 +++- e2e_tests/tests/cluster/utils.py | 28 ++ master/internal/api_experiment.go | 3 +- master/internal/api_runs.go | 1 + master/internal/api_tasks_intg_test.go | 159 +++++++- master/internal/core_experiment.go | 15 + master/internal/logpattern/logpattern.go | 83 +++- master/internal/trial.go | 7 + master/internal/trial_intg_test.go | 79 ++++ master/pkg/model/experiment.go | 1 + master/pkg/model/task.go | 3 +- master/pkg/model/task_container_defaults.go | 2 +- master/pkg/schemas/defaults.go | 6 + master/pkg/schemas/expconf/const.go | 7 + .../pkg/schemas/expconf/experiment_config.go | 2 +- .../pkg/schemas/expconf/log_pattern_config.go | 29 +- .../expconf/zgen_experiment_config_v0.go | 7 +- .../pkg/schemas/expconf/zgen_log_policy_v0.go | 12 +- master/pkg/schemas/schema.go | 3 + master/pkg/schemas/zgen_schemas.go | 13 +- ...experiment-add-log-signal-labels.tx.up.sql | 2 + proto/buf.image.bin | Bin 669524 -> 669818 bytes proto/pkg/runv1/run.pb.go | 342 ++++++++--------- proto/pkg/trialv1/trial.pb.go | 355 +++++++++--------- proto/src/determined/run/v1/run.proto | 2 +- proto/src/determined/trial/v1/trial.proto | 2 +- schemas/expconf/v0/log-policy.json | 24 +- 27 files changed, 882 insertions(+), 387 deletions(-) create mode 100644 master/static/migrations/20240916170654_experiment-add-log-signal-labels.tx.up.sql diff --git a/e2e_tests/tests/cluster/test_log_policies.py b/e2e_tests/tests/cluster/test_log_policies.py index a810bc43d59..8305b1c7d01 100644 --- a/e2e_tests/tests/cluster/test_log_policies.py +++ b/e2e_tests/tests/cluster/test_log_policies.py @@ -5,6 +5,8 @@ from tests import api_utils from tests import experiment as exp from tests.experiment import noop +from tests import detproc +from tests.cluster import utils @pytest.mark.e2e_cpu @@ -16,7 +18,7 @@ def test_log_policy_cancel_retries(should_match: bool) -> None: regex = r"(.*) this should not match (.*)" config = { - "log_policies": [{"pattern": regex, "action": {"type": "cancel_retries"}}], + "log_policies": [{"pattern": regex, "actions": [{"type": "cancel_retries"}]}], "max_restarts": 1, } exp_ref = noop.create_experiment(sess, [noop.Exit(7)], config=config) @@ -152,3 +154,81 @@ def test_log_policy_exclude_slurm(should_match: bool) -> None: ) # Job fails to start up the second restart since all nodes are excluded. else: assert times_ran == 2 + + +@pytest.mark.e2e_cpu +@pytest.mark.parametrize("should_match", [True, False]) +def test_log_signal(should_match: bool) -> None: + sess = api_utils.user_session() + regex = r"executing.*action.*exit.*code.*7" + if not should_match: + regex = r"(.*) this should not match (.*)" + + expected_signal = "Test Signal" + config = { + "log_policies": [{"pattern": regex, "signal": expected_signal}], + "max_restarts": 1, + } + + exp_ref = noop.create_experiment(sess, [noop.Exit(7)], config=config) + assert exp_ref.wait(interval=0.01) == client.ExperimentState.ERROR + + + searchRes = utils.get_run_by_exp_id(sess, exp_ref.id) + runSignal = searchRes.runs[0].logSignal + + trialRes = bindings.get_GetTrial( + sess, + trialId = searchRes.runs[0].id + ) + trialSignal = trialRes.trial.logSignal + + + if should_match: + assert runSignal == expected_signal + assert trialSignal == expected_signal + else: + assert runSignal is None + assert trialSignal is None + +@pytest.mark.e2e_cpu +def test_signal_clear_after_exp_continue() -> None: + sess = api_utils.user_session() + regex = r"executing.*action.*exit.*code.*7" + + expected_signal = "Test Signal" + config = { + "log_policies": [{"pattern": regex, "signal": expected_signal}], + "max_restarts": 0, + } + + exp_ref = noop.create_experiment(sess, [noop.Exit(7)], config=config) + assert exp_ref.wait(interval=0.01) == client.ExperimentState.ERROR + + searchRes = utils.get_run_by_exp_id(sess, exp_ref.id) + runSignal = searchRes.runs[0].logSignal + + trialRes = bindings.get_GetTrial( + sess, + trialId = searchRes.runs[0].id + ) + trialSignal = trialRes.trial.logSignal + + assert runSignal == expected_signal + assert trialSignal == expected_signal + + + detproc.check_call(sess, ["det", "e", "continue", str(exp_ref.id), "--config", "hyperparameters.crash_on_startup=false"]) + exp.wait_for_experiment_state(sess, exp_ref.id, bindings.experimentv1State.COMPLETED) + + searchRes = utils.get_run_by_exp_id(sess, exp_ref.id) + runSignal = searchRes.runs[0].logSignal + + trialRes = bindings.get_GetTrial( + sess, + trialId = searchRes.runs[0].id + ) + trialSignal = trialRes.trial.logSignal + + assert runSignal == None + assert trialSignal == None diff --git a/e2e_tests/tests/cluster/utils.py b/e2e_tests/tests/cluster/utils.py index de38408e182..f71e1cb290e 100644 --- a/e2e_tests/tests/cluster/utils.py +++ b/e2e_tests/tests/cluster/utils.py @@ -11,6 +11,7 @@ from typing_extensions import Literal # noqa:I2041 from determined.common import api +from determined.common.api import bindings from tests import command from tests import config as conf from tests import detproc @@ -197,3 +198,30 @@ def set_master_port(config: str) -> None: lc = conf.load_config(config_path=config) port = get_master_port(lc) conf.MASTER_PORT = port + + +def get_run_by_exp_id(sess, exp_id) -> int: + return bindings.post_SearchRuns( + sess, + body=bindings.v1SearchRunsRequest( + limit=1, + filter="""{ + "filterGroup": { + "children": [ + { + "columnName": "experimentId", + "kind": "field", + "location": "LOCATION_TYPE_RUN", + "operator": "=", + "type": "COLUMN_TYPE_NUMBER", + "value": %s + } + ], + "conjunction": "and", + "kind": "group" + }, + "showArchived": false + }""" + % exp_id, + ), + ) diff --git a/master/internal/api_experiment.go b/master/internal/api_experiment.go index 9fe3daa6c81..7dc7eabcfae 100644 --- a/master/internal/api_experiment.go +++ b/master/internal/api_experiment.go @@ -1616,9 +1616,10 @@ func (a *apiServer) ContinueExperiment( if _, err := tx.NewUpdate().Table("runs"). // TODO(nick-runs) call runs package. Set("restarts = 0"). Set("end_time = null"). + Set("log_signal = null"). Where("id IN (?)", bun.In(trialIDs)). Exec(ctx); err != nil { - return fmt.Errorf("zeroing out trial restarts: %w", err) + return fmt.Errorf("zeroing out trial stats: %w", err) } } diff --git a/master/internal/api_runs.go b/master/internal/api_runs.go index 7f6322b2958..a5ec14165e8 100644 --- a/master/internal/api_runs.go +++ b/master/internal/api_runs.go @@ -130,6 +130,7 @@ func (a *apiServer) SearchRuns( } resp.Pagination = pagination resp.Runs = runs + fmt.Printf("\n\n runs: %#v\n\n", runs) return resp, nil } diff --git a/master/internal/api_tasks_intg_test.go b/master/internal/api_tasks_intg_test.go index 843762caf57..9298d9e8f12 100644 --- a/master/internal/api_tasks_intg_test.go +++ b/master/internal/api_tasks_intg_test.go @@ -218,11 +218,11 @@ func TestPostTaskLogsLogPattern(t *testing.T) { activeConfig, err := api.m.db.ActiveExperimentConfig(trial.ExperimentID) require.NoError(t, err) - activeConfig.RawLogPolicies = expconf.LogPoliciesConfig{ - expconf.LogPolicy{RawPattern: "sub", RawAction: expconf.LogAction{ + activeConfig.RawLogPolicies = &expconf.LogPoliciesConfig{ + expconf.LogPolicy{RawPattern: "sub", RawAction: &expconf.LogAction{ RawCancelRetries: &expconf.LogActionCancelRetries{}, }}, - expconf.LogPolicy{RawPattern: `\d{5}$`, RawAction: expconf.LogAction{ + expconf.LogPolicy{RawPattern: `\d{5}$`, RawAction: &expconf.LogAction{ RawExcludeNode: &expconf.LogActionExcludeNode{}, }}, } @@ -443,3 +443,156 @@ func TestGetAllocationAcceleratorData(t *testing.T) { require.Equal(t, resp.AcceleratorData[0].ResourcePool, a1.ResourcePool, "failed to get the correct allocation's resource pool data") } + +func TestPostTaskLogsLogSignalDataSaving(t *testing.T) { + api, curUser, ctx := setupAPITest(t, nil) + trial, task := createTestTrial(t, api, curUser) + + activeConfig, err := api.m.db.ActiveExperimentConfig(trial.ExperimentID) + require.NoError(t, err) + + signal := "sub" + activeConfig.RawLogPolicies = &expconf.LogPoliciesConfig{ + expconf.LogPolicy{RawPattern: "sub", RawSignal: &signal}, + expconf.LogPolicy{RawPattern: `\d{5}$`}, + } + + v, err := json.Marshal(activeConfig) + require.NoError(t, err) + + var m map[string]any + require.NoError(t, json.Unmarshal(v, &m)) + + _, err = db.Bun().NewUpdate().Table("experiments"). + Where("id = ?", trial.ExperimentID). + Set("config = ?", m). + Exec(ctx) + require.NoError(t, err) + + _, err = api.PostTaskLogs(ctx, &apiv1.PostTaskLogsRequest{ + Logs: []*taskv1.TaskLog{ + { + TaskId: string(task.TaskID), + AgentId: ptrs.Ptr("a1"), + Log: "stringsubstring", + }, + { + TaskId: string(task.TaskID), + AgentId: ptrs.Ptr("a1"), + Log: "12345", + }, + }, + }) + require.NoError(t, err) + + runsOut := struct { + bun.BaseModel `bun:"table:runs"` + LogSignal *string `db:"log_signal"` + }{} + + err = db.Bun().NewSelect().Model(&runsOut). + Where("id = ?", trial.ID). + Scan(ctx) + require.NoError(t, err) + require.NotNil(t, runsOut) + require.NotNil(t, runsOut.LogSignal) + + require.Equal(t, "sub", *runsOut.LogSignal) + + tasksOut := struct { + bun.BaseModel `bun:"table:tasks"` + LogSignal *string `db:"log_signal"` + }{} + err = db.Bun().NewSelect().Model(&tasksOut). + Join("LEFT JOIN run_id_task_id AS rt on tasks.task_id = rt.task_id"). + Where("run_id = ?", trial.ID). + Scan(ctx) + require.NoError(t, err) + require.NotNil(t, tasksOut) + require.NotNil(t, tasksOut.LogSignal) + + require.Equal(t, "sub", *tasksOut.LogSignal) +} + +func TestPostTaskLogsDefaultLogSignal(t *testing.T) { + api, curUser, ctx := setupAPITest(t, nil) + trial, task := createTestTrial(t, api, curUser) + + _, err := api.PostTaskLogs(ctx, &apiv1.PostTaskLogsRequest{ + Logs: []*taskv1.TaskLog{ + { + TaskId: string(task.TaskID), + AgentId: ptrs.Ptr("a1"), + Log: "log CUDA out of memory more log", + }, + }, + }) + require.NoError(t, err) + + runsOut := struct { + bun.BaseModel `bun:"table:runs"` + LogSignal *string `db:"log_signal"` + }{} + + err = db.Bun().NewSelect().Model(&runsOut). + Where("id = ?", trial.ID). + Scan(ctx) + require.NoError(t, err) + require.NotNil(t, runsOut) + require.NotNil(t, runsOut.LogSignal) + + require.Equal(t, "CUDA OOM", *runsOut.LogSignal) + + tasksOut := struct { + bun.BaseModel `bun:"table:tasks"` + LogSignal *string `db:"log_signal"` + }{} + err = db.Bun().NewSelect().Model(&tasksOut). + Join("LEFT JOIN run_id_task_id AS rt on tasks.task_id = rt.task_id"). + Where("run_id = ?", trial.ID). + Scan(ctx) + require.NoError(t, err) + require.NotNil(t, tasksOut) + require.NotNil(t, tasksOut.LogSignal) + + require.Equal(t, "CUDA OOM", *tasksOut.LogSignal) + + _, err = api.PostTaskLogs(ctx, &apiv1.PostTaskLogsRequest{ + Logs: []*taskv1.TaskLog{ + { + TaskId: string(task.TaskID), + AgentId: ptrs.Ptr("a1"), + Log: "log uncorrectable ECC error encountered more log", + }, + }, + }) + require.NoError(t, err) + + runsOut = struct { + bun.BaseModel `bun:"table:runs"` + LogSignal *string `db:"log_signal"` + }{} + + err = db.Bun().NewSelect().Model(&runsOut). + Where("id = ?", trial.ID). + Scan(ctx) + require.NoError(t, err) + require.NotNil(t, runsOut) + require.NotNil(t, runsOut.LogSignal) + + require.Equal(t, "ECC Error", *runsOut.LogSignal) + + tasksOut = struct { + bun.BaseModel `bun:"table:tasks"` + LogSignal *string `db:"log_signal"` + }{} + err = db.Bun().NewSelect().Model(&tasksOut). + Join("LEFT JOIN run_id_task_id AS rt on tasks.task_id = rt.task_id"). + Where("run_id = ?", trial.ID). + Scan(ctx) + require.NoError(t, err) + require.NotNil(t, tasksOut) + require.NotNil(t, tasksOut.LogSignal) + + require.Equal(t, "ECC Error", *tasksOut.LogSignal) +} diff --git a/master/internal/core_experiment.go b/master/internal/core_experiment.go index 88161fccc94..94507f7d2fe 100644 --- a/master/internal/core_experiment.go +++ b/master/internal/core_experiment.go @@ -291,6 +291,9 @@ func (m *Master) parseCreateExperiment(ctx context.Context, req *apiv1.CreateExp } defaulted := schemas.WithDefaults(config) + fmt.Printf("\n\ndefaulted: %#v\n\n", *defaulted.RawEnvironment.RawImage.RawCUDA) + + fmt.Printf("\n\nconfig: %#v\n\n", config.RawEnvironment) resources := defaulted.Resources() p, err := getCreateExperimentsProject(m, req, owner, defaulted) @@ -317,17 +320,23 @@ func (m *Master) parseCreateExperiment(ctx context.Context, req *apiv1.CreateExp } } + fmt.Printf("\n\nbefore mering taskcontainerdefault: %#v\n\n", m.config.TaskContainerDefaults.Image) + taskContainerDefaults, err := m.rm.TaskContainerDefaults( poolName, m.config.TaskContainerDefaults, ) + fmt.Printf("\n\nafter mering taskcontainerdefault: %#v\n\n", taskContainerDefaults.Image) if err != nil { return nil, nil, config, nil, nil, errors.Wrapf(err, "error getting TaskContainerDefaults") } + fmt.Printf("\n\nbefore mering taskcontainerdefault with config: %#v\n\n", config.RawEnvironment) taskSpec.TaskContainerDefaults = taskContainerDefaults taskSpec.TaskContainerDefaults.MergeIntoExpConfig(&config) + fmt.Printf("\n\nafter mering taskcontainerdefault with config: %#v\n\n", config.RawEnvironment.RawImage) + // Merge log retention into the taskSpec. if config.RawRetentionPolicy != nil { taskSpec.LogRetentionDays = config.RawRetentionPolicy.RawLogRetentionDays @@ -355,9 +364,15 @@ func (m *Master) parseCreateExperiment(ctx context.Context, req *apiv1.CreateExp config.RawResources.RawPriority = &prio } + userLogPolicyConfig := config.RawLogPolicies // Lastly, apply any json-schema-defined defaults. + fmt.Printf("\n\nbefore last withdefaults config: %#v\n\n", config.RawEnvironment.RawImage) + fmt.Printf("\n\nbefore last withdefaults config log: %#v\n\n", userLogPolicyConfig) config = schemas.WithDefaults(config) + fmt.Printf("\n\nafter last withdefaults config: %#v\n\n", *config.RawEnvironment.RawImage.RawCUDA) + fmt.Printf("\n\nafter last withdefaults config log: %#v\n\n", config.RawLogPolicies) + // Make sure the experiment config has all eventuallyRequired fields. if err = schemas.IsComplete(config); err != nil { return nil, nil, config, nil, nil, errors.Wrap(err, "invalid experiment configuration") diff --git a/master/internal/logpattern/logpattern.go b/master/internal/logpattern/logpattern.go index f7f62e4d87c..2a8de32c5d3 100644 --- a/master/internal/logpattern/logpattern.go +++ b/master/internal/logpattern/logpattern.go @@ -76,23 +76,51 @@ func (l *LogPatternPolicies) monitor(ctx context.Context, } if compiledRegex.MatchString(log.Log) { - switch policy.Action().GetUnionMember().(type) { - case expconf.LogActionCancelRetries: - if err := addDontRetry( - ctx, model.TaskID(log.TaskID), *log.AgentID, policy.Pattern(), log.Log, - ); err != nil { - return fmt.Errorf("adding don't retry: %w", err) + if policy.Action() != nil { + switch policy.Action().GetUnionMember().(type) { + case expconf.LogActionCancelRetries: + if err := addDontRetry( + ctx, model.TaskID(log.TaskID), *log.AgentID, policy.Pattern(), log.Log, + ); err != nil { + return fmt.Errorf("adding don't retry: %w", err) + } + + case expconf.LogActionExcludeNode: + if err := addRetryOnDifferentNode( + ctx, model.TaskID(log.TaskID), *log.AgentID, policy.Pattern(), log.Log, + ); err != nil { + return fmt.Errorf("adding retry on different node: %w", err) + } + + default: + return fmt.Errorf("unrecognized log pattern policy type") } + } - case expconf.LogActionExcludeNode: - if err := addRetryOnDifferentNode( - ctx, model.TaskID(log.TaskID), *log.AgentID, policy.Pattern(), log.Log, - ); err != nil { - return fmt.Errorf("adding retry on different node: %w", err) + if policy.Signal() != nil { + signal := policy.Signal() + + err = db.Bun().RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error { + if _, err := tx.NewUpdate().Model(&model.Task{}). + Set("log_signal = ?", signal). + Where("task_id = ?", log.TaskID). + Exec(ctx); err != nil { + return fmt.Errorf("updating log signal of task %s: %w", log.TaskID, err) + } + if _, err := tx.NewUpdate().Model(&model.Run{}). + Table("run_id_task_id"). + Set("log_signal = ?", signal). + Where("run.id = run_id_task_id.run_id"). + Where("run_id_task_id.task_id = ?", log.TaskID). + Exec(ctx); err != nil { + return fmt.Errorf("updating log signal of task %s: %w", log.TaskID, err) + } + + return nil + }) + if err != nil { + return fmt.Errorf("updating log signal: %w", err) } - - default: - return fmt.Errorf("unrecognized log pattern policy type") } } } @@ -231,3 +259,30 @@ func TaskLogsFromDontRetryTriggers(taskID model.TaskID, t []DontRetryTrigger) [] return taskLogs } + +// ClearSignal resets the log_signal. +func ClearSignal(ctx context.Context, taskID model.TaskID) error { + if err := db.Bun().RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error { + if _, err := tx.NewUpdate().Model(&model.Task{}). + Set("log_signal = null"). + Where("task_id = ?", taskID). + Exec(ctx); err != nil { + return fmt.Errorf("resetting log signal of task %s: %w", taskID, err) + } + if _, err := tx.NewUpdate().Model(&model.Run{}). + Table("run_id_task_id"). + Set("log_signal = null"). + Where("run.id = run_id_task_id.run_id"). + Where("run_id_task_id.task_id = ?", taskID). + Exec(ctx); err != nil { + return fmt.Errorf("resetting log signal of task %s: %w", taskID, err) + } + + return nil + }); err != nil { + return fmt.Errorf("resetting log signal: %w", err) + } + + // TODO run + return nil +} diff --git a/master/internal/trial.go b/master/internal/trial.go index 90711bd73d5..552dea6a291 100644 --- a/master/internal/trial.go +++ b/master/internal/trial.go @@ -676,6 +676,13 @@ func (t *trial) handleAllocationExit(exit *task.AllocationExited) error { } } + if err := logpattern.ClearSignal(context.TODO(), t.taskID); err != nil { + return t.transition(model.StateWithReason{ + State: model.ErrorState, + InformationalReason: err.Error(), + }) + } + case exit.UserRequestedStop: return t.transition(model.StateWithReason{ State: model.CompletedState, diff --git a/master/internal/trial_intg_test.go b/master/internal/trial_intg_test.go index 5681a1065ab..ad6ff20e049 100644 --- a/master/internal/trial_intg_test.go +++ b/master/internal/trial_intg_test.go @@ -12,6 +12,7 @@ import ( "time" "github.com/determined-ai/determined/master/pkg/ptrs" + "github.com/uptrace/bun" "github.com/determined-ai/determined/master/pkg/schemas" "github.com/determined-ai/determined/master/pkg/schemas/expconf" @@ -19,6 +20,7 @@ import ( "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" + "github.com/determined-ai/determined/master/internal/db" internaldb "github.com/determined-ai/determined/master/internal/db" "github.com/determined-ai/determined/master/internal/experiment" "github.com/determined-ai/determined/master/internal/mocks/allocationmocks" @@ -185,3 +187,80 @@ func setup(t *testing.T) ( require.NoError(t, err) return a.m.db, rID, tr, &as, done } + +func TestClearLogSignalAfterTrialRestarts(t *testing.T) { + _, rID, tr, _, done := setup(t) + // Pre-scheduled stage. + require.NoError(t, tr.PatchState( + model.StateWithReason{State: model.ActiveState})) + require.NoError(t, tr.PatchSearcherState(experiment.TrialSearcherState{ + Create: searcher.Create{RequestID: rID}, + Op: searcher.ValidateAfter{ + RequestID: rID, + Length: 10, + }, + Complete: false, + Closed: true, + })) + + signal := "Test signal" + _, err := internaldb.Bun().NewUpdate().Model(&model.Task{}). + Table("run_id_task_id"). + Set("log_signal = ?", signal). + Where("run_id_task_id.run_id = ?", tr.id). + Where("task.task_id = run_id_task_id.task_id"). + Exec(context.Background()) + require.NoError(t, err) + + _, err = internaldb.Bun().NewUpdate().Model(&model.Run{}). + Set("log_signal = ?", signal). + Where("id = ?", tr.id). + Exec(context.Background()) + require.NoError(t, err) + + // Restart + tr.AllocationExitedCallback(&task.AllocationExited{Err: fmt.Errorf("bad stuff went down")}) + + // Verity log signal is reset after restart + runsOut := struct { + bun.BaseModel `bun:"table:runs"` + LogSignal *string `db:"log_signal"` + }{} + + err = db.Bun().NewSelect().Model(&runsOut). + Where("id = ?", tr.id). + Scan(context.Background()) + require.NoError(t, err) + require.NotNil(t, runsOut) + require.Nil(t, runsOut.LogSignal) + + tasksOut := struct { + bun.BaseModel `bun:"table:tasks"` + LogSignal *string `db:"log_signal"` + }{} + err = db.Bun().NewSelect().Model(&tasksOut). + Join("LEFT JOIN run_id_task_id AS rt on tasks.task_id = rt.task_id"). + Where("run_id = ?", tr.id). + Scan(context.Background()) + require.NoError(t, err) + require.NotNil(t, tasksOut) + require.Nil(t, tasksOut.LogSignal) + + // Running and Terminating stage. + require.NoError(t, tr.PatchSearcherState(experiment.TrialSearcherState{ + Create: searcher.Create{RequestID: rID}, + Op: searcher.ValidateAfter{ + RequestID: rID, + Length: 10, + }, + Complete: true, + Closed: true, + })) + tr.AllocationExitedCallback(&task.AllocationExited{}) + select { + case <-done: // success + case <-time.After(5 * time.Second): + require.Error(t, fmt.Errorf("timed out waiting for trial to terminate")) + } + require.True(t, model.TerminalStates[tr.state]) +} diff --git a/master/pkg/model/experiment.go b/master/pkg/model/experiment.go index dc88fa1d8ce..a14ef5f9a9e 100644 --- a/master/pkg/model/experiment.go +++ b/master/pkg/model/experiment.go @@ -522,6 +522,7 @@ type Run struct { LogRetentionDays *int16 `db:"log_retention_days"` Metadata map[string]any `db:"metadata" bun:"metadata,scanonly"` LocalID int `db:"local_id"` + LogSignal *string `db:"log_signal"` } // RunTaskID represents a row from the `run_id_task_id` table. diff --git a/master/pkg/model/task.go b/master/pkg/model/task.go index 8fb37c232d1..5a87070a4d2 100644 --- a/master/pkg/model/task.go +++ b/master/pkg/model/task.go @@ -91,7 +91,8 @@ type Task struct { Config *string `db:"config"` - NoPause *bool `db:"no_pause"` + NoPause *bool `db:"no_pause"` + LogSignal *string `db:"log_signal"` } // AllocationID is the ID of an allocation of a task. It is usually of the form diff --git a/master/pkg/model/task_container_defaults.go b/master/pkg/model/task_container_defaults.go index 4eeedd4258c..2201e3e28f4 100644 --- a/master/pkg/model/task_container_defaults.go +++ b/master/pkg/model/task_container_defaults.go @@ -49,7 +49,7 @@ type TaskContainerDefaultsConfig struct { StartupHook string `json:"startup_hook"` - LogPolicies expconf.LogPoliciesConfig `json:"log_policies"` + LogPolicies *expconf.LogPoliciesConfig `json:"log_policies"` PreemptionTimeout int `json:"preemption_timeout,omitempty"` diff --git a/master/pkg/schemas/defaults.go b/master/pkg/schemas/defaults.go index f93113ac388..93a4b920e4c 100644 --- a/master/pkg/schemas/defaults.go +++ b/master/pkg/schemas/defaults.go @@ -29,6 +29,12 @@ func defaultIfDefaultable(obj reflect.Value) (reflect.Value, bool) { if !ok { return out, false } + if obj.Type().Name() == "LogPoliciesConfigV0" { + fmt.Printf("\n\n found type: %#v\n\n", obj.Type().Name()) + } + if obj.Type().Name() == "*LogPoliciesConfigV0" { + fmt.Printf("\n\n found type: %#v\n\n", obj.Type().Name()) + } // Verify the signature matches our Defaultable psuedointerface: // - one input (the receiver), and one output diff --git a/master/pkg/schemas/expconf/const.go b/master/pkg/schemas/expconf/const.go index 77b73cd0132..8d9befa8c4a 100644 --- a/master/pkg/schemas/expconf/const.go +++ b/master/pkg/schemas/expconf/const.go @@ -12,3 +12,10 @@ const ( CUDAImage = "determinedai/pytorch-ngc-dev:0736b6d" ROCMImage = "determinedai/environments:rocm-5.6-pytorch-1.3-tf-2.10-rocm-mpich-0736b6d" ) + +const ( + ECCErrorPattern = ".*uncorrectable ECC error encountered.*" + ECCErrorSignal = "ECC Error" + CUDAOOMPattern = ".*CUDA out of memory.*" + CUDAOOMSignal = "CUDA OOM" +) diff --git a/master/pkg/schemas/expconf/experiment_config.go b/master/pkg/schemas/expconf/experiment_config.go index b2701d846e4..8ea181b86c4 100644 --- a/master/pkg/schemas/expconf/experiment_config.go +++ b/master/pkg/schemas/expconf/experiment_config.go @@ -29,7 +29,7 @@ type ExperimentConfigV0 struct { RawEnvironment *EnvironmentConfigV0 `json:"environment"` RawHyperparameters HyperparametersV0 `json:"hyperparameters"` RawLabels LabelsV0 `json:"labels"` - RawLogPolicies LogPoliciesConfigV0 `json:"log_policies"` + RawLogPolicies *LogPoliciesConfigV0 `json:"log_policies"` RawRetentionPolicy *RetentionPolicyConfigV0 `json:"retention_policy,omitempty"` RawMaxRestarts *int `json:"max_restarts"` RawMinCheckpointPeriod *LengthV0 `json:"min_checkpoint_period"` diff --git a/master/pkg/schemas/expconf/log_pattern_config.go b/master/pkg/schemas/expconf/log_pattern_config.go index 7692db460a1..3b03fa8466d 100644 --- a/master/pkg/schemas/expconf/log_pattern_config.go +++ b/master/pkg/schemas/expconf/log_pattern_config.go @@ -13,13 +13,29 @@ import ( // LogPoliciesConfigV0 is a list of log policies. type LogPoliciesConfigV0 []LogPolicyV0 +// WithDefaults implements the Defaultable psuedointerface. +func (l *LogPoliciesConfigV0) WithDefaults() *LogPoliciesConfigV0 { + eccErrorPattern := ECCErrorPattern + eccErrorSignal := ECCErrorSignal + cudaOomPattern := CUDAOOMPattern + cudaOomSignal := CUDAOOMSignal + + if l != nil && len(*l) == 0 { + return &LogPoliciesConfigV0{ + LogPolicyV0{RawPattern: eccErrorPattern, RawSignal: &eccErrorSignal}, + LogPolicyV0{RawPattern: cudaOomPattern, RawSignal: &cudaOomSignal}, + } + } + return l +} + // Merge implemenets the mergable interface. -func (b LogPoliciesConfigV0) Merge( - other LogPoliciesConfigV0, -) LogPoliciesConfigV0 { +func (b *LogPoliciesConfigV0) Merge( + other *LogPoliciesConfigV0, +) *LogPoliciesConfigV0 { var out LogPoliciesConfigV0 seen := make(map[string]bool) - for _, p := range append(other, b...) { + for _, p := range append(*other, *b...) { json, err := json.Marshal(p) if err != nil { log.Errorf("marshaling error %+v %v", p, err) @@ -31,7 +47,7 @@ func (b LogPoliciesConfigV0) Merge( out = append(out, p) } - return out + return &out } // LogPolicyV0 is an action to take if we match against trial logs. @@ -40,7 +56,8 @@ func (b LogPoliciesConfigV0) Merge( type LogPolicyV0 struct { RawPattern string `json:"pattern"` - RawAction LogActionV0 `json:"action"` + RawAction *LogActionV0 `json:"action,omitempty"` + RawSignal *string `json:"signal,omitempty"` } // LogActionV0 is a policy to take after matching. diff --git a/master/pkg/schemas/expconf/zgen_experiment_config_v0.go b/master/pkg/schemas/expconf/zgen_experiment_config_v0.go index f502fb04942..af2de47013c 100644 --- a/master/pkg/schemas/expconf/zgen_experiment_config_v0.go +++ b/master/pkg/schemas/expconf/zgen_experiment_config_v0.go @@ -109,11 +109,14 @@ func (e *ExperimentConfigV0) SetLabels(val LabelsV0) { } func (e ExperimentConfigV0) LogPolicies() LogPoliciesConfigV0 { - return e.RawLogPolicies + if e.RawLogPolicies == nil { + panic("You must call WithDefaults on ExperimentConfigV0 before .LogPolicies") + } + return *e.RawLogPolicies } func (e *ExperimentConfigV0) SetLogPolicies(val LogPoliciesConfigV0) { - e.RawLogPolicies = val + e.RawLogPolicies = &val } func (e ExperimentConfigV0) RetentionPolicy() *RetentionPolicyConfigV0 { diff --git a/master/pkg/schemas/expconf/zgen_log_policy_v0.go b/master/pkg/schemas/expconf/zgen_log_policy_v0.go index 91aa8f2eb3f..8da43d74c33 100644 --- a/master/pkg/schemas/expconf/zgen_log_policy_v0.go +++ b/master/pkg/schemas/expconf/zgen_log_policy_v0.go @@ -16,14 +16,22 @@ func (l *LogPolicyV0) SetPattern(val string) { l.RawPattern = val } -func (l LogPolicyV0) Action() LogActionV0 { +func (l LogPolicyV0) Action() *LogActionV0 { return l.RawAction } -func (l *LogPolicyV0) SetAction(val LogActionV0) { +func (l *LogPolicyV0) SetAction(val *LogActionV0) { l.RawAction = val } +func (l LogPolicyV0) Signal() *string { + return l.RawSignal +} + +func (l *LogPolicyV0) SetSignal(val *string) { + l.RawSignal = val +} + func (l LogPolicyV0) ParsedSchema() interface{} { return schemas.ParsedLogPolicyV0() } diff --git a/master/pkg/schemas/schema.go b/master/pkg/schemas/schema.go index 4a6ff5f54c7..29b8dbea181 100644 --- a/master/pkg/schemas/schema.go +++ b/master/pkg/schemas/schema.go @@ -5,6 +5,7 @@ package schemas import ( "bytes" "encoding/json" + "fmt" "sync" "github.com/pkg/errors" @@ -135,6 +136,7 @@ func GetCompletenessValidator(url string) *jsonschema.Schema { compiler.Extensions["checks"] = extensions.ChecksExtension() compiler.Extensions["compareProperties"] = extensions.ComparePropertiesExtension() compiler.Extensions["optionalRef"] = extensions.OptionalRefExtension() + fmt.Printf("url: %#v\n", url) compiler.Extensions["eventuallyRequired"] = extensions.EventuallyRequiredExtension() compiler.Extensions["eventually"] = extensions.EventuallyExtension() @@ -142,6 +144,7 @@ func GetCompletenessValidator(url string) *jsonschema.Schema { if err != nil { panic("uncompilable schema: " + url) } + fmt.Printf("after Compiple url\n") completenessValidators[url] = validator diff --git a/master/pkg/schemas/zgen_schemas.go b/master/pkg/schemas/zgen_schemas.go index f90c94923c8..36474bc91fc 100644 --- a/master/pkg/schemas/zgen_schemas.go +++ b/master/pkg/schemas/zgen_schemas.go @@ -1570,8 +1570,6 @@ var ( "title": "LogPolicy", "additionalProperties": false, "required": [ - "pattern", - "action" ], "type": "object", "properties": { @@ -1582,9 +1580,16 @@ var ( }, "action": { "type": [ - "object" + "object", + "null" ], - "$ref": "http://determined.ai/schemas/expconf/v0/log-action.json" + "$ref": "http://determined.ai/schemas/expconf/v0/log-action.json", + "default": null + }, + "signal": { + "type": [ + "string" + ] } } } diff --git a/master/static/migrations/20240916170654_experiment-add-log-signal-labels.tx.up.sql b/master/static/migrations/20240916170654_experiment-add-log-signal-labels.tx.up.sql new file mode 100644 index 00000000000..7112682ef4e --- /dev/null +++ b/master/static/migrations/20240916170654_experiment-add-log-signal-labels.tx.up.sql @@ -0,0 +1,2 @@ +ALTER table public.tasks + ADD COLUMN log_signal text; \ No newline at end of file diff --git a/proto/buf.image.bin b/proto/buf.image.bin index 1321740333ca95e57ff941bd12c6931b3863511d..7110db10061d3296ad615fdfcb33a09741ea200c 100644 GIT binary patch delta 7231 zcmb7|dvH|OeaG)P_v-BKT_mwr570_ND_%7M2_Y{S5+er$NCrjaVT_Fdv4}+o30Bqv zNf;~{NFZ*^D>cUsq#@1N7&3MqK$J|Ul+sSpPDyLG8IMb5TxW*1e#EXrJ82r~wBO%( zNOD4I^$*SHd+s^s_ub#;-t+kV_=9_X_2E53huLL5w~2r5(D1w_e#Ye|{NcY{!?$?T zruu$IrnR}dt#wbTWAfw#yC>4X*GAB`PBk56Os;&G59Ke-_M^?CU8kOL{UZ6=b0SY( z>*Heh?T7hd-~W$IekjOtoWJ7osb_h<-1?4}ke|-UiVp8d@plW@RQYHhFG+fV_Z;KC zpY3OPoEK)1jK-{FB!dzwU6N(xN50(0@#o^t8~$=%)(=m*H`;vp?OIVXJQNXiF1s@P zmlMRRZsOxVX_ER$;W0mHcxjO+Ve)TcqB!?Ae%0{5#6*6C$>BS3F_hadHJ+Vl%(NzS zcB*`T0t_===mqXR*f%N*3hs?!P%v+l3sqJ=e5FD>n#WJ98J<`x_PU{y+|T`NX0QDD z3Nce&S|VN&HK&%$^($p+si>6CEfpE|MH#LWJ4Ml{C53)IwdczZc8Uq|T%Bl%9ASQJ zS-K4* z`n-3A=wY9i@2(JScYS`aUUVuSYt!AmyML#T^IvQb-2c~w`n5QuUW=q`emWG9FCTWZ zhu>N${!Xwp@>^@fs>E_nG0zH}3I|>thivpcFN-^Y+rUfX56EMWieh-1r zok;KQ>={V3?MU=?x9w?7>}c!gNp~4e7za)bh7%MxHAE*WGEm^u5SwfjI5iZP_(>HH zk5dynWQNFcS)KoN%^ylc`7@lJKbM$X4pXw)6 ze~2de+Imq^o5fvtY#QY^j{q+ zvrQZ_p_hD&7+6pUnK7`S5HbT;afFNnUbl0A#d8vBSxJw@ z6~U-(y!rIfMBt!@@6`?>%7Kzia1bE}{h-B`C|qsZX@wdeFN_7Re&ZVosp>bppx~h2 z@PdNxe)FwHD2w~#*EZl=-Dkj{jT#LYDD>ZFz(fele#ssYQ}5qD5{ku*ZeDyX-f1jAqs;k|ocE}hl|0JA zG{*I!p+J0r@jqampNlD?M8r0%M)W8}KgIxMDKVLz{)BIb0%HvJXAIK&GbHxsV+=IR zQ@A|FfWt_*JjRfr`6?bBo{(ENiFx-%Im5sc3|MqeV1Wb|BQfv<1DJ9(kO$r=858y>8M}TzJLV`r}vlbF0qMs#5a}Yf{5a*cl4f&t!SQ*BJl0iJq;Zn})TBo=wqTBuLBj4wt=2M8`u41Cc0<)TAJhs243MNLYQ* zl7fWQ7cHr5%Hm5z>b}aCEF&~ht7Qaje_5E?ZWR@N(I*f`pgL#A-RbXR}zA{2s#+ORREX;ropL ziLsEYESLsbqRS<8*Y)Rx|Ka2Ms1E{o-2%i@ zgt~44LSmllRi% zp#c_dSPPKgcEfUmgoPWH8zd~;Aa3=r5DUbs%=w8tuvM%`zG^MRVBs~!|G`*@DGR27 z&Jr=Q@H%lDbBY|#H->?*ijkTcB(n9or3MMBuUl%6u=+Y>Yl#Yqhi}NODN$GXh6NQ< zpx&^cATi_(3o1sSZfa2YAE9rKh9*!qEhs$2>^ChaNX&DSLeo|i1maER{IjgyCMv7n zwA)31Ed3MXKQ&njRF+HwfeI)~x6C^Dr`xa&-m=m02#ju7Mv#bp%QAvQ^jpMeKBDIZ z;%(-9AiWlmocp%@yz>zK9md}?(eqSv)35;1^C9O_xhi|&zI%j3Fpa` zJH#Kc+wz?q;>yRapKrE`z2W1`&!cm@9BLQsk&z3gytPyOdh9jxXKkV%yRTm;|EFIR z%H!=~(_OENzuF}RLVw8o$?Nx|P7jEJ-`z9XA)W}IV1E8;tgLddU3A=a!uW2{=bdDJ zft=VH`9G$cxkr36@|~Ib{&`u(QttfWyo{K_SVGoxiY8fbE-T2Pvl2r--6Ecd*U598 zV#au`m>7a%YZ2B4eci{};1-!T3LtQN`x) z3Pq31MH336ip@on(c!g}E}C;N3X6eTIx>nTEH&3nJj19`bKPVl%Jg*;|HE(Dh+n#H zmYIVuo?(tMbMS?tVP-YKnl#O+kY!z>%AFQ2k^jBHE6w>da>glVvv@_kN}k+?TE<&F zB3r)FCB(v6CJ1`q4l*lRs?q?3AhV{-unIwD-80KC!CQ_OwbHpy{;W%sR8`{OM)wUD z_l+uEt?wHyeSKQ4L2Q>|*MQeD;#D?%JhwwR)Z$I5#ILR~lrRYl)fh@pFjNCd^l`hC z)TBJuEf!TIO{URCjplLzh5ku%xp0Zk0+nf5)`J`@Fgd^jgj`?}4TUKdm_$QiiUmkC z9bd8DFLEA`n|s7ucM%f({C1OQkA7_J#}b{lu-43vhsbfQ$uSh>uQfS_!u++!@f6ID z4P>d)C};PIDOF2x>Y^NHA;+rNLhI|8+6x^`Rqdr7IbN=EJox{((&_R`y|JjvFZCwt zy8KdavJM4{^~gG1WV0wc58?_w`?s&)@|V40dc}i=1$t6f!vYGkJZM;C5sOu}$b$XQ zT{Yr_V5~AQ&<}I0GBBVp$11>}(<=76Cg&k}ZZB5qCVX4AYV!Mhjea3@)kI197*Guf z+;tyglwFHP1Yc)@LxI#fLkbGP*BMe!2)+)as0aWk8=P&jvro*f+F*iHvE&iHQ3uz> z5+&CmIO-UIyBVZbC{nvpT`fD?_I~oolol?Gg~!dtBNP&}*`R`g$IS*66g+M=%NU9t zf!i!oo5Y-?o`;sQW`jo;NSX~ET_9;TcvOKTr3)mfk+=jVWx(hHNy>oH1(K8jgH<5y zbjW|}2beT=Ljps!ksW-e&W)~(C`n*cZKVA-z@QO7y4?ha0+@CK1`5Bd-GG6@k8TGT zIs&6ql5zIQKN}F0Rhe)?1*ft|C-2h1by-Bo^$3oFMd0=VOoIX=7wr?#@$4-a2Y9AW@N%ag8=9AY42BzBEM3}_l)0X32%%sC~?_KW)D z5qtfn8p%<{pEhSWT_Z6Kw2!D7$ukUu^v1ty+%*ym8e1cI#@-4rBMxEDFz_LlkZ}17 z1E4w;4-b#yE*GCKf4W6Xk=F)Au3U5gw>h+qEtVX&mjskjFvD?sNkD-l@J@~`mYf`Q z0$w;50hkVrPZScwjKn-A>5@PT9Ev5+GUu%P#{;5z?z8roPQ{Ybj6Y}crHdt|foQ2> z$r(jUc0MV@k~22Au9cjzuyn2DjD@9ZC1(iMN(4u(3YcvmYS}Yyg<~Jt9VG{ zdHIz=QD1r9V$$`J^A?k?mz=klRK4Vas+TOjFdCEqU9f<3z2t%gr0XRY2v7}jgnG%# z%(*NdAHt>NWqT>1GRZ59f6Jssmq|`%d1wNqET(gzR}iKu92A2@qs`vdNYPXt3012;mun2_Xwc78AgL zqmBba?9ozSgmzG%<5EdxY@J~kI<;d5DcULHP>0$nF0Dnaov~JHzu#Gs=uF4ulE1U3e{v;1?D8)n{TEm9yS##Y-*0MZsBLd- z*j(3?l@(*-BC8{4S)&@{ch>S<{co@3clhkj{kPQdDVZ!=Hh1#8F`>YFlJT&g>W4he zGa121hJ`M{%&doA>E!rx@#pow(i!^IL3XNtZ$#9&?CJh@Q^Y0r8w5svgFyezY?006 z-_u2Ie|{i_M@b!j*C#$$fLkTIC}V5Eon z=?fE0i4K{*L~KZl_Qq5Ep@CjpC|V=E%n#yi_4hQ&-^RtwvSopoouAhm&-O=NX8xA8 zmQ9KJ4x}9t`}iW!mVAi$!>c#f{a+KGSS&i&ap^A+jb)|1@e+Uhe@StfMceOf+)~@T zwW*`Aqph*7N$y)BwnR=~L8}w(TbtE_{@)lUJ8l!)KQcqxxtGgTk0#BLmwMdP{#TcY z*9BW6pIjxD$Ch~Np$aASh68T_2Q2eFFT|a|UB>fsM#=PQkt+*U3$a+cjV_P+1`xbo zmYk;CMjyOimY!u1ykC}^=a;LH=vu)G%Xu%CrdMS7+z=#Xk2$;H`D;l%em6;^MX8 zEI-+IJ&_c6xAS#cVWAXOumbS~DO@Ax)r(2pBnB`N))<~Z0w_60WdcIpn%q$qA#cr? zv3?fvBE@=V6U#}7(F+p8YtxR`(nt0r%vm2EshA{TSRJp|Op+861xehb2vJGE?3>8! zQSy~)5uQzK5{69@cc5Crm<)u-31c!4*d(CZREkA*G{{5MVp?&7@d$0y7!DMBAdGJq zk*PdN;xM+6Jj#>NwL(-h4y5Er*=UTyFob9{Mgbv2BT|+?06ex$PMci0R+JPs8R5fN zZZmJu%MGjLDoDbIX|=6`!UJQettKxJgl{#%10ip#5grJ6TOoW7@KcYfH=K_5{{LQuz3c?PlQ| z>^J~Pf!M=%5A*yeo`0W|u?;iflMkP~eR6KSn6?1FJpcw)^g#r~>$Z= z;`_Ld!6$k#lLPSxbDof!*NO4vkJ#+V$o?qfkD2VrD!XZ*2RWItA7F51jB;j`+WhZR z)=(1kgAoR3Mu7NYA7C)Y^n%d=26qeuqXP_5O;fSa*(-lvBPN$8(;n#cGPtBi!X-f1 zWFRK?GWb-aCi1{L#K@>DS+!eu@>rJ;^9~KNfLj;1qicd=A@83N3}Wl4JnRV5`Zx0s5J)= zB#v5h072p?nKJ<-QUh_EIX{&j-XTiLkF#)|%AShsCm8>U$)2jRn+6gvm9jrW=G?5z zS+iAtn+NT|2yns>Z4V%XdB)lU2u{yfdjP@d8Ea1}sdkF&nNfPm8iPh^wZ;Hq$|-A1 zDqbM(%%Cy;yfcHT$&)kI6O2ThGu9J8#5qH$Cqs^5fq0%d{qW?#O`$A#_D*>6JPYS5 zRfa*87a0GUQDvA?#Wav7!$_4r<%ul2ONe-%<$xYo(q}CJ1cyFr2_QK1ktK`3AsUF6 zne$7$$TMX00Wn1`s}p`j6f<6C*k0-Bh=R$_8Gpqvi7F^diRv>klBJT_6Gzp-oyKUS-bfa^89|q3l(L9iHS!gB-tR{52y- znv%mbkTGc_$3<<->+506MVlNwFy^8)1`x?FT4Ml_{302%1j*9_ahW-PfH5=Wxo(`x zVa%}d%a%ntSiHgb?+lA{#lkd@De1)G$~R0|sa-%HG(qP8geF(4Dd+{QE7lZ1(7HmJ z#8q5$zQy>+oT>7KW7tXF-X+3v+6ECTMXTP{(v?SVS&sk_;VtV?I(c+;h)43+1|jBL z9i&4RU9}crI5J+f76BsIRiaZ0m4ZNAW6rzsoeg4Q*)_Y}2N36vjQ`1q6DV;^16dT1 zIM>OdS#z%2yl8|w*R4B%$a~$o1BkrW$(?D)n-Pc)nDdeBs~6>EAJ}&`19|_#_=hHM zhRSOirXz0#<-H-ZH;RhV8&)keBGC;N9)`y&L}QTX21^h8=}0s@5T7vTAF_L+m^S>?Es5FtNhqsQRX#=Q}N_>4Kb zxE!0nC+RbaNze5NkpCy+|1#u9DDtLZ26B#|oL@4YkuzQvY!sPtT0*4Xf^R*sMkh=| zuYLjmLEuZfCO{DQlGa4mjS=MNSMsk&U;35hj7Dm;oB=WAE8<)Z&Y3tYaA&Vvez%w! z-NVC$_?~!PCQ<5yYkM4%o%ddvoaD_=*;RKIt+y!N6uTz+x4 zILf|~ha1FMF|Rj1+rMS-Qkr_1`Rxfh2B>@KgZGI0*a`XhJ)$jRY;XJ~zv%1zxRb`@ zqmAOdp_3liB<}Z)F+WpY+6KMS8bwX4us5FP=V0>C>*vr(UH6J@-qXy_lJT7|_J5nS zr%CK!N94boMEkdA=xG+6?5O;rS#*5+;0HI0=OXWq*7wRjRVL`KY7ui88zuK|5tXql zebi<2xZt@2qyrU$Gj1-A<&2fjw~NA1a|^!K^1T&eRtnt?8J++)-`o!6bFH}J<(rc$ zx~R`w41sX!%r_T9Mkm)oeKEu&QHq_yEgYCgK?}|05JNGs&|D4~fg*L}or~W%@)lw9 z(-jN4A{Ln=FZvLp$Q*frXqwq-uxpKRCdjASL}~FD99`&$%W>Kt%g1R6xROA@Sy&?{ z2}8!qer94l%QMx{)1i)M#!RKo~#W+%#PBt3u0;-^f`pkdv&dFtVc$F)EDg zK!{NR+3DblFV!q3EhF&W6xUXRI;4F-s--ga|kmR;y^|^hoJK!s+iDzrwb?w;)t58D=16xypL1Q zyPSBR2;&H(>mI~yso{p22^PN8h^;FqOO4n-;IA!M-2I;@j=#No|u+r!cga|94KOIu>O{{X( zGF*)4o+meTirnHVyksN^RXkSlHChr~@lcTLQ5BDBWzSq)?Wi`nb+x0~*rTf*)y5tm zJjn1iS8w%3*71fErt%VEYwF^JGUqs9!k1i~~ z)=&V#@@tI?Kv;gQaRCSvYT*LCL8`=2C$H3q395<(jG1*tdR^?OGt%o~N1c&g6+7zH z!>@}S^#jRC`+B3jE_T!#?RBxE-e`{*DVrgqQrgBnIqm6gy zoVuu?AU&q2MFegqG%wMb`#VIk(Hvc%&}k?DVcDHVb0ARYG@1i}LZ{JO6*RUDY7UH< z+l=PAw6V=-u1g!+jOMDeu|sb-y0o!lAUlb^!-%d+8#|2Xy0ozaqSH~s4?^9}J~?Tp zD0jQVMe?=pi6VbCR;o%Adu6;^RG1RQUVC=bC5pWa`qJ(Ih$F~e27wL4cD|QE;oFb^ zC5nfcb4VsXAeML!;}$IaQ=y_LJ#DHEv7ZfmH!Ib5)10w%5dyvX$fb)A`|YtE5IOfV zC`vg2k#j$TqzhDFbRLvxU1Cw`L3>I=HG*30IUNvF4%%~iGM&>O|Jpen<%cktk$~XaD=p8hP Date: Sun, 29 Sep 2024 21:22:18 -0400 Subject: [PATCH 02/27] CI tests and lint --- e2e_tests/tests/cluster/test_log_policies.py | 18 ++++------- .../db/postgres_experiments_intg_test.go | 18 +++++++---- master/internal/trial_intg_test.go | 3 +- master/pkg/model/task_container_defaults.go | 5 +++- .../pkg/model/task_container_defaults_test.go | 30 +++++++++---------- .../pkg/schemas/expconf/log_pattern_config.go | 11 +++---- master/pkg/schemas/merge.go | 5 +++- master/pkg/schemas/zgen_schemas.go | 23 ++++++++++++-- schemas/expconf/v0/log-policy.json | 14 +++++---- 9 files changed, 79 insertions(+), 48 deletions(-) diff --git a/e2e_tests/tests/cluster/test_log_policies.py b/e2e_tests/tests/cluster/test_log_policies.py index 8305b1c7d01..37651db9811 100644 --- a/e2e_tests/tests/cluster/test_log_policies.py +++ b/e2e_tests/tests/cluster/test_log_policies.py @@ -6,6 +6,7 @@ from tests import experiment as exp from tests.experiment import noop from tests import detproc +from tests import experiment as exp from tests.cluster import utils @@ -177,13 +178,9 @@ def test_log_signal(should_match: bool) -> None: searchRes = utils.get_run_by_exp_id(sess, exp_ref.id) runSignal = searchRes.runs[0].logSignal - trialRes = bindings.get_GetTrial( - sess, - trialId = searchRes.runs[0].id - ) + trialRes = bindings.get_GetTrial(sess, trialId=searchRes.runs[0].id) trialSignal = trialRes.trial.logSignal - if should_match: assert runSignal == expected_signal assert trialSignal == expected_signal @@ -191,6 +188,7 @@ def test_log_signal(should_match: bool) -> None: assert runSignal is None assert trialSignal is None + @pytest.mark.e2e_cpu def test_signal_clear_after_exp_continue() -> None: sess = api_utils.user_session() @@ -208,10 +206,7 @@ def test_signal_clear_after_exp_continue() -> None: searchRes = utils.get_run_by_exp_id(sess, exp_ref.id) runSignal = searchRes.runs[0].logSignal - trialRes = bindings.get_GetTrial( - sess, - trialId = searchRes.runs[0].id - ) + trialRes = bindings.get_GetTrial(sess, trialId=searchRes.runs[0].id) trialSignal = trialRes.trial.logSignal assert runSignal == expected_signal @@ -224,10 +219,7 @@ def test_signal_clear_after_exp_continue() -> None: searchRes = utils.get_run_by_exp_id(sess, exp_ref.id) runSignal = searchRes.runs[0].logSignal - trialRes = bindings.get_GetTrial( - sess, - trialId = searchRes.runs[0].id - ) + trialRes = bindings.get_GetTrial(sess, trialId=searchRes.runs[0].id) trialSignal = trialRes.trial.logSignal assert runSignal == None diff --git a/master/internal/db/postgres_experiments_intg_test.go b/master/internal/db/postgres_experiments_intg_test.go index 48c573bb87b..af9c2f9f5dd 100644 --- a/master/internal/db/postgres_experiments_intg_test.go +++ b/master/internal/db/postgres_experiments_intg_test.go @@ -435,15 +435,23 @@ func TestActiveLogPatternPolicies(t *testing.T) { policies, err := ActiveLogPolicies(ctx, exp.ID) require.NoError(t, err) - require.Empty(t, policies) + require.NotEmpty(t, policies) + eccErrorSignal := "ECC Error" + cudaOOMSignal := "CUDA OOM" + expected := expconf.LogPoliciesConfig{ + expconf.LogPolicy{RawPattern: ".*uncorrectable ECC error encountered.*", RawSignal: &eccErrorSignal}, + expconf.LogPolicy{RawPattern: ".*CUDA out of memory.*", RawSignal: &cudaOOMSignal}, + } + + require.Equal(t, expected, policies) activeConfig, err := db.ActiveExperimentConfig(exp.ID) require.NoError(t, err) - activeConfig.RawLogPolicies = expconf.LogPoliciesConfig{ - expconf.LogPolicy{RawPattern: "sub", RawAction: expconf.LogAction{ + activeConfig.RawLogPolicies = &expconf.LogPoliciesConfig{ + expconf.LogPolicy{RawPattern: "sub", RawAction: &expconf.LogAction{ RawCancelRetries: &expconf.LogActionCancelRetries{}, }}, - expconf.LogPolicy{RawPattern: `\d{5}$`, RawAction: expconf.LogAction{ + expconf.LogPolicy{RawPattern: `\d{5}$`, RawAction: &expconf.LogAction{ RawExcludeNode: &expconf.LogActionExcludeNode{}, }}, } @@ -460,7 +468,7 @@ func TestActiveLogPatternPolicies(t *testing.T) { policies, err = ActiveLogPolicies(ctx, exp.ID) require.NoError(t, err) - require.Equal(t, activeConfig.RawLogPolicies, policies) + require.Equal(t, activeConfig.RawLogPolicies, &policies) } func TestGetNonTerminalExperimentCount(t *testing.T) { diff --git a/master/internal/trial_intg_test.go b/master/internal/trial_intg_test.go index ad6ff20e049..24ad7575da1 100644 --- a/master/internal/trial_intg_test.go +++ b/master/internal/trial_intg_test.go @@ -11,9 +11,10 @@ import ( "testing" "time" - "github.com/determined-ai/determined/master/pkg/ptrs" "github.com/uptrace/bun" + "github.com/determined-ai/determined/master/pkg/ptrs" + "github.com/determined-ai/determined/master/pkg/schemas" "github.com/determined-ai/determined/master/pkg/schemas/expconf" diff --git a/master/pkg/model/task_container_defaults.go b/master/pkg/model/task_container_defaults.go index 2201e3e28f4..a25a1a35950 100644 --- a/master/pkg/model/task_container_defaults.go +++ b/master/pkg/model/task_container_defaults.go @@ -164,7 +164,9 @@ func (c *TaskContainerDefaultsConfig) MergeIntoExpConfig(config *expconf.Experim c.Slurm.SbatchArgs(), configRawSlurmConfig.SbatchArgs()...) } + fmt.Printf("\nbefore merge, logp: %#v\n", config.RawLogPolicies) config.RawLogPolicies = schemas.Merge(config.RawLogPolicies, c.LogPolicies) + fmt.Printf("\nafter merge, logp: %#v\n", config.RawLogPolicies) configRawPbsConfig := config.RawPbsConfig config.RawPbsConfig = schemas.Merge(config.RawPbsConfig, &c.Pbs) @@ -323,7 +325,8 @@ func (c TaskContainerDefaultsConfig) Merge( if res.LogPolicies == nil { res.LogPolicies = other.LogPolicies } else { - res.LogPolicies = res.LogPolicies.Merge(other.LogPolicies) + logPolicies := res.LogPolicies.Merge(*other.LogPolicies) + res.LogPolicies = &logPolicies } } diff --git a/master/pkg/model/task_container_defaults_test.go b/master/pkg/model/task_container_defaults_test.go index 4039dec9af8..4f8a715538a 100644 --- a/master/pkg/model/task_container_defaults_test.go +++ b/master/pkg/model/task_container_defaults_test.go @@ -449,11 +449,11 @@ func TestLogPatternUnmarshal(t *testing.T) { ShmSizeBytes: 4294967296, NetworkMode: "bridge", PreemptionTimeout: DefaultPreemptionTimeout, - LogPolicies: expconf.LogPoliciesConfig{ - expconf.LogPolicy{RawPattern: "test", RawAction: expconf.LogAction{ + LogPolicies: &expconf.LogPoliciesConfig{ + expconf.LogPolicy{RawPattern: "test", RawAction: &expconf.LogAction{ RawExcludeNode: &expconf.LogActionExcludeNode{}, }}, - expconf.LogPolicy{RawPattern: "test2", RawAction: expconf.LogAction{ + expconf.LogPolicy{RawPattern: "test2", RawAction: &expconf.LogAction{ RawCancelRetries: &expconf.LogActionCancelRetries{}, }}, }, @@ -463,25 +463,25 @@ func TestLogPatternUnmarshal(t *testing.T) { func TestLogPatternPoliciesMerging(t *testing.T) { defaults := &TaskContainerDefaultsConfig{ - LogPolicies: expconf.LogPoliciesConfig{ - expconf.LogPolicy{RawPattern: "a", RawAction: expconf.LogAction{ + LogPolicies: &expconf.LogPoliciesConfig{ + expconf.LogPolicy{RawPattern: "a", RawAction: &expconf.LogAction{ RawCancelRetries: &expconf.LogActionCancelRetries{}, }}, - expconf.LogPolicy{RawPattern: "b", RawAction: expconf.LogAction{ + expconf.LogPolicy{RawPattern: "b", RawAction: &expconf.LogAction{ RawExcludeNode: &expconf.LogActionExcludeNode{}, }}, }, } conf := expconf.ExperimentConfig{ - RawLogPolicies: expconf.LogPoliciesConfig{ - expconf.LogPolicy{RawPattern: "b", RawAction: expconf.LogAction{ + RawLogPolicies: &expconf.LogPoliciesConfig{ + expconf.LogPolicy{RawPattern: "b", RawAction: &expconf.LogAction{ RawCancelRetries: &expconf.LogActionCancelRetries{}, }}, - expconf.LogPolicy{RawPattern: "b", RawAction: expconf.LogAction{ + expconf.LogPolicy{RawPattern: "b", RawAction: &expconf.LogAction{ RawExcludeNode: &expconf.LogActionExcludeNode{}, }}, - expconf.LogPolicy{RawPattern: "c", RawAction: expconf.LogAction{ + expconf.LogPolicy{RawPattern: "c", RawAction: &expconf.LogAction{ RawExcludeNode: &expconf.LogActionExcludeNode{}, }}, }, @@ -489,17 +489,17 @@ func TestLogPatternPoliciesMerging(t *testing.T) { defaults.MergeIntoExpConfig(&conf) - expected := expconf.LogPoliciesConfig{ - expconf.LogPolicy{RawPattern: "a", RawAction: expconf.LogAction{ + expected := &expconf.LogPoliciesConfig{ + expconf.LogPolicy{RawPattern: "a", RawAction: &expconf.LogAction{ RawCancelRetries: &expconf.LogActionCancelRetries{}, }}, - expconf.LogPolicy{RawPattern: "b", RawAction: expconf.LogAction{ + expconf.LogPolicy{RawPattern: "b", RawAction: &expconf.LogAction{ RawExcludeNode: &expconf.LogActionExcludeNode{}, }}, - expconf.LogPolicy{RawPattern: "b", RawAction: expconf.LogAction{ + expconf.LogPolicy{RawPattern: "b", RawAction: &expconf.LogAction{ RawCancelRetries: &expconf.LogActionCancelRetries{}, }}, - expconf.LogPolicy{RawPattern: "c", RawAction: expconf.LogAction{ + expconf.LogPolicy{RawPattern: "c", RawAction: &expconf.LogAction{ RawExcludeNode: &expconf.LogActionExcludeNode{}, }}, } diff --git a/master/pkg/schemas/expconf/log_pattern_config.go b/master/pkg/schemas/expconf/log_pattern_config.go index 3b03fa8466d..0efc37c9d32 100644 --- a/master/pkg/schemas/expconf/log_pattern_config.go +++ b/master/pkg/schemas/expconf/log_pattern_config.go @@ -30,16 +30,17 @@ func (l *LogPoliciesConfigV0) WithDefaults() *LogPoliciesConfigV0 { } // Merge implemenets the mergable interface. -func (b *LogPoliciesConfigV0) Merge( - other *LogPoliciesConfigV0, -) *LogPoliciesConfigV0 { +func (b LogPoliciesConfigV0) Merge( + other LogPoliciesConfigV0, +) LogPoliciesConfigV0 { var out LogPoliciesConfigV0 seen := make(map[string]bool) - for _, p := range append(*other, *b...) { + for _, p := range append(other, b...) { json, err := json.Marshal(p) if err != nil { log.Errorf("marshaling error %+v %v", p, err) } + fmt.Printf("\njson: %#v\n", json) if seen[string(json)] { continue } @@ -47,7 +48,7 @@ func (b *LogPoliciesConfigV0) Merge( out = append(out, p) } - return &out + return out } // LogPolicyV0 is an action to take if we match against trial logs. diff --git a/master/pkg/schemas/merge.go b/master/pkg/schemas/merge.go index 63c47ada1b2..143a7a07991 100644 --- a/master/pkg/schemas/merge.go +++ b/master/pkg/schemas/merge.go @@ -3,6 +3,7 @@ package schemas import ( "fmt" "reflect" + "runtime/debug" ) // mergeIfMergable checks if obj implements our Mergable psuedointerface and calls obj.Merge(src) @@ -92,7 +93,8 @@ func assertTypeMatch(obj reflect.Value, src reflect.Value) { // return type will also be the same. The return value will never share memory with src, so it is // safe to alter obj without affecting src after the fact. func merge(obj reflect.Value, src reflect.Value, name string) reflect.Value { - // fmt.Printf("merge(%T, %T, %v)\n", obj.Interface(), src.Interface(), name) + debug.PrintStack() + fmt.Printf("\nmerge(%T, %T, %v)\n", obj.Interface(), src.Interface(), name) assertTypeMatch(obj, src) // Always handle pointers first. @@ -125,6 +127,7 @@ func merge(obj reflect.Value, src reflect.Value, name string) reflect.Value { switch obj.Kind() { case reflect.Struct: // Recurse into each field of the struct. + fmt.Printf("\nit's a struct\n\n") out := reflect.New(obj.Type()).Elem() for i := 0; i < src.NumField(); i++ { structField := src.Type().Field(i) diff --git a/master/pkg/schemas/zgen_schemas.go b/master/pkg/schemas/zgen_schemas.go index 36474bc91fc..694b2524e8f 100644 --- a/master/pkg/schemas/zgen_schemas.go +++ b/master/pkg/schemas/zgen_schemas.go @@ -1570,6 +1570,7 @@ var ( "title": "LogPolicy", "additionalProperties": false, "required": [ + "pattern" ], "type": "object", "properties": { @@ -1583,12 +1584,30 @@ var ( "object", "null" ], - "$ref": "http://determined.ai/schemas/expconf/v0/log-action.json", + "optionalRef": "http://determined.ai/schemas/expconf/v0/log-action.json", "default": null }, "signal": { "type": [ - "string" + "string", + "null" + ], + "default": null + } + }, + "checks": { + "one of or both of action and signal must be set": { + "anyOf": [ + { + "required": [ + "action" + ] + }, + { + "required": [ + "signal" + ] + } ] } } diff --git a/schemas/expconf/v0/log-policy.json b/schemas/expconf/v0/log-policy.json index 37c9d1fada9..56631f43bfb 100644 --- a/schemas/expconf/v0/log-policy.json +++ b/schemas/expconf/v0/log-policy.json @@ -15,18 +15,22 @@ }, "action": { "type": [ - "object" + "object", + "null" ], - "$ref": "http://determined.ai/schemas/expconf/v0/log-action.json" + "optionalRef": "http://determined.ai/schemas/expconf/v0/log-action.json", + "default": null }, "signal": { "type": [ - "string" - ] + "string", + "null" + ], + "default": null } }, "checks": { - "neither action nor signal is set": { + "one of or both of action and signal must be set": { "anyOf": [ { "required": [ From 072746e520d614a2f762c2686af5a94289804ebf Mon Sep 17 00:00:00 2001 From: Jerry Gong Date: Sun, 29 Sep 2024 21:43:01 -0400 Subject: [PATCH 03/27] Fix failed CI tests --- master/internal/api_runs.go | 1 - master/internal/core_experiment.go | 14 -------------- master/internal/trial_intg_test.go | 5 ++--- master/pkg/model/task_container_defaults.go | 2 -- master/pkg/schemas/defaults.go | 6 ------ master/pkg/schemas/expconf/const.go | 1 + master/pkg/schemas/expconf/log_pattern_config.go | 7 +++---- master/pkg/schemas/merge.go | 5 +---- master/pkg/schemas/schema.go | 3 --- schemas/test_cases/v0/experiment.yaml | 6 +++++- 10 files changed, 12 insertions(+), 38 deletions(-) diff --git a/master/internal/api_runs.go b/master/internal/api_runs.go index a5ec14165e8..7f6322b2958 100644 --- a/master/internal/api_runs.go +++ b/master/internal/api_runs.go @@ -130,7 +130,6 @@ func (a *apiServer) SearchRuns( } resp.Pagination = pagination resp.Runs = runs - fmt.Printf("\n\n runs: %#v\n\n", runs) return resp, nil } diff --git a/master/internal/core_experiment.go b/master/internal/core_experiment.go index 94507f7d2fe..7ce08f6dc1f 100644 --- a/master/internal/core_experiment.go +++ b/master/internal/core_experiment.go @@ -291,9 +291,7 @@ func (m *Master) parseCreateExperiment(ctx context.Context, req *apiv1.CreateExp } defaulted := schemas.WithDefaults(config) - fmt.Printf("\n\ndefaulted: %#v\n\n", *defaulted.RawEnvironment.RawImage.RawCUDA) - fmt.Printf("\n\nconfig: %#v\n\n", config.RawEnvironment) resources := defaulted.Resources() p, err := getCreateExperimentsProject(m, req, owner, defaulted) @@ -320,23 +318,17 @@ func (m *Master) parseCreateExperiment(ctx context.Context, req *apiv1.CreateExp } } - fmt.Printf("\n\nbefore mering taskcontainerdefault: %#v\n\n", m.config.TaskContainerDefaults.Image) - taskContainerDefaults, err := m.rm.TaskContainerDefaults( poolName, m.config.TaskContainerDefaults, ) - fmt.Printf("\n\nafter mering taskcontainerdefault: %#v\n\n", taskContainerDefaults.Image) if err != nil { return nil, nil, config, nil, nil, errors.Wrapf(err, "error getting TaskContainerDefaults") } - fmt.Printf("\n\nbefore mering taskcontainerdefault with config: %#v\n\n", config.RawEnvironment) taskSpec.TaskContainerDefaults = taskContainerDefaults taskSpec.TaskContainerDefaults.MergeIntoExpConfig(&config) - fmt.Printf("\n\nafter mering taskcontainerdefault with config: %#v\n\n", config.RawEnvironment.RawImage) - // Merge log retention into the taskSpec. if config.RawRetentionPolicy != nil { taskSpec.LogRetentionDays = config.RawRetentionPolicy.RawLogRetentionDays @@ -364,15 +356,9 @@ func (m *Master) parseCreateExperiment(ctx context.Context, req *apiv1.CreateExp config.RawResources.RawPriority = &prio } - userLogPolicyConfig := config.RawLogPolicies // Lastly, apply any json-schema-defined defaults. - fmt.Printf("\n\nbefore last withdefaults config: %#v\n\n", config.RawEnvironment.RawImage) - fmt.Printf("\n\nbefore last withdefaults config log: %#v\n\n", userLogPolicyConfig) config = schemas.WithDefaults(config) - fmt.Printf("\n\nafter last withdefaults config: %#v\n\n", *config.RawEnvironment.RawImage.RawCUDA) - fmt.Printf("\n\nafter last withdefaults config log: %#v\n\n", config.RawLogPolicies) - // Make sure the experiment config has all eventuallyRequired fields. if err = schemas.IsComplete(config); err != nil { return nil, nil, config, nil, nil, errors.Wrap(err, "invalid experiment configuration") diff --git a/master/internal/trial_intg_test.go b/master/internal/trial_intg_test.go index 24ad7575da1..d7000f3a22a 100644 --- a/master/internal/trial_intg_test.go +++ b/master/internal/trial_intg_test.go @@ -21,7 +21,6 @@ import ( "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" - "github.com/determined-ai/determined/master/internal/db" internaldb "github.com/determined-ai/determined/master/internal/db" "github.com/determined-ai/determined/master/internal/experiment" "github.com/determined-ai/determined/master/internal/mocks/allocationmocks" @@ -228,7 +227,7 @@ func TestClearLogSignalAfterTrialRestarts(t *testing.T) { LogSignal *string `db:"log_signal"` }{} - err = db.Bun().NewSelect().Model(&runsOut). + err = internaldb.Bun().NewSelect().Model(&runsOut). Where("id = ?", tr.id). Scan(context.Background()) require.NoError(t, err) @@ -239,7 +238,7 @@ func TestClearLogSignalAfterTrialRestarts(t *testing.T) { bun.BaseModel `bun:"table:tasks"` LogSignal *string `db:"log_signal"` }{} - err = db.Bun().NewSelect().Model(&tasksOut). + err = internaldb.Bun().NewSelect().Model(&tasksOut). Join("LEFT JOIN run_id_task_id AS rt on tasks.task_id = rt.task_id"). Where("run_id = ?", tr.id). Scan(context.Background()) diff --git a/master/pkg/model/task_container_defaults.go b/master/pkg/model/task_container_defaults.go index a25a1a35950..9c44bab5f6a 100644 --- a/master/pkg/model/task_container_defaults.go +++ b/master/pkg/model/task_container_defaults.go @@ -164,9 +164,7 @@ func (c *TaskContainerDefaultsConfig) MergeIntoExpConfig(config *expconf.Experim c.Slurm.SbatchArgs(), configRawSlurmConfig.SbatchArgs()...) } - fmt.Printf("\nbefore merge, logp: %#v\n", config.RawLogPolicies) config.RawLogPolicies = schemas.Merge(config.RawLogPolicies, c.LogPolicies) - fmt.Printf("\nafter merge, logp: %#v\n", config.RawLogPolicies) configRawPbsConfig := config.RawPbsConfig config.RawPbsConfig = schemas.Merge(config.RawPbsConfig, &c.Pbs) diff --git a/master/pkg/schemas/defaults.go b/master/pkg/schemas/defaults.go index 93a4b920e4c..f93113ac388 100644 --- a/master/pkg/schemas/defaults.go +++ b/master/pkg/schemas/defaults.go @@ -29,12 +29,6 @@ func defaultIfDefaultable(obj reflect.Value) (reflect.Value, bool) { if !ok { return out, false } - if obj.Type().Name() == "LogPoliciesConfigV0" { - fmt.Printf("\n\n found type: %#v\n\n", obj.Type().Name()) - } - if obj.Type().Name() == "*LogPoliciesConfigV0" { - fmt.Printf("\n\n found type: %#v\n\n", obj.Type().Name()) - } // Verify the signature matches our Defaultable psuedointerface: // - one input (the receiver), and one output diff --git a/master/pkg/schemas/expconf/const.go b/master/pkg/schemas/expconf/const.go index 8d9befa8c4a..e2ea45cd140 100644 --- a/master/pkg/schemas/expconf/const.go +++ b/master/pkg/schemas/expconf/const.go @@ -13,6 +13,7 @@ const ( ROCMImage = "determinedai/environments:rocm-5.6-pytorch-1.3-tf-2.10-rocm-mpich-0736b6d" ) +// Default log policy values. const ( ECCErrorPattern = ".*uncorrectable ECC error encountered.*" ECCErrorSignal = "ECC Error" diff --git a/master/pkg/schemas/expconf/log_pattern_config.go b/master/pkg/schemas/expconf/log_pattern_config.go index 0efc37c9d32..7f4e8dc7414 100644 --- a/master/pkg/schemas/expconf/log_pattern_config.go +++ b/master/pkg/schemas/expconf/log_pattern_config.go @@ -14,19 +14,19 @@ import ( type LogPoliciesConfigV0 []LogPolicyV0 // WithDefaults implements the Defaultable psuedointerface. -func (l *LogPoliciesConfigV0) WithDefaults() *LogPoliciesConfigV0 { +func (b *LogPoliciesConfigV0) WithDefaults() *LogPoliciesConfigV0 { eccErrorPattern := ECCErrorPattern eccErrorSignal := ECCErrorSignal cudaOomPattern := CUDAOOMPattern cudaOomSignal := CUDAOOMSignal - if l != nil && len(*l) == 0 { + if b != nil && len(*b) == 0 { return &LogPoliciesConfigV0{ LogPolicyV0{RawPattern: eccErrorPattern, RawSignal: &eccErrorSignal}, LogPolicyV0{RawPattern: cudaOomPattern, RawSignal: &cudaOomSignal}, } } - return l + return b } // Merge implemenets the mergable interface. @@ -40,7 +40,6 @@ func (b LogPoliciesConfigV0) Merge( if err != nil { log.Errorf("marshaling error %+v %v", p, err) } - fmt.Printf("\njson: %#v\n", json) if seen[string(json)] { continue } diff --git a/master/pkg/schemas/merge.go b/master/pkg/schemas/merge.go index 143a7a07991..63c47ada1b2 100644 --- a/master/pkg/schemas/merge.go +++ b/master/pkg/schemas/merge.go @@ -3,7 +3,6 @@ package schemas import ( "fmt" "reflect" - "runtime/debug" ) // mergeIfMergable checks if obj implements our Mergable psuedointerface and calls obj.Merge(src) @@ -93,8 +92,7 @@ func assertTypeMatch(obj reflect.Value, src reflect.Value) { // return type will also be the same. The return value will never share memory with src, so it is // safe to alter obj without affecting src after the fact. func merge(obj reflect.Value, src reflect.Value, name string) reflect.Value { - debug.PrintStack() - fmt.Printf("\nmerge(%T, %T, %v)\n", obj.Interface(), src.Interface(), name) + // fmt.Printf("merge(%T, %T, %v)\n", obj.Interface(), src.Interface(), name) assertTypeMatch(obj, src) // Always handle pointers first. @@ -127,7 +125,6 @@ func merge(obj reflect.Value, src reflect.Value, name string) reflect.Value { switch obj.Kind() { case reflect.Struct: // Recurse into each field of the struct. - fmt.Printf("\nit's a struct\n\n") out := reflect.New(obj.Type()).Elem() for i := 0; i < src.NumField(); i++ { structField := src.Type().Field(i) diff --git a/master/pkg/schemas/schema.go b/master/pkg/schemas/schema.go index 29b8dbea181..4a6ff5f54c7 100644 --- a/master/pkg/schemas/schema.go +++ b/master/pkg/schemas/schema.go @@ -5,7 +5,6 @@ package schemas import ( "bytes" "encoding/json" - "fmt" "sync" "github.com/pkg/errors" @@ -136,7 +135,6 @@ func GetCompletenessValidator(url string) *jsonschema.Schema { compiler.Extensions["checks"] = extensions.ChecksExtension() compiler.Extensions["compareProperties"] = extensions.ComparePropertiesExtension() compiler.Extensions["optionalRef"] = extensions.OptionalRefExtension() - fmt.Printf("url: %#v\n", url) compiler.Extensions["eventuallyRequired"] = extensions.EventuallyRequiredExtension() compiler.Extensions["eventually"] = extensions.EventuallyExtension() @@ -144,7 +142,6 @@ func GetCompletenessValidator(url string) *jsonschema.Schema { if err != nil { panic("uncompilable schema: " + url) } - fmt.Printf("after Compiple url\n") completenessValidators[url] = validator diff --git a/schemas/test_cases/v0/experiment.yaml b/schemas/test_cases/v0/experiment.yaml index 62c20fe1fd1..8311c007e4f 100644 --- a/schemas/test_cases/v0/experiment.yaml +++ b/schemas/test_cases/v0/experiment.yaml @@ -93,7 +93,11 @@ # pre-0.15.6 non-Native-API experiments emitted `internal: null` configs internal: null labels: [] - log_policies: [] + log_policies: + - pattern: .*uncorrectable ECC error encountered.* + signal: ECC Error + - pattern: .*CUDA out of memory.* + signal: CUDA OOM max_restarts: 5 min_validation_period: batches: 0 From f2417f40e22d04a0831dc2327bf44b46d8d78dd5 Mon Sep 17 00:00:00 2001 From: Jerry Gong Date: Sun, 29 Sep 2024 22:04:57 -0400 Subject: [PATCH 04/27] Fix failed tests in CI --- e2e_tests/tests/cluster/test_log_policies.py | 4 ++-- master/internal/core_experiment.go | 1 - master/internal/logpattern/logpattern.go | 1 - schemas/test_cases/v0/experiment.yaml | 6 +++++- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/e2e_tests/tests/cluster/test_log_policies.py b/e2e_tests/tests/cluster/test_log_policies.py index 37651db9811..8a7aa8da7fe 100644 --- a/e2e_tests/tests/cluster/test_log_policies.py +++ b/e2e_tests/tests/cluster/test_log_policies.py @@ -222,5 +222,5 @@ def test_signal_clear_after_exp_continue() -> None: trialRes = bindings.get_GetTrial(sess, trialId=searchRes.runs[0].id) trialSignal = trialRes.trial.logSignal - assert runSignal == None - assert trialSignal == None + assert runSignal is None + assert trialSignal is None diff --git a/master/internal/core_experiment.go b/master/internal/core_experiment.go index 7ce08f6dc1f..88161fccc94 100644 --- a/master/internal/core_experiment.go +++ b/master/internal/core_experiment.go @@ -291,7 +291,6 @@ func (m *Master) parseCreateExperiment(ctx context.Context, req *apiv1.CreateExp } defaulted := schemas.WithDefaults(config) - resources := defaulted.Resources() p, err := getCreateExperimentsProject(m, req, owner, defaulted) diff --git a/master/internal/logpattern/logpattern.go b/master/internal/logpattern/logpattern.go index 2a8de32c5d3..baf36886988 100644 --- a/master/internal/logpattern/logpattern.go +++ b/master/internal/logpattern/logpattern.go @@ -283,6 +283,5 @@ func ClearSignal(ctx context.Context, taskID model.TaskID) error { return fmt.Errorf("resetting log signal: %w", err) } - // TODO run return nil } diff --git a/schemas/test_cases/v0/experiment.yaml b/schemas/test_cases/v0/experiment.yaml index 8311c007e4f..e08fbbb7866 100644 --- a/schemas/test_cases/v0/experiment.yaml +++ b/schemas/test_cases/v0/experiment.yaml @@ -200,7 +200,11 @@ add_capabilities: [] drop_capabilities: [] hyperparameters: {} - log_policies: [] + log_policies: + - pattern: '*' + signal: '*' + - pattern: '*' + signal: '*' labels: [] max_restarts: 5 min_checkpoint_period: From 0df5c6b22117bb8b144d59b3809b47b41b40d93b Mon Sep 17 00:00:00 2001 From: Jerry Gong Date: Wed, 9 Oct 2024 20:41:04 -0400 Subject: [PATCH 05/27] update property action --- master/internal/logpattern/logpattern.go | 36 ++++++------ .../pkg/model/task_container_defaults_test.go | 57 +++++++++++-------- .../pkg/schemas/expconf/log_pattern_config.go | 42 +++++++++----- .../pkg/schemas/expconf/zgen_log_policy_v0.go | 8 +-- master/pkg/schemas/zgen_schemas.go | 17 +++--- schemas/expconf/v0/log-policy.json | 17 +++--- 6 files changed, 102 insertions(+), 75 deletions(-) diff --git a/master/internal/logpattern/logpattern.go b/master/internal/logpattern/logpattern.go index baf36886988..45261210337 100644 --- a/master/internal/logpattern/logpattern.go +++ b/master/internal/logpattern/logpattern.go @@ -76,24 +76,26 @@ func (l *LogPatternPolicies) monitor(ctx context.Context, } if compiledRegex.MatchString(log.Log) { - if policy.Action() != nil { - switch policy.Action().GetUnionMember().(type) { - case expconf.LogActionCancelRetries: - if err := addDontRetry( - ctx, model.TaskID(log.TaskID), *log.AgentID, policy.Pattern(), log.Log, - ); err != nil { - return fmt.Errorf("adding don't retry: %w", err) + if actions := policy.Actions(); len(actions) > 0 { + for _, a := range actions { + switch a.GetUnionMember().(type) { + case expconf.LogActionCancelRetries: + if err := addDontRetry( + ctx, model.TaskID(log.TaskID), *log.AgentID, policy.Pattern(), log.Log, + ); err != nil { + return fmt.Errorf("adding don't retry: %w", err) + } + + case expconf.LogActionExcludeNode: + if err := addRetryOnDifferentNode( + ctx, model.TaskID(log.TaskID), *log.AgentID, policy.Pattern(), log.Log, + ); err != nil { + return fmt.Errorf("adding retry on different node: %w", err) + } + + default: + return fmt.Errorf("unrecognized log pattern policy type") } - - case expconf.LogActionExcludeNode: - if err := addRetryOnDifferentNode( - ctx, model.TaskID(log.TaskID), *log.AgentID, policy.Pattern(), log.Log, - ); err != nil { - return fmt.Errorf("adding retry on different node: %w", err) - } - - default: - return fmt.Errorf("unrecognized log pattern policy type") } } diff --git a/master/pkg/model/task_container_defaults_test.go b/master/pkg/model/task_container_defaults_test.go index 4f8a715538a..a5e18256073 100644 --- a/master/pkg/model/task_container_defaults_test.go +++ b/master/pkg/model/task_container_defaults_test.go @@ -440,8 +440,8 @@ func TestLogPatternUnmarshal(t *testing.T) { var tcd TaskContainerDefaultsConfig require.NoError(t, json.Unmarshal([]byte(string(`{ "log_policies": [ - {"pattern": "test", "action": {"type": "exclude_node"}}, - {"pattern": "test2", "action": {"type": "cancel_retries"}} + {"pattern": "test", "actions": [{"type": "exclude_node"}]}, + {"pattern": "test2", "actions": [{"type": "cancel_retries"}]} ] }`)), &tcd)) @@ -450,12 +450,12 @@ func TestLogPatternUnmarshal(t *testing.T) { NetworkMode: "bridge", PreemptionTimeout: DefaultPreemptionTimeout, LogPolicies: &expconf.LogPoliciesConfig{ - expconf.LogPolicy{RawPattern: "test", RawAction: &expconf.LogAction{ + expconf.LogPolicy{RawPattern: "test", RawActions: []expconf.LogAction{{ RawExcludeNode: &expconf.LogActionExcludeNode{}, - }}, - expconf.LogPolicy{RawPattern: "test2", RawAction: &expconf.LogAction{ + }}}, + expconf.LogPolicy{RawPattern: "test2", RawActions: []expconf.LogAction{{ RawCancelRetries: &expconf.LogActionCancelRetries{}, - }}, + }}}, }, } require.Equal(t, expected, tcd) @@ -464,44 +464,51 @@ func TestLogPatternUnmarshal(t *testing.T) { func TestLogPatternPoliciesMerging(t *testing.T) { defaults := &TaskContainerDefaultsConfig{ LogPolicies: &expconf.LogPoliciesConfig{ - expconf.LogPolicy{RawPattern: "a", RawAction: &expconf.LogAction{ + expconf.LogPolicy{RawPattern: "a", RawActions: []expconf.LogAction{{ RawCancelRetries: &expconf.LogActionCancelRetries{}, - }}, - expconf.LogPolicy{RawPattern: "b", RawAction: &expconf.LogAction{ - RawExcludeNode: &expconf.LogActionExcludeNode{}, + }}}, + expconf.LogPolicy{RawPattern: "b", RawActions: []expconf.LogAction{ + { + RawExcludeNode: &expconf.LogActionExcludeNode{}, + }, + { + RawCancelRetries: &expconf.LogActionCancelRetries{}, + }, }}, }, } conf := expconf.ExperimentConfig{ RawLogPolicies: &expconf.LogPoliciesConfig{ - expconf.LogPolicy{RawPattern: "b", RawAction: &expconf.LogAction{ + expconf.LogPolicy{RawPattern: "b", RawActions: []expconf.LogAction{{ RawCancelRetries: &expconf.LogActionCancelRetries{}, - }}, - expconf.LogPolicy{RawPattern: "b", RawAction: &expconf.LogAction{ + }}}, + expconf.LogPolicy{RawPattern: "b", RawActions: []expconf.LogAction{{ RawExcludeNode: &expconf.LogActionExcludeNode{}, - }}, - expconf.LogPolicy{RawPattern: "c", RawAction: &expconf.LogAction{ + }}}, + expconf.LogPolicy{RawPattern: "c", RawActions: []expconf.LogAction{{ RawExcludeNode: &expconf.LogActionExcludeNode{}, - }}, + }}}, }, } defaults.MergeIntoExpConfig(&conf) expected := &expconf.LogPoliciesConfig{ - expconf.LogPolicy{RawPattern: "a", RawAction: &expconf.LogAction{ - RawCancelRetries: &expconf.LogActionCancelRetries{}, - }}, - expconf.LogPolicy{RawPattern: "b", RawAction: &expconf.LogAction{ - RawExcludeNode: &expconf.LogActionExcludeNode{}, - }}, - expconf.LogPolicy{RawPattern: "b", RawAction: &expconf.LogAction{ + expconf.LogPolicy{RawPattern: "a", RawActions: []expconf.LogAction{{ RawCancelRetries: &expconf.LogActionCancelRetries{}, + }}}, + expconf.LogPolicy{RawPattern: "b", RawActions: []expconf.LogAction{ + { + RawExcludeNode: &expconf.LogActionExcludeNode{}, + }, + { + RawCancelRetries: &expconf.LogActionCancelRetries{}, + }, }}, - expconf.LogPolicy{RawPattern: "c", RawAction: &expconf.LogAction{ + expconf.LogPolicy{RawPattern: "c", RawActions: []expconf.LogAction{{ RawExcludeNode: &expconf.LogActionExcludeNode{}, - }}, + }}}, } require.Equal(t, expected, conf.RawLogPolicies) diff --git a/master/pkg/schemas/expconf/log_pattern_config.go b/master/pkg/schemas/expconf/log_pattern_config.go index 7f4e8dc7414..fe9cfbb9a60 100644 --- a/master/pkg/schemas/expconf/log_pattern_config.go +++ b/master/pkg/schemas/expconf/log_pattern_config.go @@ -3,8 +3,7 @@ package expconf import ( "encoding/json" "fmt" - - log "github.com/sirupsen/logrus" + "slices" "github.com/determined-ai/determined/master/pkg/schemas" "github.com/determined-ai/determined/master/pkg/union" @@ -34,17 +33,34 @@ func (b LogPoliciesConfigV0) Merge( other LogPoliciesConfigV0, ) LogPoliciesConfigV0 { var out LogPoliciesConfigV0 - seen := make(map[string]bool) - for _, p := range append(other, b...) { - json, err := json.Marshal(p) - if err != nil { - log.Errorf("marshaling error %+v %v", p, err) - } - if seen[string(json)] { - continue + + patternToLp := make(map[string]LogPolicyV0) + for _, p := range b { + patternToLp[p.RawPattern] = p + } + + for _, p := range other { + if v, ok := patternToLp[p.RawPattern]; ok { + // Union merge actions + existing_actions := patternToLp[p.RawPattern].RawActions + merged_actions := append([]LogActionV0{}, existing_actions...) + for _, a := range p.RawActions { + if ok := slices.Contains(existing_actions, a); !ok { + merged_actions = append(merged_actions, a) + } + } + v.RawActions = merged_actions + patternToLp[p.RawPattern] = v + + // Other signal takes precedence + v.RawSignal = schemas.Merge(p.RawSignal, v.RawSignal) + patternToLp[p.RawPattern] = v + } else { + patternToLp[p.RawPattern] = p } - seen[string(json)] = true + } + for _, p := range patternToLp { out = append(out, p) } return out @@ -56,8 +72,8 @@ func (b LogPoliciesConfigV0) Merge( type LogPolicyV0 struct { RawPattern string `json:"pattern"` - RawAction *LogActionV0 `json:"action,omitempty"` - RawSignal *string `json:"signal,omitempty"` + RawActions []LogActionV0 `json:"actions,omitempty"` + RawSignal *string `json:"signal,omitempty"` } // LogActionV0 is a policy to take after matching. diff --git a/master/pkg/schemas/expconf/zgen_log_policy_v0.go b/master/pkg/schemas/expconf/zgen_log_policy_v0.go index 8da43d74c33..09c66144644 100644 --- a/master/pkg/schemas/expconf/zgen_log_policy_v0.go +++ b/master/pkg/schemas/expconf/zgen_log_policy_v0.go @@ -16,12 +16,12 @@ func (l *LogPolicyV0) SetPattern(val string) { l.RawPattern = val } -func (l LogPolicyV0) Action() *LogActionV0 { - return l.RawAction +func (l LogPolicyV0) Actions() []LogActionV0 { + return l.RawActions } -func (l *LogPolicyV0) SetAction(val *LogActionV0) { - l.RawAction = val +func (l *LogPolicyV0) SetActions(val []LogActionV0) { + l.RawActions = val } func (l LogPolicyV0) Signal() *string { diff --git a/master/pkg/schemas/zgen_schemas.go b/master/pkg/schemas/zgen_schemas.go index 694b2524e8f..751376ccd77 100644 --- a/master/pkg/schemas/zgen_schemas.go +++ b/master/pkg/schemas/zgen_schemas.go @@ -1579,28 +1579,29 @@ var ( "string" ] }, - "action": { + "actions": { "type": [ - "object", + "array", "null" ], - "optionalRef": "http://determined.ai/schemas/expconf/v0/log-action.json", - "default": null + "items": { + "$ref": "http://determined.ai/schemas/expconf/v0/log-action.json" + }, + "default": [] }, "signal": { "type": [ "string", "null" - ], - "default": null + ] } }, "checks": { - "one of or both of action and signal must be set": { + "one of or both of actions and signal must be set": { "anyOf": [ { "required": [ - "action" + "actions" ] }, { diff --git a/schemas/expconf/v0/log-policy.json b/schemas/expconf/v0/log-policy.json index 56631f43bfb..f5530c47ca9 100644 --- a/schemas/expconf/v0/log-policy.json +++ b/schemas/expconf/v0/log-policy.json @@ -13,28 +13,29 @@ "string" ] }, - "action": { + "actions": { "type": [ - "object", + "array", "null" ], - "optionalRef": "http://determined.ai/schemas/expconf/v0/log-action.json", - "default": null + "items": { + "$ref": "http://determined.ai/schemas/expconf/v0/log-action.json" + }, + "default": [] }, "signal": { "type": [ "string", "null" - ], - "default": null + ] } }, "checks": { - "one of or both of action and signal must be set": { + "one of or both of actions and signal must be set": { "anyOf": [ { "required": [ - "action" + "actions" ] }, { From 822204ac5cd0f044ef97db0cd142116e3e66f643 Mon Sep 17 00:00:00 2001 From: Jerry Gong Date: Wed, 9 Oct 2024 22:56:30 -0400 Subject: [PATCH 06/27] add merging tests --- .../pkg/model/task_container_defaults_test.go | 3 -- .../pkg/schemas/expconf/log_pattern_config.go | 19 ++++--- master/pkg/schemas/expconf/schema_test.go | 2 + .../expconf/zgen_log_policies_config_v0.go | 21 ++++++++ master/pkg/schemas/zgen_schemas.go | 36 ++++++++++++- schemas/expconf/v0/experiment.json | 2 +- schemas/expconf/v0/log-policies.json | 9 ++++ schemas/test_cases/v0/merging.yaml | 50 +++++++++++++++++++ 8 files changed, 129 insertions(+), 13 deletions(-) create mode 100644 master/pkg/schemas/expconf/zgen_log_policies_config_v0.go create mode 100644 schemas/expconf/v0/log-policies.json diff --git a/master/pkg/model/task_container_defaults_test.go b/master/pkg/model/task_container_defaults_test.go index a5e18256073..18071b20907 100644 --- a/master/pkg/model/task_container_defaults_test.go +++ b/master/pkg/model/task_container_defaults_test.go @@ -483,9 +483,6 @@ func TestLogPatternPoliciesMerging(t *testing.T) { expconf.LogPolicy{RawPattern: "b", RawActions: []expconf.LogAction{{ RawCancelRetries: &expconf.LogActionCancelRetries{}, }}}, - expconf.LogPolicy{RawPattern: "b", RawActions: []expconf.LogAction{{ - RawExcludeNode: &expconf.LogActionExcludeNode{}, - }}}, expconf.LogPolicy{RawPattern: "c", RawActions: []expconf.LogAction{{ RawExcludeNode: &expconf.LogActionExcludeNode{}, }}}, diff --git a/master/pkg/schemas/expconf/log_pattern_config.go b/master/pkg/schemas/expconf/log_pattern_config.go index fe9cfbb9a60..289382c3196 100644 --- a/master/pkg/schemas/expconf/log_pattern_config.go +++ b/master/pkg/schemas/expconf/log_pattern_config.go @@ -3,13 +3,15 @@ package expconf import ( "encoding/json" "fmt" - "slices" "github.com/determined-ai/determined/master/pkg/schemas" + "github.com/determined-ai/determined/master/pkg/set" "github.com/determined-ai/determined/master/pkg/union" ) // LogPoliciesConfigV0 is a list of log policies. +// +//go:generate ../gen.sh type LogPoliciesConfigV0 []LogPolicyV0 // WithDefaults implements the Defaultable psuedointerface. @@ -35,21 +37,22 @@ func (b LogPoliciesConfigV0) Merge( var out LogPoliciesConfigV0 patternToLp := make(map[string]LogPolicyV0) - for _, p := range b { + for _, p := range other { patternToLp[p.RawPattern] = p } - for _, p := range other { + for _, p := range b { if v, ok := patternToLp[p.RawPattern]; ok { // Union merge actions - existing_actions := patternToLp[p.RawPattern].RawActions - merged_actions := append([]LogActionV0{}, existing_actions...) + actions := make(set.Set[LogActionV0]) + for _, a := range patternToLp[p.RawPattern].RawActions { + actions.Insert(a) + } for _, a := range p.RawActions { - if ok := slices.Contains(existing_actions, a); !ok { - merged_actions = append(merged_actions, a) + if !actions.Contains(a) { + v.RawActions = append(v.RawActions, a) } } - v.RawActions = merged_actions patternToLp[p.RawPattern] = v // Other signal takes precedence diff --git a/master/pkg/schemas/expconf/schema_test.go b/master/pkg/schemas/expconf/schema_test.go index 1ee06e5dfd0..0bc9ebfca70 100644 --- a/master/pkg/schemas/expconf/schema_test.go +++ b/master/pkg/schemas/expconf/schema_test.go @@ -165,6 +165,8 @@ func objectForURL(url string) interface{} { "http://determined.ai/schemas/expconf/v0/test-union-a.json", "http://determined.ai/schemas/expconf/v0/test-union-b.json": return &TestUnionV0{} + case "http://determined.ai/schemas/expconf/v0/log-policies.json": + return &LogPoliciesConfigV0{} default: panic(fmt.Sprintf("No object to match %v, maybe you need to add one?", url)) } diff --git a/master/pkg/schemas/expconf/zgen_log_policies_config_v0.go b/master/pkg/schemas/expconf/zgen_log_policies_config_v0.go new file mode 100644 index 00000000000..49b45b654d5 --- /dev/null +++ b/master/pkg/schemas/expconf/zgen_log_policies_config_v0.go @@ -0,0 +1,21 @@ +// Code generated by gen.py. DO NOT EDIT. + +package expconf + +import ( + "github.com/santhosh-tekuri/jsonschema/v2" + + "github.com/determined-ai/determined/master/pkg/schemas" +) + +func (l LogPoliciesConfigV0) ParsedSchema() interface{} { + return schemas.ParsedLogPoliciesConfigV0() +} + +func (l LogPoliciesConfigV0) SanityValidator() *jsonschema.Schema { + return schemas.GetSanityValidator("http://determined.ai/schemas/expconf/v0/log-policies.json") +} + +func (l LogPoliciesConfigV0) CompletenessValidator() *jsonschema.Schema { + return schemas.GetCompletenessValidator("http://determined.ai/schemas/expconf/v0/log-policies.json") +} diff --git a/master/pkg/schemas/zgen_schemas.go b/master/pkg/schemas/zgen_schemas.go index 751376ccd77..488a901272a 100644 --- a/master/pkg/schemas/zgen_schemas.go +++ b/master/pkg/schemas/zgen_schemas.go @@ -850,7 +850,7 @@ var ( ], "default": [], "items": { - "$ref": "http://determined.ai/schemas/expconf/v0/log-policy.json" + "optionalref": "http://determined.ai/schemas/expconf/v0/log-policies.json" } }, "retention_policy": { @@ -1563,6 +1563,16 @@ var ( "type": true } } +`) + textLogPoliciesConfigV0 = []byte(`{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "http://determined.ai/schemas/expconf/v0/log-policies.json", + "title": "LogPoliciesConfig", + "type": "array", + "items": { + "$ref": "http://determined.ai/schemas/expconf/v0/log-policy.json" + } +} `) textLogPolicyV0 = []byte(`{ "$schema": "http://json-schema.org/draft-07/schema#", @@ -3485,6 +3495,8 @@ var ( schemaLogActionV0 interface{} + schemaLogPoliciesConfigV0 interface{} + schemaLogPolicyV0 interface{} schemaOptimizationsConfigV0 interface{} @@ -4180,6 +4192,26 @@ func ParsedLogActionV0() interface{} { return schemaLogActionV0 } +func ParsedLogPoliciesConfigV0() interface{} { + cacheLock.RLock() + if schemaLogPoliciesConfigV0 != nil { + cacheLock.RUnlock() + return schemaLogPoliciesConfigV0 + } + cacheLock.RUnlock() + + cacheLock.Lock() + defer cacheLock.Unlock() + if schemaLogPoliciesConfigV0 != nil { + return schemaLogPoliciesConfigV0 + } + err := json.Unmarshal(textLogPoliciesConfigV0, &schemaLogPoliciesConfigV0) + if err != nil { + panic("invalid embedded json for LogPoliciesConfigV0") + } + return schemaLogPoliciesConfigV0 +} + func ParsedLogPolicyV0() interface{} { cacheLock.RLock() if schemaLogPolicyV0 != nil { @@ -4937,6 +4969,8 @@ func schemaBytesMap() map[string][]byte { cachedSchemaBytesMap[url] = textLogActionExcludeNodeV0 url = "http://determined.ai/schemas/expconf/v0/log-action.json" cachedSchemaBytesMap[url] = textLogActionV0 + url = "http://determined.ai/schemas/expconf/v0/log-policies.json" + cachedSchemaBytesMap[url] = textLogPoliciesConfigV0 url = "http://determined.ai/schemas/expconf/v0/log-policy.json" cachedSchemaBytesMap[url] = textLogPolicyV0 url = "http://determined.ai/schemas/expconf/v0/optimizations.json" diff --git a/schemas/expconf/v0/experiment.json b/schemas/expconf/v0/experiment.json index eb5c17cf690..24ab3135b3e 100644 --- a/schemas/expconf/v0/experiment.json +++ b/schemas/expconf/v0/experiment.json @@ -123,7 +123,7 @@ ], "default": [], "items": { - "$ref": "http://determined.ai/schemas/expconf/v0/log-policy.json" + "optionalref": "http://determined.ai/schemas/expconf/v0/log-policies.json" } }, "retention_policy": { diff --git a/schemas/expconf/v0/log-policies.json b/schemas/expconf/v0/log-policies.json new file mode 100644 index 00000000000..475594526e4 --- /dev/null +++ b/schemas/expconf/v0/log-policies.json @@ -0,0 +1,9 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "http://determined.ai/schemas/expconf/v0/log-policies.json", + "title": "LogPoliciesConfig", + "type": "array", + "items": { + "$ref": "http://determined.ai/schemas/expconf/v0/log-policy.json" + } +} diff --git a/schemas/test_cases/v0/merging.yaml b/schemas/test_cases/v0/merging.yaml index c1aa5b88807..cbc8f58f71c 100644 --- a/schemas/test_cases/v0/merging.yaml +++ b/schemas/test_cases/v0/merging.yaml @@ -193,3 +193,53 @@ save_experiment_best: null save_trial_best: null save_trial_latest: null + +- name: unique actions when merged + merge_as: http://determined.ai/schemas/expconf/v0/log-policies.json + case: + - pattern: a + actions: + - type: cancel_retries + merge_src: + - pattern: a + actions: + - type: exclude_node + merged: + - pattern: a + actions: + - type: exclude_node + - type: cancel_retries + +- name: no duplicated actions when merged + merge_as: http://determined.ai/schemas/expconf/v0/log-policies.json + case: + - pattern: a + actions: + - type: cancel_retries + - type: exclude_node + merge_src: + - pattern: a + actions: + - type: exclude_node + merged: + - pattern: a + actions: + - type: exclude_node + - type: cancel_retries + +- name: no duplicated actions when merged 2 + merge_as: http://determined.ai/schemas/expconf/v0/log-policies.json + case: + - pattern: a + actions: + - type: exclude_node + merge_src: + - pattern: a + actions: + - type: cancel_retries + - type: exclude_node + merged: + - pattern: a + actions: + - type: cancel_retries + - type: exclude_node From 40c8edc612ffed16831ffbea7d2de4c5ad017b82 Mon Sep 17 00:00:00 2001 From: Jerry Gong Date: Wed, 9 Oct 2024 23:07:17 -0400 Subject: [PATCH 07/27] fix CI tests --- master/internal/api_tasks_intg_test.go | 8 ++++---- master/internal/db/postgres_experiments_intg_test.go | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/master/internal/api_tasks_intg_test.go b/master/internal/api_tasks_intg_test.go index 9298d9e8f12..581913833ff 100644 --- a/master/internal/api_tasks_intg_test.go +++ b/master/internal/api_tasks_intg_test.go @@ -219,12 +219,12 @@ func TestPostTaskLogsLogPattern(t *testing.T) { activeConfig, err := api.m.db.ActiveExperimentConfig(trial.ExperimentID) require.NoError(t, err) activeConfig.RawLogPolicies = &expconf.LogPoliciesConfig{ - expconf.LogPolicy{RawPattern: "sub", RawAction: &expconf.LogAction{ + expconf.LogPolicy{RawPattern: "sub", RawActions: []expconf.LogActionV0{{ RawCancelRetries: &expconf.LogActionCancelRetries{}, - }}, - expconf.LogPolicy{RawPattern: `\d{5}$`, RawAction: &expconf.LogAction{ + }}}, + expconf.LogPolicy{RawPattern: `\d{5}$`, RawActions: []expconf.LogActionV0{{ RawExcludeNode: &expconf.LogActionExcludeNode{}, - }}, + }}}, } v, err := json.Marshal(activeConfig) diff --git a/master/internal/db/postgres_experiments_intg_test.go b/master/internal/db/postgres_experiments_intg_test.go index af9c2f9f5dd..4b81bad2f64 100644 --- a/master/internal/db/postgres_experiments_intg_test.go +++ b/master/internal/db/postgres_experiments_intg_test.go @@ -448,12 +448,12 @@ func TestActiveLogPatternPolicies(t *testing.T) { activeConfig, err := db.ActiveExperimentConfig(exp.ID) require.NoError(t, err) activeConfig.RawLogPolicies = &expconf.LogPoliciesConfig{ - expconf.LogPolicy{RawPattern: "sub", RawAction: &expconf.LogAction{ + expconf.LogPolicy{RawPattern: "sub", RawActions: []expconf.LogAction{{ RawCancelRetries: &expconf.LogActionCancelRetries{}, - }}, - expconf.LogPolicy{RawPattern: `\d{5}$`, RawAction: &expconf.LogAction{ + }}}, + expconf.LogPolicy{RawPattern: `\d{5}$`, RawActions: []expconf.LogAction{{ RawExcludeNode: &expconf.LogActionExcludeNode{}, - }}, + }}}, } v, err := json.Marshal(activeConfig) From eae8064ea54d3280aad7884fb53292ff25837f7e Mon Sep 17 00:00:00 2001 From: Jerry Gong Date: Thu, 10 Oct 2024 00:16:18 -0400 Subject: [PATCH 08/27] lint --- e2e_tests/tests/cluster/test_log_policies.py | 26 ++++++++++++-------- e2e_tests/tests/cluster/utils.py | 2 +- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/e2e_tests/tests/cluster/test_log_policies.py b/e2e_tests/tests/cluster/test_log_policies.py index 8a7aa8da7fe..a500e4a341d 100644 --- a/e2e_tests/tests/cluster/test_log_policies.py +++ b/e2e_tests/tests/cluster/test_log_policies.py @@ -2,12 +2,10 @@ from determined.common.api import bindings from determined.experimental import client -from tests import api_utils -from tests import experiment as exp -from tests.experiment import noop -from tests import detproc +from tests import api_utils, detproc from tests import experiment as exp from tests.cluster import utils +from tests.experiment import noop @pytest.mark.e2e_cpu @@ -50,7 +48,7 @@ def test_log_policy_exclude_node_k8s(should_match: bool) -> None: assert agents[0].slots is not None config = { - "log_policies": [{"pattern": regex, "action": {"type": "exclude_node"}}], + "log_policies": [{"pattern": regex, "actions": [{"type": "exclude_node"}]}], "resources": {"slots_per_trial": len(agents[0].slots)}, "max_restarts": 1, } @@ -93,7 +91,7 @@ def test_log_policy_exclude_node_single_agent(should_match: bool) -> None: assert agents[0].slots is not None config = { - "log_policies": [{"pattern": regex, "action": {"type": "exclude_node"}}], + "log_policies": [{"pattern": regex, "actions": [{"type": "exclude_node"}]}], "resources": {"slots_per_trial": len(agents[0].slots)}, "max_restarts": 1, } @@ -136,7 +134,7 @@ def test_log_policy_exclude_slurm(should_match: bool) -> None: regex = r"(.*) this should not match (.*)" config = { - "log_policies": [{"pattern": regex, "action": {"type": "exclude_node"}}], + "log_policies": [{"pattern": regex, "actions": [{"type": "exclude_node"}]}], "max_restarts": 1, } exp_ref = noop.create_experiment(sess, [noop.Exit(7)], config=config) @@ -173,7 +171,6 @@ def test_log_signal(should_match: bool) -> None: exp_ref = noop.create_experiment(sess, [noop.Exit(7)], config=config) assert exp_ref.wait(interval=0.01) == client.ExperimentState.ERROR - searchRes = utils.get_run_by_exp_id(sess, exp_ref.id) runSignal = searchRes.runs[0].logSignal @@ -212,8 +209,17 @@ def test_signal_clear_after_exp_continue() -> None: assert runSignal == expected_signal assert trialSignal == expected_signal - - detproc.check_call(sess, ["det", "e", "continue", str(exp_ref.id), "--config", "hyperparameters.crash_on_startup=false"]) + detproc.check_call( + sess, + [ + "det", + "e", + "continue", + str(exp_ref.id), + "--config", + "hyperparameters.crash_on_startup=false", + ], + ) exp.wait_for_experiment_state(sess, exp_ref.id, bindings.experimentv1State.COMPLETED) searchRes = utils.get_run_by_exp_id(sess, exp_ref.id) diff --git a/e2e_tests/tests/cluster/utils.py b/e2e_tests/tests/cluster/utils.py index f71e1cb290e..9bef663f01f 100644 --- a/e2e_tests/tests/cluster/utils.py +++ b/e2e_tests/tests/cluster/utils.py @@ -200,7 +200,7 @@ def set_master_port(config: str) -> None: conf.MASTER_PORT = port -def get_run_by_exp_id(sess, exp_id) -> int: +def get_run_by_exp_id(sess: api.Session, exp_id: int) -> bindings.v1SearchRunsResponse: return bindings.post_SearchRuns( sess, body=bindings.v1SearchRunsRequest( From 04bc0ee299377e177084d3484753096456791446 Mon Sep 17 00:00:00 2001 From: Jerry Gong Date: Thu, 10 Oct 2024 11:09:59 -0400 Subject: [PATCH 09/27] Update default test --- master/internal/api_tasks_intg_test.go | 83 -------------------------- schemas/test_cases/v0/defaults.yaml | 12 ++++ 2 files changed, 12 insertions(+), 83 deletions(-) diff --git a/master/internal/api_tasks_intg_test.go b/master/internal/api_tasks_intg_test.go index 581913833ff..9f2aea9a756 100644 --- a/master/internal/api_tasks_intg_test.go +++ b/master/internal/api_tasks_intg_test.go @@ -513,86 +513,3 @@ func TestPostTaskLogsLogSignalDataSaving(t *testing.T) { require.Equal(t, "sub", *tasksOut.LogSignal) } - -func TestPostTaskLogsDefaultLogSignal(t *testing.T) { - api, curUser, ctx := setupAPITest(t, nil) - trial, task := createTestTrial(t, api, curUser) - - _, err := api.PostTaskLogs(ctx, &apiv1.PostTaskLogsRequest{ - Logs: []*taskv1.TaskLog{ - { - TaskId: string(task.TaskID), - AgentId: ptrs.Ptr("a1"), - Log: "log CUDA out of memory more log", - }, - }, - }) - require.NoError(t, err) - - runsOut := struct { - bun.BaseModel `bun:"table:runs"` - LogSignal *string `db:"log_signal"` - }{} - - err = db.Bun().NewSelect().Model(&runsOut). - Where("id = ?", trial.ID). - Scan(ctx) - require.NoError(t, err) - require.NotNil(t, runsOut) - require.NotNil(t, runsOut.LogSignal) - - require.Equal(t, "CUDA OOM", *runsOut.LogSignal) - - tasksOut := struct { - bun.BaseModel `bun:"table:tasks"` - LogSignal *string `db:"log_signal"` - }{} - err = db.Bun().NewSelect().Model(&tasksOut). - Join("LEFT JOIN run_id_task_id AS rt on tasks.task_id = rt.task_id"). - Where("run_id = ?", trial.ID). - Scan(ctx) - require.NoError(t, err) - require.NotNil(t, tasksOut) - require.NotNil(t, tasksOut.LogSignal) - - require.Equal(t, "CUDA OOM", *tasksOut.LogSignal) - - _, err = api.PostTaskLogs(ctx, &apiv1.PostTaskLogsRequest{ - Logs: []*taskv1.TaskLog{ - { - TaskId: string(task.TaskID), - AgentId: ptrs.Ptr("a1"), - Log: "log uncorrectable ECC error encountered more log", - }, - }, - }) - require.NoError(t, err) - - runsOut = struct { - bun.BaseModel `bun:"table:runs"` - LogSignal *string `db:"log_signal"` - }{} - - err = db.Bun().NewSelect().Model(&runsOut). - Where("id = ?", trial.ID). - Scan(ctx) - require.NoError(t, err) - require.NotNil(t, runsOut) - require.NotNil(t, runsOut.LogSignal) - - require.Equal(t, "ECC Error", *runsOut.LogSignal) - - tasksOut = struct { - bun.BaseModel `bun:"table:tasks"` - LogSignal *string `db:"log_signal"` - }{} - err = db.Bun().NewSelect().Model(&tasksOut). - Join("LEFT JOIN run_id_task_id AS rt on tasks.task_id = rt.task_id"). - Where("run_id = ?", trial.ID). - Scan(ctx) - require.NoError(t, err) - require.NotNil(t, tasksOut) - require.NotNil(t, tasksOut.LogSignal) - - require.Equal(t, "ECC Error", *tasksOut.LogSignal) -} diff --git a/schemas/test_cases/v0/defaults.yaml b/schemas/test_cases/v0/defaults.yaml index 05c7c703a79..ff9c99177a2 100644 --- a/schemas/test_cases/v0/defaults.yaml +++ b/schemas/test_cases/v0/defaults.yaml @@ -216,3 +216,15 @@ priority: null resource_pool: '' is_single_node: null + +- name: log policies defaults + sane_as: + - http://determined.ai/schemas/expconf/v0/log-policies.json + default_as: + http://determined.ai/schemas/expconf/v0/log-policies.json + case: [] + defaulted: + - pattern: .*uncorrectable ECC error encountered.* + signal: ECC Error + - pattern: .*CUDA out of memory.* + signal: CUDA OOM From 323d58f5ccbc48701bb01d74b3c8b8f02b184d87 Mon Sep 17 00:00:00 2001 From: Jerry Gong Date: Tue, 15 Oct 2024 16:40:32 -0400 Subject: [PATCH 10/27] Add Backward Compatibility action is a legacy field now --- master/internal/logpattern/logpattern.go | 52 ++--- master/internal/trial.go | 2 + .../pkg/schemas/expconf/experiment_config.go | 2 +- master/pkg/schemas/expconf/latest.go | 2 - .../pkg/schemas/expconf/log_pattern_config.go | 220 +++++++++++++++--- master/pkg/schemas/expconf/schema_test.go | 82 ++++++- .../expconf/zgen_experiment_config_v0.go | 7 +- .../zgen_log_action_cancel_retries_v0.go | 21 -- .../zgen_log_action_exclude_node_v0.go | 21 -- ...gen_log_legacy_action_cancel_retries_v0.go | 21 ++ .../zgen_log_legacy_action_exclude_node_v0.go | 21 ++ ...ion_v0.go => zgen_log_legacy_action_v0.go} | 14 +- .../pkg/schemas/expconf/zgen_log_policy_v0.go | 16 +- .../schemas/extensions/disallow_properties.go | 2 +- master/pkg/schemas/zgen_schemas.go | 171 +++++++++----- schemas/expconf/v0/experiment.json | 4 +- schemas/expconf/v0/log-action.json | 47 ++-- ... => log-legacy-action-cancel-retries.json} | 4 +- ...on => log-legacy-action-exclude-node.json} | 4 +- schemas/expconf/v0/log-legacy-action.json | 32 +++ schemas/expconf/v0/log-policy.json | 29 ++- .../test_cases/v0/backward-compatibility.yaml | 20 ++ schemas/test_cases/v0/defaults.yaml | 6 +- schemas/test_cases/v0/experiment.yaml | 12 +- schemas/test_cases/v0/merging.yaml | 28 +-- schemas/test_cases/v0/misc.yaml | 47 ++++ schemas/test_cases/v0/unions.yaml | 6 +- 27 files changed, 629 insertions(+), 264 deletions(-) delete mode 100644 master/pkg/schemas/expconf/zgen_log_action_cancel_retries_v0.go delete mode 100644 master/pkg/schemas/expconf/zgen_log_action_exclude_node_v0.go create mode 100644 master/pkg/schemas/expconf/zgen_log_legacy_action_cancel_retries_v0.go create mode 100644 master/pkg/schemas/expconf/zgen_log_legacy_action_exclude_node_v0.go rename master/pkg/schemas/expconf/{zgen_log_action_v0.go => zgen_log_legacy_action_v0.go} (56%) rename schemas/expconf/v0/{log-action-cancel-retries.json => log-legacy-action-cancel-retries.json} (67%) rename schemas/expconf/v0/{log-action-exclude-node.json => log-legacy-action-exclude-node.json} (67%) create mode 100644 schemas/expconf/v0/log-legacy-action.json create mode 100644 schemas/test_cases/v0/backward-compatibility.yaml diff --git a/master/internal/logpattern/logpattern.go b/master/internal/logpattern/logpattern.go index 45261210337..c8f84d4ac0d 100644 --- a/master/internal/logpattern/logpattern.go +++ b/master/internal/logpattern/logpattern.go @@ -92,38 +92,36 @@ func (l *LogPatternPolicies) monitor(ctx context.Context, ); err != nil { return fmt.Errorf("adding retry on different node: %w", err) } - + case string: + signal := a.Signal + if signal != nil { + err = db.Bun().RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error { + if _, err := tx.NewUpdate().Model(&model.Task{}). + Set("log_signal = ?", signal). + Where("task_id = ?", log.TaskID). + Exec(ctx); err != nil { + return fmt.Errorf("updating log signal of task %s: %w", log.TaskID, err) + } + if _, err := tx.NewUpdate().Model(&model.Run{}). + Table("run_id_task_id"). + Set("log_signal = ?", signal). + Where("run.id = run_id_task_id.run_id"). + Where("run_id_task_id.task_id = ?", log.TaskID). + Exec(ctx); err != nil { + return fmt.Errorf("updating log signal of task %s: %w", log.TaskID, err) + } + + return nil + }) + if err != nil { + return fmt.Errorf("updating log signal: %w", err) + } + } default: return fmt.Errorf("unrecognized log pattern policy type") } } } - - if policy.Signal() != nil { - signal := policy.Signal() - - err = db.Bun().RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error { - if _, err := tx.NewUpdate().Model(&model.Task{}). - Set("log_signal = ?", signal). - Where("task_id = ?", log.TaskID). - Exec(ctx); err != nil { - return fmt.Errorf("updating log signal of task %s: %w", log.TaskID, err) - } - if _, err := tx.NewUpdate().Model(&model.Run{}). - Table("run_id_task_id"). - Set("log_signal = ?", signal). - Where("run.id = run_id_task_id.run_id"). - Where("run_id_task_id.task_id = ?", log.TaskID). - Exec(ctx); err != nil { - return fmt.Errorf("updating log signal of task %s: %w", log.TaskID, err) - } - - return nil - }) - if err != nil { - return fmt.Errorf("updating log signal: %w", err) - } - } } } } diff --git a/master/internal/trial.go b/master/internal/trial.go index 552dea6a291..a724ed4c4fd 100644 --- a/master/internal/trial.go +++ b/master/internal/trial.go @@ -676,6 +676,8 @@ func (t *trial) handleAllocationExit(exit *task.AllocationExited) error { } } + // Clear the stored signal before trial restarts, as the log in the new trial + // may be different. if err := logpattern.ClearSignal(context.TODO(), t.taskID); err != nil { return t.transition(model.StateWithReason{ State: model.ErrorState, diff --git a/master/pkg/schemas/expconf/experiment_config.go b/master/pkg/schemas/expconf/experiment_config.go index 8ea181b86c4..b2701d846e4 100644 --- a/master/pkg/schemas/expconf/experiment_config.go +++ b/master/pkg/schemas/expconf/experiment_config.go @@ -29,7 +29,7 @@ type ExperimentConfigV0 struct { RawEnvironment *EnvironmentConfigV0 `json:"environment"` RawHyperparameters HyperparametersV0 `json:"hyperparameters"` RawLabels LabelsV0 `json:"labels"` - RawLogPolicies *LogPoliciesConfigV0 `json:"log_policies"` + RawLogPolicies LogPoliciesConfigV0 `json:"log_policies"` RawRetentionPolicy *RetentionPolicyConfigV0 `json:"retention_policy,omitempty"` RawMaxRestarts *int `json:"max_restarts"` RawMinCheckpointPeriod *LengthV0 `json:"min_checkpoint_period"` diff --git a/master/pkg/schemas/expconf/latest.go b/master/pkg/schemas/expconf/latest.go index 3d342c88409..9751934700f 100644 --- a/master/pkg/schemas/expconf/latest.go +++ b/master/pkg/schemas/expconf/latest.go @@ -31,8 +31,6 @@ type ( LogPoliciesConfig = LogPoliciesConfigV0 LogPolicy = LogPolicyV0 LogAction = LogActionV0 - LogActionCancelRetries = LogActionCancelRetriesV0 - LogActionExcludeNode = LogActionExcludeNodeV0 LogHyperparameter = LogHyperparameterV0 OptimizationsConfig = OptimizationsConfigV0 PbsConfig = PbsConfigV0 diff --git a/master/pkg/schemas/expconf/log_pattern_config.go b/master/pkg/schemas/expconf/log_pattern_config.go index 289382c3196..91711e434c2 100644 --- a/master/pkg/schemas/expconf/log_pattern_config.go +++ b/master/pkg/schemas/expconf/log_pattern_config.go @@ -4,11 +4,18 @@ import ( "encoding/json" "fmt" + "github.com/pkg/errors" + "github.com/determined-ai/determined/master/pkg/schemas" "github.com/determined-ai/determined/master/pkg/set" "github.com/determined-ai/determined/master/pkg/union" ) +const ( + cancelRetries = "cancel_retries" + excludeNode = "exclude_node" +) + // LogPoliciesConfigV0 is a list of log policies. // //go:generate ../gen.sh @@ -23,8 +30,8 @@ func (b *LogPoliciesConfigV0) WithDefaults() *LogPoliciesConfigV0 { if b != nil && len(*b) == 0 { return &LogPoliciesConfigV0{ - LogPolicyV0{RawPattern: eccErrorPattern, RawSignal: &eccErrorSignal}, - LogPolicyV0{RawPattern: cudaOomPattern, RawSignal: &cudaOomSignal}, + LogPolicyV0{RawPattern: eccErrorPattern, RawActions: []LogActionV0{{Signal: &eccErrorSignal}}}, + LogPolicyV0{RawPattern: cudaOomPattern, RawActions: []LogActionV0{{Signal: &cudaOomSignal}}}, } } return b @@ -37,29 +44,41 @@ func (b LogPoliciesConfigV0) Merge( var out LogPoliciesConfigV0 patternToLp := make(map[string]LogPolicyV0) - for _, p := range other { - patternToLp[p.RawPattern] = p + for _, lp := range other { + patternToLp[lp.RawPattern] = lp } - for _, p := range b { - if v, ok := patternToLp[p.RawPattern]; ok { - // Union merge actions - actions := make(set.Set[LogActionV0]) - for _, a := range patternToLp[p.RawPattern].RawActions { + for _, lp := range b { + if v, ok := patternToLp[lp.RawPattern]; ok { + // Union merge all actions except signal + actions := set.New[LogActionV0]() + var signal *LogActionV0 + for _, a := range patternToLp[lp.RawPattern].RawActions { + if a.Signal != nil { + signal = &a + continue + } actions.Insert(a) } - for _, a := range p.RawActions { + var otherSignal *LogActionV0 + for _, a := range lp.RawActions { + if a.Signal != nil { + otherSignal = &a + continue + } if !actions.Contains(a) { v.RawActions = append(v.RawActions, a) } } - patternToLp[p.RawPattern] = v // Other signal takes precedence - v.RawSignal = schemas.Merge(p.RawSignal, v.RawSignal) - patternToLp[p.RawPattern] = v + if otherSignal != nil || signal != nil { + v.RawActions = append(v.RawActions, *schemas.Merge(otherSignal, signal)) + } + + patternToLp[lp.RawPattern] = v } else { - patternToLp[p.RawPattern] = p + patternToLp[lp.RawPattern] = lp } } @@ -69,57 +88,194 @@ func (b LogPoliciesConfigV0) Merge( return out } +// UnmarshalJSON implements the json.Unmarshaler interface. +// The legacy log policy config allows entries with duplicated pattern because only single action +// can be associated with a pattern. For example: +// +// - pattern: a +// action: cancel_retries +// - pattern: a +// action: exclude_node +// +// An actions field is available now. Multiple entires can be associated with a pattern. No more +// duplicated pattern in log policies config. For example: +// +// - pattern: a +// actions: +// - cancel_retries +// - exclude_node +func (b *LogPoliciesConfigV0) UnmarshalJSON(data []byte) error { + type DefaultParser LogPoliciesConfigV0 + var jsonItems DefaultParser + if err := json.Unmarshal(data, &jsonItems); err != nil { + return errors.Wrapf(err, "failed to parse runtime items") + } + + // Merge LogPolicyV0s with the same pattern into one + patternToActions := make(map[string]set.Set[LogActionV0]) + for _, jsonItem := range jsonItems { + if jsonItem.RawLegacyAction != nil { + return fmt.Errorf("legacy action field expected to be nil: %+v", jsonItem) + } + + if _, ok := patternToActions[jsonItem.RawPattern]; !ok { + patternToActions[jsonItem.RawPattern] = set.New[LogActionV0]() + } + + for _, a := range jsonItem.RawActions { + actions := patternToActions[jsonItem.RawPattern] + actions.Insert(a) + } + } + + for p, actions := range patternToActions { + lp := LogPolicyV0{ + RawPattern: p, + RawActions: actions.ToSlice(), + } + *b = append(*b, lp) + } + + return nil +} + // LogPolicyV0 is an action to take if we match against trial logs. // //go:generate ../gen.sh type LogPolicyV0 struct { - RawPattern string `json:"pattern"` + RawPattern string `json:"pattern"` + RawLegacyAction *LogLegacyActionV0 `json:"action,omitempty"` + RawActions []LogActionV0 `json:"actions,omitempty"` +} + +// UnmarshalJSON implements the json.Unmarshaler interface. +func (b *LogPolicyV0) UnmarshalJSON(data []byte) error { + type DefaultParser LogPolicyV0 + var jsonItems DefaultParser + if err := json.Unmarshal(data, &jsonItems); err != nil { + return fmt.Errorf("failed to parse log policy: %w", err) + } + b.RawPattern = jsonItems.RawPattern - RawActions []LogActionV0 `json:"actions,omitempty"` - RawSignal *string `json:"signal,omitempty"` + // Backward compatiblity. Move data in the legacy action field to the newer actions field. + if jsonItems.RawLegacyAction != nil && jsonItems.RawActions == nil { + if jsonItems.RawLegacyAction.RawCancelRetries != nil { + b.RawActions = []LogActionV0{{CancelRetries: &LogActionCancelRetries{}}} + } else if jsonItems.RawLegacyAction.RawExcludeNode != nil { + b.RawActions = []LogActionV0{{ExcludeNode: &LogActionExcludeNode{}}} + } else { + return fmt.Errorf("invalid legacy log action: %+v", jsonItems.RawLegacyAction) + } + b.RawLegacyAction = nil + } else if jsonItems.RawLegacyAction == nil && jsonItems.RawActions != nil { + b.RawActions = jsonItems.RawActions + } else { + return fmt.Errorf("invalid log policy: %+v", jsonItems) + } + + return nil } // LogActionV0 is a policy to take after matching. // //go:generate ../gen.sh +type LogLegacyActionV0 struct { + RawCancelRetries *LogLegacyActionCancelRetriesV0 `union:"type,cancel_retries" json:"-"` + RawExcludeNode *LogLegacyActionExcludeNodeV0 `union:"type,exclude_node" json:"-"` +} + +// MarshalJSON implements the json.Marshaler interface. +func (s LogLegacyActionV0) MarshalJSON() ([]byte, error) { + return union.MarshalEx(s, true) +} + +// UnmarshalJSON implements the json.Unmarshaler interface. +func (s *LogLegacyActionV0) UnmarshalJSON(data []byte) error { + if err := union.Unmarshal(data, s); err != nil { + return err + } + type DefaultParser *LogLegacyActionV0 + if err := json.Unmarshal(data, DefaultParser(s)); err != nil { + return fmt.Errorf("failed to parse LogLegacyActionV0: %w", err) + } + return nil +} + +// LogActionV0 is a policy to take after matching. type LogActionV0 struct { - RawCancelRetries *LogActionCancelRetriesV0 `union:"type,cancel_retries" json:"-"` - RawExcludeNode *LogActionExcludeNodeV0 `union:"type,exclude_node" json:"-"` + Signal *string + CancelRetries *LogActionCancelRetries + ExcludeNode *LogActionExcludeNode } -// Merge implements schemas.Mergeable. -func (s LogActionV0) Merge(other LogActionV0) LogActionV0 { - return schemas.UnionMerge(s, other) +func (l LogActionV0) GetUnionMember() interface{} { + if l.Signal != nil { + return *l.Signal + } else if l.CancelRetries != nil { + return *l.CancelRetries + } else if l.ExcludeNode != nil { + return *l.ExcludeNode + } + panic("no union member defined") } // MarshalJSON implements the json.Marshaler interface. func (s LogActionV0) MarshalJSON() ([]byte, error) { - return union.MarshalEx(s, true) + if s.Signal != nil { + return json.Marshal(struct { + Signal string `json:"signal,omitempty"` + }{Signal: *s.Signal}) + } else if s.CancelRetries != nil { + return json.Marshal(cancelRetries) + } else if s.ExcludeNode != nil { + return json.Marshal(excludeNode) + } + return nil, fmt.Errorf("failed to marshal LogActionV0: %+v", s) } // UnmarshalJSON implements the json.Unmarshaler interface. func (s *LogActionV0) UnmarshalJSON(data []byte) error { - if err := union.Unmarshal(data, s); err != nil { - return err + var action string + if err := json.Unmarshal(data, &action); err == nil { + switch action { + case cancelRetries: + s.CancelRetries = &LogActionCancelRetries{} + case excludeNode: + s.ExcludeNode = &LogActionExcludeNode{} + default: + return fmt.Errorf("invalid log action: %v", action) + } } - type DefaultParser *LogActionV0 - if err := json.Unmarshal(data, DefaultParser(s)); err != nil { - return fmt.Errorf("failed to parse searcher config: %w", err) + + // Handle Signal + out := struct { + Signal *string `json:"signal,omitempty"` + }{} + if err := json.Unmarshal(data, &out); err == nil { + s.Signal = out.Signal } + return nil } -// LogActionCancelRetriesV0 doesn't retry the trial if it fails. +// LogActionCancelRetries doesn't retry the trial if it fails. +type LogActionCancelRetries struct{} + +// LogActionExcludeNodeV0 will exclude the node the log was seen on +// (only for that trial) and reschedule. +type LogActionExcludeNode struct{} + +// LogLegacyActionCancelRetriesV0 doesn't retry the trial if it fails. // //go:generate ../gen.sh -type LogActionCancelRetriesV0 struct { +type LogLegacyActionCancelRetriesV0 struct { // This comment is needed to stop ../gen.sh from complaining. } -// LogActionExcludeNodeV0 will exclude the node the log was seen on +// LogLegacyActionExcludeNodeV0 will exclude the node the log was seen on // (only for that trial) and reschedule. // //go:generate ../gen.sh -type LogActionExcludeNodeV0 struct { +type LogLegacyActionExcludeNodeV0 struct { // This comment is needed to stop ../gen.sh from complaining. } diff --git a/master/pkg/schemas/expconf/schema_test.go b/master/pkg/schemas/expconf/schema_test.go index 0bc9ebfca70..72b9dccf19a 100644 --- a/master/pkg/schemas/expconf/schema_test.go +++ b/master/pkg/schemas/expconf/schema_test.go @@ -13,6 +13,7 @@ import ( "github.com/pkg/errors" "github.com/santhosh-tekuri/jsonschema/v2" + tassert "github.com/stretchr/testify/assert" "gotest.tools/assert" "github.com/determined-ai/determined/master/pkg/schemas" @@ -21,17 +22,20 @@ import ( type JSON = interface{} type SchemaTestCase struct { - Name string `json:"name"` - SaneAs *[]string `json:"sane_as"` - CompleteAs *[]string `json:"complete_as"` - SanityErrors *map[string][]string `json:"sanity_errors"` - CompletenessErrors *map[string][]string `json:"completeness_errors"` - DefaultAs *string `json:"default_as"` - Defaulted *JSON `json:"defaulted"` - Case JSON `json:"case"` - MergeAs *string `json:"merge_as"` - MergeSrc *JSON `json:"merge_src"` - Merged *JSON `json:"merged"` + Name string `json:"name"` + SaneAs *[]string `json:"sane_as"` + CompleteAs *[]string `json:"complete_as"` + SanityErrors *map[string][]string `json:"sanity_errors"` + CompletenessErrors *map[string][]string `json:"completeness_errors"` + DefaultAs *string `json:"default_as"` + Defaulted *JSON `json:"defaulted"` + Case JSON `json:"case"` + MergeAs *string `json:"merge_as"` + MergeSrc *JSON `json:"merge_src"` + Merged *JSON `json:"merged"` + BackwardCompatibility *string `json:"backward_compatiblity"` + Legacy *JSON `json:"legacy"` + New *JSON `json:"new"` } func errorIn(expect string, errors []error) bool { @@ -288,7 +292,6 @@ func (tc SchemaTestCase) CheckMerged(t *testing.T) { assert.NilError(t, err) merged := schemas.Merge(obj, src) - // Compare json-to-json. mergedBytes, err := json.Marshal(merged) assert.NilError(t, err) @@ -337,6 +340,60 @@ func (tc SchemaTestCase) CheckRoundTrip(t *testing.T) { assert.DeepEqual(t, obj, cpy) } +func (tc SchemaTestCase) CheckBackwardCompatiblity(t *testing.T) { + if tc.BackwardCompatibility == nil && tc.Legacy == nil && tc.New == nil { + return + } + + if tc.BackwardCompatibility == nil || tc.Legacy == nil || tc.New == nil { + assert.NilError(t, errors.New( + "if any of backward_compatibility, legacy, or new are set in a test case, "+ + "they must all be set", + )) + } + + if *tc.BackwardCompatibility != "http://determined.ai/schemas/expconf/v0/log-policies.json" { + assert.NilError(t, errors.New("only log-policies.json is supported for now")) + } + + legacyBytes, err := json.Marshal(tc.Legacy) + assert.NilError(t, err) + newBytes, err := json.Marshal(tc.New) + assert.NilError(t, err) + + // Get an empty objects to unmarshal into. + legacy := LogPoliciesConfig{} + new := LogPoliciesConfig{} + + err = json.Unmarshal(legacyBytes, &legacy) + assert.NilError(t, err) + err = json.Unmarshal(newBytes, &new) + assert.NilError(t, err) + + legacyPatterns := make([]string, 0) + newPatternToActions := make(map[string][]LogAction) + newPatterns := make([]string, 0) + for _, lp := range new { + newPatternToActions[lp.Pattern()] = lp.Actions() + newPatterns = append(newPatterns, lp.Pattern()) + } + for _, lp := range legacy { + legacyPatterns = append(legacyPatterns, lp.Pattern()) + } + + assert := tassert.New(t) + assert.ElementsMatch(legacyPatterns, newPatterns, "", "legacy input was't translated to the expected output") + + for _, lp := range legacy { + pattern := lp.Pattern() + + legacyActions := lp.Actions() + newActions := newPatternToActions[pattern] + + assert.ElementsMatch(legacyActions, newActions, "legacy input was't translated to the expected output") + } +} + func RunCasesFile(t *testing.T, path string, displayPath string) { // Ignore the security error about including files as variables; this is just a test. byts, err := os.ReadFile(path) //nolint: gosec @@ -359,6 +416,7 @@ func RunCasesFile(t *testing.T, path string, displayPath string) { tc.CheckDefaulted(t) tc.CheckRoundTrip(t) tc.CheckMerged(t) + tc.CheckBackwardCompatiblity(t) }) } } diff --git a/master/pkg/schemas/expconf/zgen_experiment_config_v0.go b/master/pkg/schemas/expconf/zgen_experiment_config_v0.go index af2de47013c..f502fb04942 100644 --- a/master/pkg/schemas/expconf/zgen_experiment_config_v0.go +++ b/master/pkg/schemas/expconf/zgen_experiment_config_v0.go @@ -109,14 +109,11 @@ func (e *ExperimentConfigV0) SetLabels(val LabelsV0) { } func (e ExperimentConfigV0) LogPolicies() LogPoliciesConfigV0 { - if e.RawLogPolicies == nil { - panic("You must call WithDefaults on ExperimentConfigV0 before .LogPolicies") - } - return *e.RawLogPolicies + return e.RawLogPolicies } func (e *ExperimentConfigV0) SetLogPolicies(val LogPoliciesConfigV0) { - e.RawLogPolicies = &val + e.RawLogPolicies = val } func (e ExperimentConfigV0) RetentionPolicy() *RetentionPolicyConfigV0 { diff --git a/master/pkg/schemas/expconf/zgen_log_action_cancel_retries_v0.go b/master/pkg/schemas/expconf/zgen_log_action_cancel_retries_v0.go deleted file mode 100644 index c729c49a662..00000000000 --- a/master/pkg/schemas/expconf/zgen_log_action_cancel_retries_v0.go +++ /dev/null @@ -1,21 +0,0 @@ -// Code generated by gen.py. DO NOT EDIT. - -package expconf - -import ( - "github.com/santhosh-tekuri/jsonschema/v2" - - "github.com/determined-ai/determined/master/pkg/schemas" -) - -func (l LogActionCancelRetriesV0) ParsedSchema() interface{} { - return schemas.ParsedLogActionCancelRetriesV0() -} - -func (l LogActionCancelRetriesV0) SanityValidator() *jsonschema.Schema { - return schemas.GetSanityValidator("http://determined.ai/schemas/expconf/v0/log-action-cancel-retries.json") -} - -func (l LogActionCancelRetriesV0) CompletenessValidator() *jsonschema.Schema { - return schemas.GetCompletenessValidator("http://determined.ai/schemas/expconf/v0/log-action-cancel-retries.json") -} diff --git a/master/pkg/schemas/expconf/zgen_log_action_exclude_node_v0.go b/master/pkg/schemas/expconf/zgen_log_action_exclude_node_v0.go deleted file mode 100644 index 7c0063b06fe..00000000000 --- a/master/pkg/schemas/expconf/zgen_log_action_exclude_node_v0.go +++ /dev/null @@ -1,21 +0,0 @@ -// Code generated by gen.py. DO NOT EDIT. - -package expconf - -import ( - "github.com/santhosh-tekuri/jsonschema/v2" - - "github.com/determined-ai/determined/master/pkg/schemas" -) - -func (l LogActionExcludeNodeV0) ParsedSchema() interface{} { - return schemas.ParsedLogActionExcludeNodeV0() -} - -func (l LogActionExcludeNodeV0) SanityValidator() *jsonschema.Schema { - return schemas.GetSanityValidator("http://determined.ai/schemas/expconf/v0/log-action-exclude-node.json") -} - -func (l LogActionExcludeNodeV0) CompletenessValidator() *jsonschema.Schema { - return schemas.GetCompletenessValidator("http://determined.ai/schemas/expconf/v0/log-action-exclude-node.json") -} diff --git a/master/pkg/schemas/expconf/zgen_log_legacy_action_cancel_retries_v0.go b/master/pkg/schemas/expconf/zgen_log_legacy_action_cancel_retries_v0.go new file mode 100644 index 00000000000..e2feb5128cc --- /dev/null +++ b/master/pkg/schemas/expconf/zgen_log_legacy_action_cancel_retries_v0.go @@ -0,0 +1,21 @@ +// Code generated by gen.py. DO NOT EDIT. + +package expconf + +import ( + "github.com/santhosh-tekuri/jsonschema/v2" + + "github.com/determined-ai/determined/master/pkg/schemas" +) + +func (l LogLegacyActionCancelRetriesV0) ParsedSchema() interface{} { + return schemas.ParsedLogLegacyActionCancelRetriesV0() +} + +func (l LogLegacyActionCancelRetriesV0) SanityValidator() *jsonschema.Schema { + return schemas.GetSanityValidator("http://determined.ai/schemas/expconf/v0/log-legacy-action-cancel-retries.json") +} + +func (l LogLegacyActionCancelRetriesV0) CompletenessValidator() *jsonschema.Schema { + return schemas.GetCompletenessValidator("http://determined.ai/schemas/expconf/v0/log-legacy-action-cancel-retries.json") +} diff --git a/master/pkg/schemas/expconf/zgen_log_legacy_action_exclude_node_v0.go b/master/pkg/schemas/expconf/zgen_log_legacy_action_exclude_node_v0.go new file mode 100644 index 00000000000..6366f544a12 --- /dev/null +++ b/master/pkg/schemas/expconf/zgen_log_legacy_action_exclude_node_v0.go @@ -0,0 +1,21 @@ +// Code generated by gen.py. DO NOT EDIT. + +package expconf + +import ( + "github.com/santhosh-tekuri/jsonschema/v2" + + "github.com/determined-ai/determined/master/pkg/schemas" +) + +func (l LogLegacyActionExcludeNodeV0) ParsedSchema() interface{} { + return schemas.ParsedLogLegacyActionExcludeNodeV0() +} + +func (l LogLegacyActionExcludeNodeV0) SanityValidator() *jsonschema.Schema { + return schemas.GetSanityValidator("http://determined.ai/schemas/expconf/v0/log-legacy-action-exclude-node.json") +} + +func (l LogLegacyActionExcludeNodeV0) CompletenessValidator() *jsonschema.Schema { + return schemas.GetCompletenessValidator("http://determined.ai/schemas/expconf/v0/log-legacy-action-exclude-node.json") +} diff --git a/master/pkg/schemas/expconf/zgen_log_action_v0.go b/master/pkg/schemas/expconf/zgen_log_legacy_action_v0.go similarity index 56% rename from master/pkg/schemas/expconf/zgen_log_action_v0.go rename to master/pkg/schemas/expconf/zgen_log_legacy_action_v0.go index c74aa1384ae..a482b44a4d4 100644 --- a/master/pkg/schemas/expconf/zgen_log_action_v0.go +++ b/master/pkg/schemas/expconf/zgen_log_legacy_action_v0.go @@ -8,7 +8,7 @@ import ( "github.com/determined-ai/determined/master/pkg/schemas" ) -func (l LogActionV0) GetUnionMember() interface{} { +func (l LogLegacyActionV0) GetUnionMember() interface{} { if l.RawCancelRetries != nil { return *l.RawCancelRetries } @@ -18,14 +18,14 @@ func (l LogActionV0) GetUnionMember() interface{} { panic("no union member defined") } -func (l LogActionV0) ParsedSchema() interface{} { - return schemas.ParsedLogActionV0() +func (l LogLegacyActionV0) ParsedSchema() interface{} { + return schemas.ParsedLogLegacyActionV0() } -func (l LogActionV0) SanityValidator() *jsonschema.Schema { - return schemas.GetSanityValidator("http://determined.ai/schemas/expconf/v0/log-action.json") +func (l LogLegacyActionV0) SanityValidator() *jsonschema.Schema { + return schemas.GetSanityValidator("http://determined.ai/schemas/expconf/v0/log-legacy-action.json") } -func (l LogActionV0) CompletenessValidator() *jsonschema.Schema { - return schemas.GetCompletenessValidator("http://determined.ai/schemas/expconf/v0/log-action.json") +func (l LogLegacyActionV0) CompletenessValidator() *jsonschema.Schema { + return schemas.GetCompletenessValidator("http://determined.ai/schemas/expconf/v0/log-legacy-action.json") } diff --git a/master/pkg/schemas/expconf/zgen_log_policy_v0.go b/master/pkg/schemas/expconf/zgen_log_policy_v0.go index 09c66144644..4e775a05470 100644 --- a/master/pkg/schemas/expconf/zgen_log_policy_v0.go +++ b/master/pkg/schemas/expconf/zgen_log_policy_v0.go @@ -16,20 +16,20 @@ func (l *LogPolicyV0) SetPattern(val string) { l.RawPattern = val } -func (l LogPolicyV0) Actions() []LogActionV0 { - return l.RawActions +func (l LogPolicyV0) LegacyAction() *LogLegacyActionV0 { + return l.RawLegacyAction } -func (l *LogPolicyV0) SetActions(val []LogActionV0) { - l.RawActions = val +func (l *LogPolicyV0) SetLegacyAction(val *LogLegacyActionV0) { + l.RawLegacyAction = val } -func (l LogPolicyV0) Signal() *string { - return l.RawSignal +func (l LogPolicyV0) Actions() []LogActionV0 { + return l.RawActions } -func (l *LogPolicyV0) SetSignal(val *string) { - l.RawSignal = val +func (l *LogPolicyV0) SetActions(val []LogActionV0) { + l.RawActions = val } func (l LogPolicyV0) ParsedSchema() interface{} { diff --git a/master/pkg/schemas/extensions/disallow_properties.go b/master/pkg/schemas/extensions/disallow_properties.go index b2dc61af94f..76a65482500 100644 --- a/master/pkg/schemas/extensions/disallow_properties.go +++ b/master/pkg/schemas/extensions/disallow_properties.go @@ -1,5 +1,5 @@ // disallowProperties is for restricting which properties are allowed in an object with -// per-property, such as when we allow a k8s pod spec with some fields disallowed. +// per-property error messages, such as when we allow a k8s pod spec with some fields disallowed. // // Example: The "pod_spec" property of the environment config: // diff --git a/master/pkg/schemas/zgen_schemas.go b/master/pkg/schemas/zgen_schemas.go index 488a901272a..e5beb6fbd99 100644 --- a/master/pkg/schemas/zgen_schemas.go +++ b/master/pkg/schemas/zgen_schemas.go @@ -849,9 +849,7 @@ var ( "null" ], "default": [], - "items": { - "optionalref": "http://determined.ai/schemas/expconf/v0/log-policies.json" - } + "optionalRef": "http://determined.ai/schemas/expconf/v0/log-policies.json" }, "retention_policy": { "type": [ @@ -1496,10 +1494,39 @@ var ( } } `) - textLogActionCancelRetriesV0 = []byte(`{ + textLogActionV0 = []byte(`{ "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://determined.ai/schemas/expconf/v0/log-action-cancel-retries.json", - "title": "LogActionCancelRetries", + "$id": "http://determined.ai/schemas/expconf/v0/log-action.json", + "title": "LogAction", + "union": { + "defaultMessage": "expect one of the followings: cancel_retries, exclude_node, or an object with a single \"signal\" field", + "items": [ + { + "unionKey": "singleproperty:signal", + "properties": { + "signal": { + "type": "string" + } + }, + "type": "object", + "additionalProperties": false + }, + { + "unionKey": "never", + "const": "cancel_retries" + }, + { + "unionKey": "never", + "const": "exclude_node" + } + ] + } +} +`) + textLogLegacyActionCancelRetriesV0 = []byte(`{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "http://determined.ai/schemas/expconf/v0/log-legacy-action-cancel-retries.json", + "title": "LogLegacyActionCancelRetries", "additionalProperties": false, "type": "object", "required": [ @@ -1513,10 +1540,10 @@ var ( } } `) - textLogActionExcludeNodeV0 = []byte(`{ + textLogLegacyActionExcludeNodeV0 = []byte(`{ "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://determined.ai/schemas/expconf/v0/log-action-exclude-node.json", - "title": "LogActionExcludeNode", + "$id": "http://determined.ai/schemas/expconf/v0/log-legacy-action-exclude-node.json", + "title": "LogLegacyActionExcludeNode", "additionalProperties": false, "type": "object", "required": [ @@ -1530,11 +1557,10 @@ var ( } } `) - textLogActionV0 = []byte(`{ + textLogLegacyActionV0 = []byte(`{ "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://determined.ai/schemas/expconf/v0/log-action.json", - "title": "LogAction", - "$comment": "this is a union of all possible properties, with validation for the common properties", + "$id": "http://determined.ai/schemas/expconf/v0/log-legacy-action.json", + "title": "LogLegacyAction", "if": { "required": [ "type" @@ -1546,11 +1572,11 @@ var ( "items": [ { "unionKey": "const:type=cancel_retries", - "$ref": "http://determined.ai/schemas/expconf/v0/log-action-cancel-retries.json" + "$ref": "http://determined.ai/schemas/expconf/v0/log-legacy-action-cancel-retries.json" }, { "unionKey": "const:type=exclude_node", - "$ref": "http://determined.ai/schemas/expconf/v0/log-action-exclude-node.json" + "$ref": "http://determined.ai/schemas/expconf/v0/log-legacy-action-exclude-node.json" } ] } @@ -1579,17 +1605,18 @@ var ( "$id": "http://determined.ai/schemas/expconf/v0/log-policy.json", "title": "LogPolicy", "additionalProperties": false, - "required": [ - "pattern" - ], "type": "object", "properties": { "pattern": { + "$comment": "default doesn't apply here because pattern is a required field", "type": [ - "string" - ] + "string", + "null" + ], + "default": null }, "actions": { + "$comment": "\"default\": [] is for passing lint. The actual default can be found in const.go", "type": [ "array", "null" @@ -1597,26 +1624,30 @@ var ( "items": { "$ref": "http://determined.ai/schemas/expconf/v0/log-action.json" }, - "default": [] + "default": null }, - "signal": { + "action": { "type": [ - "string", + "object", "null" - ] + ], + "optionalRef": "http://determined.ai/schemas/expconf/v0/log-legacy-action.json", + "default": null } }, "checks": { - "one of or both of actions and signal must be set": { - "anyOf": [ + "both the depreated \"action\" field and the newer \"actions\" field are detected": { + "oneOf": [ { "required": [ + "pattern", "actions" ] }, { "required": [ - "signal" + "pattern", + "action" ] } ] @@ -3489,11 +3520,13 @@ var ( schemaLengthV0 interface{} - schemaLogActionCancelRetriesV0 interface{} + schemaLogActionV0 interface{} - schemaLogActionExcludeNodeV0 interface{} + schemaLogLegacyActionCancelRetriesV0 interface{} - schemaLogActionV0 interface{} + schemaLogLegacyActionExcludeNodeV0 interface{} + + schemaLogLegacyActionV0 interface{} schemaLogPoliciesConfigV0 interface{} @@ -4132,64 +4165,84 @@ func ParsedLengthV0() interface{} { return schemaLengthV0 } -func ParsedLogActionCancelRetriesV0() interface{} { +func ParsedLogActionV0() interface{} { cacheLock.RLock() - if schemaLogActionCancelRetriesV0 != nil { + if schemaLogActionV0 != nil { cacheLock.RUnlock() - return schemaLogActionCancelRetriesV0 + return schemaLogActionV0 } cacheLock.RUnlock() cacheLock.Lock() defer cacheLock.Unlock() - if schemaLogActionCancelRetriesV0 != nil { - return schemaLogActionCancelRetriesV0 + if schemaLogActionV0 != nil { + return schemaLogActionV0 } - err := json.Unmarshal(textLogActionCancelRetriesV0, &schemaLogActionCancelRetriesV0) + err := json.Unmarshal(textLogActionV0, &schemaLogActionV0) if err != nil { - panic("invalid embedded json for LogActionCancelRetriesV0") + panic("invalid embedded json for LogActionV0") } - return schemaLogActionCancelRetriesV0 + return schemaLogActionV0 } -func ParsedLogActionExcludeNodeV0() interface{} { +func ParsedLogLegacyActionCancelRetriesV0() interface{} { cacheLock.RLock() - if schemaLogActionExcludeNodeV0 != nil { + if schemaLogLegacyActionCancelRetriesV0 != nil { cacheLock.RUnlock() - return schemaLogActionExcludeNodeV0 + return schemaLogLegacyActionCancelRetriesV0 } cacheLock.RUnlock() cacheLock.Lock() defer cacheLock.Unlock() - if schemaLogActionExcludeNodeV0 != nil { - return schemaLogActionExcludeNodeV0 + if schemaLogLegacyActionCancelRetriesV0 != nil { + return schemaLogLegacyActionCancelRetriesV0 } - err := json.Unmarshal(textLogActionExcludeNodeV0, &schemaLogActionExcludeNodeV0) + err := json.Unmarshal(textLogLegacyActionCancelRetriesV0, &schemaLogLegacyActionCancelRetriesV0) if err != nil { - panic("invalid embedded json for LogActionExcludeNodeV0") + panic("invalid embedded json for LogLegacyActionCancelRetriesV0") } - return schemaLogActionExcludeNodeV0 + return schemaLogLegacyActionCancelRetriesV0 } -func ParsedLogActionV0() interface{} { +func ParsedLogLegacyActionExcludeNodeV0() interface{} { cacheLock.RLock() - if schemaLogActionV0 != nil { + if schemaLogLegacyActionExcludeNodeV0 != nil { cacheLock.RUnlock() - return schemaLogActionV0 + return schemaLogLegacyActionExcludeNodeV0 } cacheLock.RUnlock() cacheLock.Lock() defer cacheLock.Unlock() - if schemaLogActionV0 != nil { - return schemaLogActionV0 + if schemaLogLegacyActionExcludeNodeV0 != nil { + return schemaLogLegacyActionExcludeNodeV0 } - err := json.Unmarshal(textLogActionV0, &schemaLogActionV0) + err := json.Unmarshal(textLogLegacyActionExcludeNodeV0, &schemaLogLegacyActionExcludeNodeV0) if err != nil { - panic("invalid embedded json for LogActionV0") + panic("invalid embedded json for LogLegacyActionExcludeNodeV0") } - return schemaLogActionV0 + return schemaLogLegacyActionExcludeNodeV0 +} + +func ParsedLogLegacyActionV0() interface{} { + cacheLock.RLock() + if schemaLogLegacyActionV0 != nil { + cacheLock.RUnlock() + return schemaLogLegacyActionV0 + } + cacheLock.RUnlock() + + cacheLock.Lock() + defer cacheLock.Unlock() + if schemaLogLegacyActionV0 != nil { + return schemaLogLegacyActionV0 + } + err := json.Unmarshal(textLogLegacyActionV0, &schemaLogLegacyActionV0) + if err != nil { + panic("invalid embedded json for LogLegacyActionV0") + } + return schemaLogLegacyActionV0 } func ParsedLogPoliciesConfigV0() interface{} { @@ -4963,12 +5016,14 @@ func schemaBytesMap() map[string][]byte { cachedSchemaBytesMap[url] = textKerberosConfigV0 url = "http://determined.ai/schemas/expconf/v0/length.json" cachedSchemaBytesMap[url] = textLengthV0 - url = "http://determined.ai/schemas/expconf/v0/log-action-cancel-retries.json" - cachedSchemaBytesMap[url] = textLogActionCancelRetriesV0 - url = "http://determined.ai/schemas/expconf/v0/log-action-exclude-node.json" - cachedSchemaBytesMap[url] = textLogActionExcludeNodeV0 url = "http://determined.ai/schemas/expconf/v0/log-action.json" cachedSchemaBytesMap[url] = textLogActionV0 + url = "http://determined.ai/schemas/expconf/v0/log-legacy-action-cancel-retries.json" + cachedSchemaBytesMap[url] = textLogLegacyActionCancelRetriesV0 + url = "http://determined.ai/schemas/expconf/v0/log-legacy-action-exclude-node.json" + cachedSchemaBytesMap[url] = textLogLegacyActionExcludeNodeV0 + url = "http://determined.ai/schemas/expconf/v0/log-legacy-action.json" + cachedSchemaBytesMap[url] = textLogLegacyActionV0 url = "http://determined.ai/schemas/expconf/v0/log-policies.json" cachedSchemaBytesMap[url] = textLogPoliciesConfigV0 url = "http://determined.ai/schemas/expconf/v0/log-policy.json" diff --git a/schemas/expconf/v0/experiment.json b/schemas/expconf/v0/experiment.json index 24ab3135b3e..17a01f2d3da 100644 --- a/schemas/expconf/v0/experiment.json +++ b/schemas/expconf/v0/experiment.json @@ -122,9 +122,7 @@ "null" ], "default": [], - "items": { - "optionalref": "http://determined.ai/schemas/expconf/v0/log-policies.json" - } + "optionalRef": "http://determined.ai/schemas/expconf/v0/log-policies.json" }, "retention_policy": { "type": [ diff --git a/schemas/expconf/v0/log-action.json b/schemas/expconf/v0/log-action.json index 6dcabee6727..bb2d06fa3ef 100644 --- a/schemas/expconf/v0/log-action.json +++ b/schemas/expconf/v0/log-action.json @@ -2,32 +2,27 @@ "$schema": "http://json-schema.org/draft-07/schema#", "$id": "http://determined.ai/schemas/expconf/v0/log-action.json", "title": "LogAction", - "$comment": "this is a union of all possible properties, with validation for the common properties", - "if": { - "required": [ - "type" - ] - }, - "then": { - "union": { - "defaultMessage": "is not an object where object[\"type\"] is one of 'cancel_retries' or 'exclude_node'", - "items": [ - { - "unionKey": "const:type=cancel_retries", - "$ref": "http://determined.ai/schemas/expconf/v0/log-action-cancel-retries.json" + "union": { + "defaultMessage": "expect one of the followings: cancel_retries, exclude_node, or an object with a single \"signal\" field", + "items": [ + { + "unionKey": "singleproperty:signal", + "properties": { + "signal": { + "type": "string" + } }, - { - "unionKey": "const:type=exclude_node", - "$ref": "http://determined.ai/schemas/expconf/v0/log-action-exclude-node.json" - } - ] - } - }, - "additionalProperties": false, - "eventuallyRequired": [ - "type" - ], - "properties": { - "type": true + "type": "object", + "additionalProperties": false + }, + { + "unionKey": "never", + "const": "cancel_retries" + }, + { + "unionKey": "never", + "const": "exclude_node" + } + ] } } diff --git a/schemas/expconf/v0/log-action-cancel-retries.json b/schemas/expconf/v0/log-legacy-action-cancel-retries.json similarity index 67% rename from schemas/expconf/v0/log-action-cancel-retries.json rename to schemas/expconf/v0/log-legacy-action-cancel-retries.json index 57105184a0c..9f120b919df 100644 --- a/schemas/expconf/v0/log-action-cancel-retries.json +++ b/schemas/expconf/v0/log-legacy-action-cancel-retries.json @@ -1,7 +1,7 @@ { "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://determined.ai/schemas/expconf/v0/log-action-cancel-retries.json", - "title": "LogActionCancelRetries", + "$id": "http://determined.ai/schemas/expconf/v0/log-legacy-action-cancel-retries.json", + "title": "LogLegacyActionCancelRetries", "additionalProperties": false, "type": "object", "required": [ diff --git a/schemas/expconf/v0/log-action-exclude-node.json b/schemas/expconf/v0/log-legacy-action-exclude-node.json similarity index 67% rename from schemas/expconf/v0/log-action-exclude-node.json rename to schemas/expconf/v0/log-legacy-action-exclude-node.json index 4442a9fba5d..523736533f4 100644 --- a/schemas/expconf/v0/log-action-exclude-node.json +++ b/schemas/expconf/v0/log-legacy-action-exclude-node.json @@ -1,7 +1,7 @@ { "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://determined.ai/schemas/expconf/v0/log-action-exclude-node.json", - "title": "LogActionExcludeNode", + "$id": "http://determined.ai/schemas/expconf/v0/log-legacy-action-exclude-node.json", + "title": "LogLegacyActionExcludeNode", "additionalProperties": false, "type": "object", "required": [ diff --git a/schemas/expconf/v0/log-legacy-action.json b/schemas/expconf/v0/log-legacy-action.json new file mode 100644 index 00000000000..c4683a9d69d --- /dev/null +++ b/schemas/expconf/v0/log-legacy-action.json @@ -0,0 +1,32 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "http://determined.ai/schemas/expconf/v0/log-legacy-action.json", + "title": "LogLegacyAction", + "if": { + "required": [ + "type" + ] + }, + "then": { + "union": { + "defaultMessage": "is not an object where object[\"type\"] is one of 'cancel_retries' or 'exclude_node'", + "items": [ + { + "unionKey": "const:type=cancel_retries", + "$ref": "http://determined.ai/schemas/expconf/v0/log-legacy-action-cancel-retries.json" + }, + { + "unionKey": "const:type=exclude_node", + "$ref": "http://determined.ai/schemas/expconf/v0/log-legacy-action-exclude-node.json" + } + ] + } + }, + "additionalProperties": false, + "eventuallyRequired": [ + "type" + ], + "properties": { + "type": true + } +} diff --git a/schemas/expconf/v0/log-policy.json b/schemas/expconf/v0/log-policy.json index f5530c47ca9..fbabe397236 100644 --- a/schemas/expconf/v0/log-policy.json +++ b/schemas/expconf/v0/log-policy.json @@ -3,17 +3,18 @@ "$id": "http://determined.ai/schemas/expconf/v0/log-policy.json", "title": "LogPolicy", "additionalProperties": false, - "required": [ - "pattern" - ], "type": "object", "properties": { "pattern": { + "$comment": "default doesn't apply here because pattern is a required field", "type": [ - "string" - ] + "string", + "null" + ], + "default": null }, "actions": { + "$comment": "\"default\": [] is for passing lint. The actual default can be found in const.go", "type": [ "array", "null" @@ -21,26 +22,30 @@ "items": { "$ref": "http://determined.ai/schemas/expconf/v0/log-action.json" }, - "default": [] + "default": null }, - "signal": { + "action": { "type": [ - "string", + "object", "null" - ] + ], + "optionalRef": "http://determined.ai/schemas/expconf/v0/log-legacy-action.json", + "default": null } }, "checks": { - "one of or both of actions and signal must be set": { - "anyOf": [ + "both the depreated \"action\" field and the newer \"actions\" field are detected": { + "oneOf": [ { "required": [ + "pattern", "actions" ] }, { "required": [ - "signal" + "pattern", + "action" ] } ] diff --git a/schemas/test_cases/v0/backward-compatibility.yaml b/schemas/test_cases/v0/backward-compatibility.yaml new file mode 100644 index 00000000000..26a73184ed5 --- /dev/null +++ b/schemas/test_cases/v0/backward-compatibility.yaml @@ -0,0 +1,20 @@ +- name: recognize action field for backward compatibility + backward_compatiblity: http://determined.ai/schemas/expconf/v0/log-policies.json + legacy: + - pattern: a + action: + type: cancel_retries + - pattern: a + action: + type: exclude_node + - pattern: b + action: + type: exclude_node + new: + - pattern: a + actions: + - exclude_node + - cancel_retries + - pattern: b + actions: + - exclude_node diff --git a/schemas/test_cases/v0/defaults.yaml b/schemas/test_cases/v0/defaults.yaml index ff9c99177a2..29061df328c 100644 --- a/schemas/test_cases/v0/defaults.yaml +++ b/schemas/test_cases/v0/defaults.yaml @@ -225,6 +225,8 @@ case: [] defaulted: - pattern: .*uncorrectable ECC error encountered.* - signal: ECC Error + actions: + - signal: ECC Error - pattern: .*CUDA out of memory.* - signal: CUDA OOM + actions: + - signal: CUDA OOM diff --git a/schemas/test_cases/v0/experiment.yaml b/schemas/test_cases/v0/experiment.yaml index e08fbbb7866..4d63e341890 100644 --- a/schemas/test_cases/v0/experiment.yaml +++ b/schemas/test_cases/v0/experiment.yaml @@ -95,9 +95,11 @@ labels: [] log_policies: - pattern: .*uncorrectable ECC error encountered.* - signal: ECC Error + actions: + - signal: ECC Error - pattern: .*CUDA out of memory.* - signal: CUDA OOM + actions: + - signal: CUDA OOM max_restarts: 5 min_validation_period: batches: 0 @@ -202,9 +204,11 @@ hyperparameters: {} log_policies: - pattern: '*' - signal: '*' + actions: + - signal: '*' - pattern: '*' - signal: '*' + actions: + - signal: '*' labels: [] max_restarts: 5 min_checkpoint_period: diff --git a/schemas/test_cases/v0/merging.yaml b/schemas/test_cases/v0/merging.yaml index cbc8f58f71c..e172e45aee5 100644 --- a/schemas/test_cases/v0/merging.yaml +++ b/schemas/test_cases/v0/merging.yaml @@ -199,47 +199,47 @@ case: - pattern: a actions: - - type: cancel_retries + - cancel_retries merge_src: - pattern: a actions: - - type: exclude_node + - exclude_node merged: - pattern: a actions: - - type: exclude_node - - type: cancel_retries + - exclude_node + - cancel_retries - name: no duplicated actions when merged merge_as: http://determined.ai/schemas/expconf/v0/log-policies.json case: - pattern: a actions: - - type: cancel_retries - - type: exclude_node + - cancel_retries + - exclude_node merge_src: - pattern: a actions: - - type: exclude_node + - exclude_node merged: - pattern: a actions: - - type: exclude_node - - type: cancel_retries + - exclude_node + - cancel_retries - name: no duplicated actions when merged 2 merge_as: http://determined.ai/schemas/expconf/v0/log-policies.json case: - pattern: a actions: - - type: exclude_node + - exclude_node merge_src: - pattern: a actions: - - type: cancel_retries - - type: exclude_node + - cancel_retries + - exclude_node merged: - pattern: a actions: - - type: cancel_retries - - type: exclude_node + - cancel_retries + - exclude_node diff --git a/schemas/test_cases/v0/misc.yaml b/schemas/test_cases/v0/misc.yaml index 0dbfef58eea..38002ae7262 100644 --- a/schemas/test_cases/v0/misc.yaml +++ b/schemas/test_cases/v0/misc.yaml @@ -332,3 +332,50 @@ case: shm_size: 1 i +- name: valid log policy + sane_as: + - http://determined.ai/schemas/expconf/v0/log-policy.json + case: + pattern: a + actions: + - signal: sig + - cancel_retries + - exclude_node + +- name: log policy is invalid with both action and actions + sanity_errors: + http://determined.ai/schemas/expconf/v0/log-policy.json: + - ": both the depreated \"action\" field and the newer \"actions\" field are detected" + case: + pattern: a + actions: + - signal: sig + action: + type: cancel_retries + +- name: invalid log policy 1 + sanity_errors: + http://determined.ai/schemas/expconf/v0/log-policy.json: + - "expect one of the followings: cancel_retries, exclude_node, or an object with a single \"signal\" field" + case: + pattern: a + actions: + - notallowedstring + +- name: invalid log policy 2 + sanity_errors: + http://determined.ai/schemas/expconf/v0/log-policy.json: + - "expect one of the followings: cancel_retries, exclude_node, or an object with a single \"signal\" field" + case: + pattern: a + actions: + - notallowedstring + +- name: invalid log policy 3 + sanity_errors: + http://determined.ai/schemas/expconf/v0/log-policy.json: + - "expect one of the followings: cancel_retries, exclude_node, or an object with a single \"signal\" field" + case: + pattern: a + actions: + - 3 diff --git a/schemas/test_cases/v0/unions.yaml b/schemas/test_cases/v0/unions.yaml index 0719defdf63..eab13af4e3e 100644 --- a/schemas/test_cases/v0/unions.yaml +++ b/schemas/test_cases/v0/unions.yaml @@ -63,15 +63,13 @@ - name: log action cancel_retries sane_as: - - http://determined.ai/schemas/expconf/v0/log-action-cancel-retries.json - - http://determined.ai/schemas/expconf/v0/log-action.json + - http://determined.ai/schemas/expconf/v0/log-legacy-action-cancel-retries.json case: type: cancel_retries - name: log action exclude_node sane_as: - - http://determined.ai/schemas/expconf/v0/log-action-exclude-node.json - - http://determined.ai/schemas/expconf/v0/log-action.json + - http://determined.ai/schemas/expconf/v0/log-legacy-action-exclude-node.json case: type: exclude_node From 2c69c90107524f31378bf6ef427f27aee20658de Mon Sep 17 00:00:00 2001 From: Jerry Gong Date: Thu, 17 Oct 2024 09:41:17 -0400 Subject: [PATCH 11/27] Address comment --- master/pkg/model/task_container_defaults.go | 13 +++++-------- master/pkg/schemas/expconf/log_pattern_config.go | 6 +++--- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/master/pkg/model/task_container_defaults.go b/master/pkg/model/task_container_defaults.go index 9c44bab5f6a..ed89cd46c23 100644 --- a/master/pkg/model/task_container_defaults.go +++ b/master/pkg/model/task_container_defaults.go @@ -49,7 +49,7 @@ type TaskContainerDefaultsConfig struct { StartupHook string `json:"startup_hook"` - LogPolicies *expconf.LogPoliciesConfig `json:"log_policies"` + LogPolicies expconf.LogPoliciesConfig `json:"log_policies"` PreemptionTimeout int `json:"preemption_timeout,omitempty"` @@ -319,13 +319,10 @@ func (c TaskContainerDefaultsConfig) Merge( res.Pbs.SetSbatchArgs(tmp) } - if other.LogPolicies != nil { - if res.LogPolicies == nil { - res.LogPolicies = other.LogPolicies - } else { - logPolicies := res.LogPolicies.Merge(*other.LogPolicies) - res.LogPolicies = &logPolicies - } + if res.LogPolicies == nil { + res.LogPolicies = other.LogPolicies + } else { + res.LogPolicies = res.LogPolicies.Merge(other.LogPolicies) } if other.PreemptionTimeout > 0 { diff --git a/master/pkg/schemas/expconf/log_pattern_config.go b/master/pkg/schemas/expconf/log_pattern_config.go index 91711e434c2..ea89ac366d6 100644 --- a/master/pkg/schemas/expconf/log_pattern_config.go +++ b/master/pkg/schemas/expconf/log_pattern_config.go @@ -22,14 +22,14 @@ const ( type LogPoliciesConfigV0 []LogPolicyV0 // WithDefaults implements the Defaultable psuedointerface. -func (b *LogPoliciesConfigV0) WithDefaults() *LogPoliciesConfigV0 { +func (b LogPoliciesConfigV0) WithDefaults() LogPoliciesConfigV0 { eccErrorPattern := ECCErrorPattern eccErrorSignal := ECCErrorSignal cudaOomPattern := CUDAOOMPattern cudaOomSignal := CUDAOOMSignal - if b != nil && len(*b) == 0 { - return &LogPoliciesConfigV0{ + if b != nil && len(b) == 0 { + return LogPoliciesConfigV0{ LogPolicyV0{RawPattern: eccErrorPattern, RawActions: []LogActionV0{{Signal: &eccErrorSignal}}}, LogPolicyV0{RawPattern: cudaOomPattern, RawActions: []LogActionV0{{Signal: &cudaOomSignal}}}, } From e984fc214f440179eab4ed661616188f9cde2bcf Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 17 Oct 2024 11:13:17 -0600 Subject: [PATCH 12/27] for jerry --- .../schemas/expconf/experiment_config_test.go | 26 ++++ .../pkg/schemas/expconf/log_pattern_config.go | 141 +++++++----------- 2 files changed, 80 insertions(+), 87 deletions(-) diff --git a/master/pkg/schemas/expconf/experiment_config_test.go b/master/pkg/schemas/expconf/experiment_config_test.go index a69ff5adc0c..e33b078d18d 100644 --- a/master/pkg/schemas/expconf/experiment_config_test.go +++ b/master/pkg/schemas/expconf/experiment_config_test.go @@ -4,6 +4,7 @@ package expconf import ( "encoding/json" "testing" + "fmt" "github.com/stretchr/testify/require" "gotest.tools/assert" @@ -116,3 +117,28 @@ func TestName(t *testing.T) { assert.DeepEqual(t, newConfig.Name().String(), "my_name") } + +func TestLogPolicies(t *testing.T) { + newConfig := ExperimentConfig{} + bytes := []byte(`{ + "log_policies": [ + ] + }`) +/* + { + "pattern": "x", + "actions": [ + "cancel_retries" + ] + } +*/ + err := json.Unmarshal(bytes, &newConfig) + assert.NilError(t, err) + panic( + fmt.Sprintf( + "newConfig.RawLogPolicies: (nil?%v) %v\n", + newConfig.RawLogPolicies == nil, + newConfig.RawLogPolicies, + ), + ) +} diff --git a/master/pkg/schemas/expconf/log_pattern_config.go b/master/pkg/schemas/expconf/log_pattern_config.go index ea89ac366d6..d06bb0ff9f0 100644 --- a/master/pkg/schemas/expconf/log_pattern_config.go +++ b/master/pkg/schemas/expconf/log_pattern_config.go @@ -37,7 +37,7 @@ func (b LogPoliciesConfigV0) WithDefaults() LogPoliciesConfigV0 { return b } -// Merge implemenets the mergable interface. +// Merge implements the Mergable psuedo-interface. func (b LogPoliciesConfigV0) Merge( other LogPoliciesConfigV0, ) LogPoliciesConfigV0 { @@ -150,80 +150,67 @@ type LogPolicyV0 struct { // UnmarshalJSON implements the json.Unmarshaler interface. func (b *LogPolicyV0) UnmarshalJSON(data []byte) error { - type DefaultParser LogPolicyV0 - var jsonItems DefaultParser - if err := json.Unmarshal(data, &jsonItems); err != nil { - return fmt.Errorf("failed to parse log policy: %w", err) + // First, check if we can unmarshal the log policy item as a legacy item. + type LegacyCancelRetries struct {} + + type LegacyExcludeNode struct {} + + type LegacyAction struct { + CancelRetries *LegacyCancelRetries `union:"type,cancel_retries" json:"-"` + ExcludeNode *LegacyExcludeNode `union:"type,exclude_node" json:"-"` } - b.RawPattern = jsonItems.RawPattern - - // Backward compatiblity. Move data in the legacy action field to the newer actions field. - if jsonItems.RawLegacyAction != nil && jsonItems.RawActions == nil { - if jsonItems.RawLegacyAction.RawCancelRetries != nil { - b.RawActions = []LogActionV0{{CancelRetries: &LogActionCancelRetries{}}} - } else if jsonItems.RawLegacyAction.RawExcludeNode != nil { - b.RawActions = []LogActionV0{{ExcludeNode: &LogActionExcludeNode{}}} + + type LegacyPolicy struct { + Pattern string `json:"pattern"` + Action *LegacyAction `json:"action"` + } + + var legacy LegacyPolicy + err := json.Unmarshal(data, &legacy) + // For this to be a valid LegacyPolicy, the Action must have been provided. + if err == nil && legacy.Action != nil { + // Apply shim to bring legacy policy into the current format. + var action LogActionV0 + if legacy.Action.CancelRetries { + action = LogActionV0{Type: LogActionTypeCancelRetries} } else { - return fmt.Errorf("invalid legacy log action: %+v", jsonItems.RawLegacyAction) + action = LogActionV0{Type: LogActionTypeExcludeNode} } - b.RawLegacyAction = nil - } else if jsonItems.RawLegacyAction == nil && jsonItems.RawActions != nil { - b.RawActions = jsonItems.RawActions - } else { - return fmt.Errorf("invalid log policy: %+v", jsonItems) + *b = LogPolicyV0{ + RawPattern: legacy.Pattern, + RawActions: []LogActionV0{action}, + } + return nil } + // Shimming is not necessary. + type DefaultParser *LogPolicyV0 + if err := json.Unmarshal(data, DefaultParser(b)); err != nil { + return fmt.Errorf("failed to parse LogPolicyV0: %w", err) + } return nil } -// LogActionV0 is a policy to take after matching. -// -//go:generate ../gen.sh -type LogLegacyActionV0 struct { - RawCancelRetries *LogLegacyActionCancelRetriesV0 `union:"type,cancel_retries" json:"-"` - RawExcludeNode *LogLegacyActionExcludeNodeV0 `union:"type,exclude_node" json:"-"` -} +type LogActionType string -// MarshalJSON implements the json.Marshaler interface. -func (s LogLegacyActionV0) MarshalJSON() ([]byte, error) { - return union.MarshalEx(s, true) -} - -// UnmarshalJSON implements the json.Unmarshaler interface. -func (s *LogLegacyActionV0) UnmarshalJSON(data []byte) error { - if err := union.Unmarshal(data, s); err != nil { - return err - } - type DefaultParser *LogLegacyActionV0 - if err := json.Unmarshal(data, DefaultParser(s)); err != nil { - return fmt.Errorf("failed to parse LogLegacyActionV0: %w", err) - } - return nil -} +const LogActionTypeCancelRetries LogActionType = "cancel_retries" +const LogActionTypeExcludeNodes LogActionType = "exclude_nodes" +const LogActionTypeSignal LogActionType = "signal" // LogActionV0 is a policy to take after matching. type LogActionV0 struct { - Signal *string - CancelRetries *LogActionCancelRetries - ExcludeNode *LogActionExcludeNode -} + Type LogActionType -func (l LogActionV0) GetUnionMember() interface{} { - if l.Signal != nil { - return *l.Signal - } else if l.CancelRetries != nil { - return *l.CancelRetries - } else if l.ExcludeNode != nil { - return *l.ExcludeNode - } - panic("no union member defined") + // Only used by the "signal" action. + Signal string } // MarshalJSON implements the json.Marshaler interface. func (s LogActionV0) MarshalJSON() ([]byte, error) { + // XXX case statement here if s.Signal != nil { return json.Marshal(struct { - Signal string `json:"signal,omitempty"` + Signal string `json:"signal"` }{Signal: *s.Signal}) } else if s.CancelRetries != nil { return json.Marshal(cancelRetries) @@ -238,44 +225,24 @@ func (s *LogActionV0) UnmarshalJSON(data []byte) error { var action string if err := json.Unmarshal(data, &action); err == nil { switch action { - case cancelRetries: - s.CancelRetries = &LogActionCancelRetries{} - case excludeNode: - s.ExcludeNode = &LogActionExcludeNode{} + case LogActionTypeCancelRetries: + *s = LogActionV0{Type: LogActionTypeCancelRetries} + return nil + case LogActionTypeExcludeNode: + *s = LogActionV0{Type: LogActionTypeExcludeNode} + return nil default: return fmt.Errorf("invalid log action: %v", action) } } // Handle Signal - out := struct { - Signal *string `json:"signal,omitempty"` + temp := struct { + Signal *string `json:"signal"` }{} - if err := json.Unmarshal(data, &out); err == nil { - s.Signal = out.Signal + if err := json.Unmarshal(data, &temp); err != nil { + return fmt.Errorf("failed to unmarshal log action: %q", string(data)) } - + *s = LogActionV0{Type: LogActionTypeSignal, Signal: out.Signal} return nil } - -// LogActionCancelRetries doesn't retry the trial if it fails. -type LogActionCancelRetries struct{} - -// LogActionExcludeNodeV0 will exclude the node the log was seen on -// (only for that trial) and reschedule. -type LogActionExcludeNode struct{} - -// LogLegacyActionCancelRetriesV0 doesn't retry the trial if it fails. -// -//go:generate ../gen.sh -type LogLegacyActionCancelRetriesV0 struct { - // This comment is needed to stop ../gen.sh from complaining. -} - -// LogLegacyActionExcludeNodeV0 will exclude the node the log was seen on -// (only for that trial) and reschedule. -// -//go:generate ../gen.sh -type LogLegacyActionExcludeNodeV0 struct { - // This comment is needed to stop ../gen.sh from complaining. -} From e6950225c6713393c3e5b26793b335d2758ffc4f Mon Sep 17 00:00:00 2001 From: Jerry Gong Date: Fri, 18 Oct 2024 13:22:32 -0400 Subject: [PATCH 13/27] Address comments --- .../db/postgres_experiments_intg_test.go | 26 +- master/internal/logpattern/logpattern.go | 8 +- .../pkg/schemas/expconf/log_pattern_config.go | 275 +++++++++++------- master/pkg/schemas/expconf/schema_test.go | 3 + ...gen_log_legacy_action_cancel_retries_v0.go | 21 -- .../zgen_log_legacy_action_exclude_node_v0.go | 21 -- .../expconf/zgen_log_legacy_action_v0.go | 31 -- .../pkg/schemas/expconf/zgen_log_policy_v0.go | 12 +- master/pkg/schemas/zgen_schemas.go | 1 + master/pkg/union/unmarshal.go | 2 + schemas/expconf/v0/experiment.json | 1 + schemas/test_cases/v0/defaults.yaml | 14 - schemas/test_cases/v0/experiment.yaml | 8 +- schemas/test_cases/v0/log_policies.yaml | 139 +++++++++ schemas/test_cases/v0/merging.yaml | 50 ---- schemas/test_cases/v0/misc.yaml | 48 --- schemas/test_cases/v0/unions.yaml | 12 - 17 files changed, 346 insertions(+), 326 deletions(-) delete mode 100644 master/pkg/schemas/expconf/zgen_log_legacy_action_cancel_retries_v0.go delete mode 100644 master/pkg/schemas/expconf/zgen_log_legacy_action_exclude_node_v0.go delete mode 100644 master/pkg/schemas/expconf/zgen_log_legacy_action_v0.go create mode 100644 schemas/test_cases/v0/log_policies.yaml diff --git a/master/internal/db/postgres_experiments_intg_test.go b/master/internal/db/postgres_experiments_intg_test.go index 4b81bad2f64..ef1a7157cd5 100644 --- a/master/internal/db/postgres_experiments_intg_test.go +++ b/master/internal/db/postgres_experiments_intg_test.go @@ -439,21 +439,29 @@ func TestActiveLogPatternPolicies(t *testing.T) { eccErrorSignal := "ECC Error" cudaOOMSignal := "CUDA OOM" expected := expconf.LogPoliciesConfig{ - expconf.LogPolicy{RawPattern: ".*uncorrectable ECC error encountered.*", RawSignal: &eccErrorSignal}, - expconf.LogPolicy{RawPattern: ".*CUDA out of memory.*", RawSignal: &cudaOOMSignal}, + expconf.LogPolicy{ + RawPattern: ".*uncorrectable ECC error encountered.*", + RawActions: expconf.LogActionsV0{expconf.LogActionV0{Type: expconf.LogActionTypeSignal, Signal: &eccErrorSignal}}, + }, + expconf.LogPolicy{ + RawPattern: ".*CUDA out of memory.*", + RawActions: expconf.LogActionsV0{expconf.LogActionV0{Type: expconf.LogActionTypeSignal, Signal: &cudaOOMSignal}}, + }, } require.Equal(t, expected, policies) activeConfig, err := db.ActiveExperimentConfig(exp.ID) require.NoError(t, err) - activeConfig.RawLogPolicies = &expconf.LogPoliciesConfig{ - expconf.LogPolicy{RawPattern: "sub", RawActions: []expconf.LogAction{{ - RawCancelRetries: &expconf.LogActionCancelRetries{}, - }}}, - expconf.LogPolicy{RawPattern: `\d{5}$`, RawActions: []expconf.LogAction{{ - RawExcludeNode: &expconf.LogActionExcludeNode{}, - }}}, + activeConfig.RawLogPolicies = expconf.LogPoliciesConfig{ + expconf.LogPolicy{ + RawPattern: "sub", + RawActions: expconf.LogActionsV0{expconf.LogActionV0{Type: expconf.LogActionTypeCancelRetries}}, + }, + expconf.LogPolicy{ + RawPattern: `\d{5}$`, + RawActions: expconf.LogActionsV0{expconf.LogActionV0{Type: expconf.LogActionTypeExcludeNode}}, + }, } v, err := json.Marshal(activeConfig) diff --git a/master/internal/logpattern/logpattern.go b/master/internal/logpattern/logpattern.go index c8f84d4ac0d..d400e1dbedc 100644 --- a/master/internal/logpattern/logpattern.go +++ b/master/internal/logpattern/logpattern.go @@ -78,21 +78,21 @@ func (l *LogPatternPolicies) monitor(ctx context.Context, if compiledRegex.MatchString(log.Log) { if actions := policy.Actions(); len(actions) > 0 { for _, a := range actions { - switch a.GetUnionMember().(type) { - case expconf.LogActionCancelRetries: + switch a.Type { + case expconf.LogActionTypeCancelRetries: if err := addDontRetry( ctx, model.TaskID(log.TaskID), *log.AgentID, policy.Pattern(), log.Log, ); err != nil { return fmt.Errorf("adding don't retry: %w", err) } - case expconf.LogActionExcludeNode: + case expconf.LogActionTypeExcludeNode: if err := addRetryOnDifferentNode( ctx, model.TaskID(log.TaskID), *log.AgentID, policy.Pattern(), log.Log, ); err != nil { return fmt.Errorf("adding retry on different node: %w", err) } - case string: + case expconf.LogActionTypeSignal: signal := a.Signal if signal != nil { err = db.Bun().RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error { diff --git a/master/pkg/schemas/expconf/log_pattern_config.go b/master/pkg/schemas/expconf/log_pattern_config.go index d06bb0ff9f0..fb9e9c1a6e6 100644 --- a/master/pkg/schemas/expconf/log_pattern_config.go +++ b/master/pkg/schemas/expconf/log_pattern_config.go @@ -3,17 +3,12 @@ package expconf import ( "encoding/json" "fmt" + "sort" "github.com/pkg/errors" "github.com/determined-ai/determined/master/pkg/schemas" "github.com/determined-ai/determined/master/pkg/set" - "github.com/determined-ai/determined/master/pkg/union" -) - -const ( - cancelRetries = "cancel_retries" - excludeNode = "exclude_node" ) // LogPoliciesConfigV0 is a list of log policies. @@ -23,73 +18,56 @@ type LogPoliciesConfigV0 []LogPolicyV0 // WithDefaults implements the Defaultable psuedointerface. func (b LogPoliciesConfigV0) WithDefaults() LogPoliciesConfigV0 { - eccErrorPattern := ECCErrorPattern - eccErrorSignal := ECCErrorSignal cudaOomPattern := CUDAOOMPattern cudaOomSignal := CUDAOOMSignal + eccErrorPattern := ECCErrorPattern + eccErrorSignal := ECCErrorSignal - if b != nil && len(b) == 0 { + if b == nil { return LogPoliciesConfigV0{ - LogPolicyV0{RawPattern: eccErrorPattern, RawActions: []LogActionV0{{Signal: &eccErrorSignal}}}, - LogPolicyV0{RawPattern: cudaOomPattern, RawActions: []LogActionV0{{Signal: &cudaOomSignal}}}, + LogPolicyV0{RawPattern: cudaOomPattern, RawActions: []LogActionV0{{Type: LogActionTypeSignal, Signal: &cudaOomSignal}}}, + LogPolicyV0{RawPattern: eccErrorPattern, RawActions: []LogActionV0{{Type: LogActionTypeSignal, Signal: &eccErrorSignal}}}, } } return b } // Merge implements the Mergable psuedo-interface. +// We appends all LogPolicyV0s to the output slice, but if there are any with the same pattern, we merge +// their actions and save them as one LogPolicyV0. func (b LogPoliciesConfigV0) Merge( other LogPoliciesConfigV0, ) LogPoliciesConfigV0 { var out LogPoliciesConfigV0 - patternToLp := make(map[string]LogPolicyV0) - for _, lp := range other { - patternToLp[lp.RawPattern] = lp - } - + patternTosrcLp := make(map[string]LogPolicyV0) for _, lp := range b { - if v, ok := patternToLp[lp.RawPattern]; ok { - // Union merge all actions except signal - actions := set.New[LogActionV0]() - var signal *LogActionV0 - for _, a := range patternToLp[lp.RawPattern].RawActions { - if a.Signal != nil { - signal = &a - continue - } - actions.Insert(a) - } - var otherSignal *LogActionV0 - for _, a := range lp.RawActions { - if a.Signal != nil { - otherSignal = &a - continue - } - if !actions.Contains(a) { - v.RawActions = append(v.RawActions, a) - } - } + patternTosrcLp[lp.RawPattern] = lp + } - // Other signal takes precedence - if otherSignal != nil || signal != nil { - v.RawActions = append(v.RawActions, *schemas.Merge(otherSignal, signal)) + for _, otherLp := range other { + pattern := otherLp.RawPattern + if srcLp, ok := patternTosrcLp[pattern]; ok { + // Merge actions of two LogPolicies if they have the same pattern. + patternTosrcLp[pattern] = LogPolicyV0{ + RawPattern: pattern, + RawActions: srcLp.RawActions.merge(otherLp.RawActions), } - - patternToLp[lp.RawPattern] = v } else { - patternToLp[lp.RawPattern] = lp + // Source LogPoliciesConfig doesn't have this pattern. + patternTosrcLp[pattern] = otherLp } } - for _, p := range patternToLp { - out = append(out, p) + for _, lp := range patternTosrcLp { + out = append(out, lp) } + out.sort() return out } // UnmarshalJSON implements the json.Unmarshaler interface. -// The legacy log policy config allows entries with duplicated pattern because only single action +// The legacy log policies config allows entries with duplicated pattern because only single action // can be associated with a pattern. For example: // // - pattern: a @@ -97,84 +75,120 @@ func (b LogPoliciesConfigV0) Merge( // - pattern: a // action: exclude_node // -// An actions field is available now. Multiple entires can be associated with a pattern. No more -// duplicated pattern in log policies config. For example: +// All legacy policies become mordern policies after shimming: +// - pattern: a +// actions: [cancel_retries] +// - pattern: a +// actions: [exclude_node] +// +// The modern log policy has an actions field. Multiple entires can be associated with a pattern. No more +// duplicated pattern in log policies config. For example the policies above will be combined into: // // - pattern: a // actions: // - cancel_retries // - exclude_node func (b *LogPoliciesConfigV0) UnmarshalJSON(data []byte) error { + // jsonItems may have duplicated patterns after applying shim to the legacy policy. type DefaultParser LogPoliciesConfigV0 var jsonItems DefaultParser if err := json.Unmarshal(data, &jsonItems); err != nil { return errors.Wrapf(err, "failed to parse runtime items") } + // By distinguishing [] and nil input, user can override the default log policies and get empty. + // log policies. If a user provides [], the default values won't be applied. + if jsonItems == nil { + return nil + } else if len(jsonItems) == 0 { + *b = make([]LogPolicyV0, 0) + return nil + } - // Merge LogPolicyV0s with the same pattern into one - patternToActions := make(map[string]set.Set[LogActionV0]) + // Merge LogPolicyV0s with the same pattern into one. + patternToLp := make(map[string]LogPolicyV0) for _, jsonItem := range jsonItems { - if jsonItem.RawLegacyAction != nil { - return fmt.Errorf("legacy action field expected to be nil: %+v", jsonItem) - } - - if _, ok := patternToActions[jsonItem.RawPattern]; !ok { - patternToActions[jsonItem.RawPattern] = set.New[LogActionV0]() + pattern := jsonItem.RawPattern + if _, ok := patternToLp[pattern]; !ok { + patternToLp[pattern] = jsonItem + continue } - for _, a := range jsonItem.RawActions { - actions := patternToActions[jsonItem.RawPattern] - actions.Insert(a) - } + mergedActions := patternToLp[pattern].RawActions.merge(jsonItem.RawActions) + patternToLp[pattern] = LogPolicyV0{RawPattern: pattern, RawActions: mergedActions} } - for p, actions := range patternToActions { - lp := LogPolicyV0{ - RawPattern: p, - RawActions: actions.ToSlice(), - } - *b = append(*b, lp) + var temp LogPoliciesConfigV0 + for _, lp := range patternToLp { + temp = append(temp, lp) } + LogPoliciesConfigV0(temp).sort() + *b = temp return nil } +// Sort LogPolicyV0 by pattern so the output is in deterministic state. Testing will be easier. +func (b LogPoliciesConfigV0) sort() { + sort.Slice(b, func(i, j int) bool { + return b[i].RawPattern < b[j].RawPattern + }) +} + +type LogActionsV0 []LogActionV0 + // LogPolicyV0 is an action to take if we match against trial logs. // //go:generate ../gen.sh type LogPolicyV0 struct { - RawPattern string `json:"pattern"` - RawLegacyAction *LogLegacyActionV0 `json:"action,omitempty"` - RawActions []LogActionV0 `json:"actions,omitempty"` + RawPattern string `json:"pattern"` + RawActions LogActionsV0 `json:"actions,omitempty"` } // UnmarshalJSON implements the json.Unmarshaler interface. +// It applies shim to the legacy policy. +// For the modern policy, if a user provides multiple LogActionTypeSignal, only +// the last one will be stored. As for the other action types, the unique ones +// will be stored. For example: +// +// actions: +// - cancel_retries +// - exclude_node +// - cancel_retries +// - signal: a +// - signal: b +// +// We store: +// +// []RawActionV0{ +// {Type: LogActionTypeCancelRetries}, +// {Type: LogActionTypeExcludeNode}, +// {Type: LogActionTypeSignal, Signal: "b"} +// } func (b *LogPolicyV0) UnmarshalJSON(data []byte) error { // First, check if we can unmarshal the log policy item as a legacy item. - type LegacyCancelRetries struct {} - - type LegacyExcludeNode struct {} - type LegacyAction struct { - CancelRetries *LegacyCancelRetries `union:"type,cancel_retries" json:"-"` - ExcludeNode *LegacyExcludeNode `union:"type,exclude_node" json:"-"` + Type LogActionType `json:"type"` } type LegacyPolicy struct { Pattern string `json:"pattern"` - Action *LegacyAction `json:"action"` + Action *LegacyAction `json:"action"` } var legacy LegacyPolicy err := json.Unmarshal(data, &legacy) + // For this to be a valid LegacyPolicy, the Action must have been provided. if err == nil && legacy.Action != nil { // Apply shim to bring legacy policy into the current format. var action LogActionV0 - if legacy.Action.CancelRetries { + switch legacy.Action.Type { + case LogActionTypeCancelRetries: action = LogActionV0{Type: LogActionTypeCancelRetries} - } else { + case LogActionTypeExcludeNode: action = LogActionV0{Type: LogActionTypeExcludeNode} + default: + return fmt.Errorf("unregonized legacy action type: %s, data: %q", legacy.Action.Type, string(data)) } *b = LogPolicyV0{ RawPattern: legacy.Pattern, @@ -182,40 +196,102 @@ func (b *LogPolicyV0) UnmarshalJSON(data []byte) error { } return nil } - - // Shimming is not necessary. + // Modern policy doesn't need shimming. type DefaultParser *LogPolicyV0 - if err := json.Unmarshal(data, DefaultParser(b)); err != nil { - return fmt.Errorf("failed to parse LogPolicyV0: %w", err) + var lp LogPolicyV0 + if err := json.Unmarshal(data, DefaultParser(&lp)); err != nil { + return fmt.Errorf("failed to parse LogPolicyV0: %w, data: %q", err, string(data)) } + + // Get the last LogActionTypeSignal, and get the unique values of the other types. + var signal *LogActionV0 + otherActions := set.New[LogActionV0]() + for _, a := range lp.RawActions { + if a.Type == LogActionTypeSignal { + signal = &a + } else { + otherActions.Insert(a) + } + } + + // Prepare output. + if signal != nil { + b.RawActions = []LogActionV0{*signal} + } + b.RawActions = append(b.RawActions, otherActions.ToSlice()...) + b.RawActions.sort() + b.RawPattern = lp.RawPattern + return nil } +// Merge LogActionsV0. The value of LogActionTypeSignal from other takes precedence. +// Union merge the other LogAction types. +func (s LogActionsV0) merge(other LogActionsV0) LogActionsV0 { + // Store unique actions except signal, and find source signal. + actions := set.New[LogActionV0]() + var srcSignal *LogActionV0 + for _, a := range s { + if a.Type == LogActionTypeSignal { + srcSignal = &a + continue + } + actions.Insert(a) + } + + // Store unique actions except signal, and find other signal. + var otherSignal *LogActionV0 + for _, a := range other { + if a.Type == LogActionTypeSignal { + otherSignal = &a + continue + } + actions.Insert(a) + } + // Other signal takes precedence. + if otherSignal != nil || srcSignal != nil { + actions.Insert(*schemas.Merge(otherSignal, srcSignal)) + } + + out := LogActionsV0(actions.ToSlice()) + out.sort() + return out +} + +// Sort LogActionsV0 by type so the output is in deterministic state. Testing will be easier. +func (s LogActionsV0) sort() { + sort.Slice(s, func(i, j int) bool { + return s[i].Type < s[j].Type + }) +} + type LogActionType string -const LogActionTypeCancelRetries LogActionType = "cancel_retries" -const LogActionTypeExcludeNodes LogActionType = "exclude_nodes" -const LogActionTypeSignal LogActionType = "signal" +const ( + LogActionTypeCancelRetries LogActionType = "cancel_retries" + LogActionTypeExcludeNode LogActionType = "exclude_node" + LogActionTypeSignal LogActionType = "signal" +) // LogActionV0 is a policy to take after matching. type LogActionV0 struct { Type LogActionType // Only used by the "signal" action. - Signal string + Signal *string } // MarshalJSON implements the json.Marshaler interface. func (s LogActionV0) MarshalJSON() ([]byte, error) { - // XXX case statement here - if s.Signal != nil { + switch s.Type { + case LogActionTypeSignal: return json.Marshal(struct { - Signal string `json:"signal"` - }{Signal: *s.Signal}) - } else if s.CancelRetries != nil { - return json.Marshal(cancelRetries) - } else if s.ExcludeNode != nil { - return json.Marshal(excludeNode) + Signal *string `json:"signal"` + }{Signal: s.Signal}) + case LogActionTypeCancelRetries: + return json.Marshal(LogActionTypeCancelRetries) + case LogActionTypeExcludeNode: + return json.Marshal(LogActionTypeExcludeNode) } return nil, fmt.Errorf("failed to marshal LogActionV0: %+v", s) } @@ -224,16 +300,16 @@ func (s LogActionV0) MarshalJSON() ([]byte, error) { func (s *LogActionV0) UnmarshalJSON(data []byte) error { var action string if err := json.Unmarshal(data, &action); err == nil { - switch action { + switch LogActionType(action) { case LogActionTypeCancelRetries: *s = LogActionV0{Type: LogActionTypeCancelRetries} return nil case LogActionTypeExcludeNode: *s = LogActionV0{Type: LogActionTypeExcludeNode} return nil - default: - return fmt.Errorf("invalid log action: %v", action) } + } else { + return fmt.Errorf("failed to unmarshal log action type CancelRetries and ExcludeNode: %w, data: %q", err, string(data)) } // Handle Signal @@ -241,8 +317,9 @@ func (s *LogActionV0) UnmarshalJSON(data []byte) error { Signal *string `json:"signal"` }{} if err := json.Unmarshal(data, &temp); err != nil { - return fmt.Errorf("failed to unmarshal log action: %q", string(data)) + return fmt.Errorf("failed to unmarshal log action type signal: %w, data: %q", err, string(data)) } - *s = LogActionV0{Type: LogActionTypeSignal, Signal: out.Signal} + *s = LogActionV0{Type: LogActionTypeSignal, Signal: temp.Signal} + return nil } diff --git a/master/pkg/schemas/expconf/schema_test.go b/master/pkg/schemas/expconf/schema_test.go index 72b9dccf19a..50f77a6eaab 100644 --- a/master/pkg/schemas/expconf/schema_test.go +++ b/master/pkg/schemas/expconf/schema_test.go @@ -318,9 +318,12 @@ func (tc SchemaTestCase) CheckRoundTrip(t *testing.T) { obj := objectForURL(url) err = json.Unmarshal(byts, &obj) assert.NilError(t, err) + v, _ := obj.(ExperimentConfigV0) + fmt.Printf("obj: %#v\n", v.RawLogPolicies) // Round-trip through json. jByts, err := json.Marshal(obj) + fmt.Printf("\njson output: %#v\n", string(jByts)) assert.NilError(t, err) cpy := objectForURL(url) err = json.Unmarshal(jByts, &cpy) diff --git a/master/pkg/schemas/expconf/zgen_log_legacy_action_cancel_retries_v0.go b/master/pkg/schemas/expconf/zgen_log_legacy_action_cancel_retries_v0.go deleted file mode 100644 index e2feb5128cc..00000000000 --- a/master/pkg/schemas/expconf/zgen_log_legacy_action_cancel_retries_v0.go +++ /dev/null @@ -1,21 +0,0 @@ -// Code generated by gen.py. DO NOT EDIT. - -package expconf - -import ( - "github.com/santhosh-tekuri/jsonschema/v2" - - "github.com/determined-ai/determined/master/pkg/schemas" -) - -func (l LogLegacyActionCancelRetriesV0) ParsedSchema() interface{} { - return schemas.ParsedLogLegacyActionCancelRetriesV0() -} - -func (l LogLegacyActionCancelRetriesV0) SanityValidator() *jsonschema.Schema { - return schemas.GetSanityValidator("http://determined.ai/schemas/expconf/v0/log-legacy-action-cancel-retries.json") -} - -func (l LogLegacyActionCancelRetriesV0) CompletenessValidator() *jsonschema.Schema { - return schemas.GetCompletenessValidator("http://determined.ai/schemas/expconf/v0/log-legacy-action-cancel-retries.json") -} diff --git a/master/pkg/schemas/expconf/zgen_log_legacy_action_exclude_node_v0.go b/master/pkg/schemas/expconf/zgen_log_legacy_action_exclude_node_v0.go deleted file mode 100644 index 6366f544a12..00000000000 --- a/master/pkg/schemas/expconf/zgen_log_legacy_action_exclude_node_v0.go +++ /dev/null @@ -1,21 +0,0 @@ -// Code generated by gen.py. DO NOT EDIT. - -package expconf - -import ( - "github.com/santhosh-tekuri/jsonschema/v2" - - "github.com/determined-ai/determined/master/pkg/schemas" -) - -func (l LogLegacyActionExcludeNodeV0) ParsedSchema() interface{} { - return schemas.ParsedLogLegacyActionExcludeNodeV0() -} - -func (l LogLegacyActionExcludeNodeV0) SanityValidator() *jsonschema.Schema { - return schemas.GetSanityValidator("http://determined.ai/schemas/expconf/v0/log-legacy-action-exclude-node.json") -} - -func (l LogLegacyActionExcludeNodeV0) CompletenessValidator() *jsonschema.Schema { - return schemas.GetCompletenessValidator("http://determined.ai/schemas/expconf/v0/log-legacy-action-exclude-node.json") -} diff --git a/master/pkg/schemas/expconf/zgen_log_legacy_action_v0.go b/master/pkg/schemas/expconf/zgen_log_legacy_action_v0.go deleted file mode 100644 index a482b44a4d4..00000000000 --- a/master/pkg/schemas/expconf/zgen_log_legacy_action_v0.go +++ /dev/null @@ -1,31 +0,0 @@ -// Code generated by gen.py. DO NOT EDIT. - -package expconf - -import ( - "github.com/santhosh-tekuri/jsonschema/v2" - - "github.com/determined-ai/determined/master/pkg/schemas" -) - -func (l LogLegacyActionV0) GetUnionMember() interface{} { - if l.RawCancelRetries != nil { - return *l.RawCancelRetries - } - if l.RawExcludeNode != nil { - return *l.RawExcludeNode - } - panic("no union member defined") -} - -func (l LogLegacyActionV0) ParsedSchema() interface{} { - return schemas.ParsedLogLegacyActionV0() -} - -func (l LogLegacyActionV0) SanityValidator() *jsonschema.Schema { - return schemas.GetSanityValidator("http://determined.ai/schemas/expconf/v0/log-legacy-action.json") -} - -func (l LogLegacyActionV0) CompletenessValidator() *jsonschema.Schema { - return schemas.GetCompletenessValidator("http://determined.ai/schemas/expconf/v0/log-legacy-action.json") -} diff --git a/master/pkg/schemas/expconf/zgen_log_policy_v0.go b/master/pkg/schemas/expconf/zgen_log_policy_v0.go index 4e775a05470..b774f07e918 100644 --- a/master/pkg/schemas/expconf/zgen_log_policy_v0.go +++ b/master/pkg/schemas/expconf/zgen_log_policy_v0.go @@ -16,19 +16,11 @@ func (l *LogPolicyV0) SetPattern(val string) { l.RawPattern = val } -func (l LogPolicyV0) LegacyAction() *LogLegacyActionV0 { - return l.RawLegacyAction -} - -func (l *LogPolicyV0) SetLegacyAction(val *LogLegacyActionV0) { - l.RawLegacyAction = val -} - -func (l LogPolicyV0) Actions() []LogActionV0 { +func (l LogPolicyV0) Actions() LogActionsV0 { return l.RawActions } -func (l *LogPolicyV0) SetActions(val []LogActionV0) { +func (l *LogPolicyV0) SetActions(val LogActionsV0) { l.RawActions = val } diff --git a/master/pkg/schemas/zgen_schemas.go b/master/pkg/schemas/zgen_schemas.go index e5beb6fbd99..235472f75d2 100644 --- a/master/pkg/schemas/zgen_schemas.go +++ b/master/pkg/schemas/zgen_schemas.go @@ -848,6 +848,7 @@ var ( "array", "null" ], + "$comment": "The default value doesn't matter here. It's controlled by WithDefaults()", "default": [], "optionalRef": "http://determined.ai/schemas/expconf/v0/log-policies.json" }, diff --git a/master/pkg/union/unmarshal.go b/master/pkg/union/unmarshal.go index 36f8a8dd60a..0a8baf90772 100644 --- a/master/pkg/union/unmarshal.go +++ b/master/pkg/union/unmarshal.go @@ -2,6 +2,7 @@ package union import ( "encoding/json" + "fmt" "reflect" "strings" @@ -10,6 +11,7 @@ import ( // Unmarshal unmarshals the provided union type from a JSON byte array. func Unmarshal(data []byte, v interface{}) error { + fmt.Printf("\n\nunion unmarshal is called!\n\n") value := reflect.ValueOf(v) expectedFields := make(map[string]bool) unionTypes, err := parseUnionTypes(value.Type().Elem()) diff --git a/schemas/expconf/v0/experiment.json b/schemas/expconf/v0/experiment.json index 17a01f2d3da..3885ec17b38 100644 --- a/schemas/expconf/v0/experiment.json +++ b/schemas/expconf/v0/experiment.json @@ -121,6 +121,7 @@ "array", "null" ], + "$comment": "The default value doesn't matter here. It's controlled by WithDefaults()", "default": [], "optionalRef": "http://determined.ai/schemas/expconf/v0/log-policies.json" }, diff --git a/schemas/test_cases/v0/defaults.yaml b/schemas/test_cases/v0/defaults.yaml index 29061df328c..05c7c703a79 100644 --- a/schemas/test_cases/v0/defaults.yaml +++ b/schemas/test_cases/v0/defaults.yaml @@ -216,17 +216,3 @@ priority: null resource_pool: '' is_single_node: null - -- name: log policies defaults - sane_as: - - http://determined.ai/schemas/expconf/v0/log-policies.json - default_as: - http://determined.ai/schemas/expconf/v0/log-policies.json - case: [] - defaulted: - - pattern: .*uncorrectable ECC error encountered.* - actions: - - signal: ECC Error - - pattern: .*CUDA out of memory.* - actions: - - signal: CUDA OOM diff --git a/schemas/test_cases/v0/experiment.yaml b/schemas/test_cases/v0/experiment.yaml index 4d63e341890..6fb85311e27 100644 --- a/schemas/test_cases/v0/experiment.yaml +++ b/schemas/test_cases/v0/experiment.yaml @@ -202,13 +202,7 @@ add_capabilities: [] drop_capabilities: [] hyperparameters: {} - log_policies: - - pattern: '*' - actions: - - signal: '*' - - pattern: '*' - actions: - - signal: '*' + log_policies: [] labels: [] max_restarts: 5 min_checkpoint_period: diff --git a/schemas/test_cases/v0/log_policies.yaml b/schemas/test_cases/v0/log_policies.yaml new file mode 100644 index 00000000000..6fa9677b2c9 --- /dev/null +++ b/schemas/test_cases/v0/log_policies.yaml @@ -0,0 +1,139 @@ +- name: valid log policy + sane_as: + - http://determined.ai/schemas/expconf/v0/log-policy.json + case: + pattern: a + actions: + - signal: sig + - cancel_retries + - exclude_node + +- name: log policy is invalid with both action and actions + sanity_errors: + http://determined.ai/schemas/expconf/v0/log-policy.json: + - ": both the depreated \"action\" field and the newer \"actions\" field are detected" + case: + pattern: a + actions: + - signal: sig + action: + type: cancel_retries + +- name: invalid log policy 1 + sanity_errors: + http://determined.ai/schemas/expconf/v0/log-policy.json: + - "expect one of the followings: cancel_retries, exclude_node, or an object with a single \"signal\" field" + case: + pattern: a + actions: + - notallowedstring + +- name: invalid log policy 2 + sanity_errors: + http://determined.ai/schemas/expconf/v0/log-policy.json: + - "expect one of the followings: cancel_retries, exclude_node, or an object with a single \"signal\" field" + case: + pattern: a + actions: + - notallowedstring + +- name: invalid log policy 3 + sanity_errors: + http://determined.ai/schemas/expconf/v0/log-policy.json: + - "expect one of the followings: cancel_retries, exclude_node, or an object with a single \"signal\" field" + case: + pattern: a + actions: + - 3 + +- name: unique actions when merged + merge_as: http://determined.ai/schemas/expconf/v0/log-policies.json + case: + - pattern: a + actions: + - cancel_retries + merge_src: + - pattern: a + actions: + - exclude_node + merged: + - pattern: a + actions: + - cancel_retries + - exclude_node + +- name: no duplicated actions when merged + merge_as: http://determined.ai/schemas/expconf/v0/log-policies.json + case: + - pattern: a + actions: + - cancel_retries + - exclude_node + merge_src: + - pattern: a + actions: + - exclude_node + merged: + - pattern: a + actions: + - cancel_retries + - exclude_node + +- name: no duplicated actions when merged 2 + merge_as: http://determined.ai/schemas/expconf/v0/log-policies.json + case: + - pattern: a + actions: + - exclude_node + merge_src: + - pattern: a + actions: + - cancel_retries + - exclude_node + merged: + - pattern: a + actions: + - cancel_retries + - exclude_node + +- name: user can override default values + sane_as: + - http://determined.ai/schemas/expconf/v0/log-policies.json + default_as: + http://determined.ai/schemas/expconf/v0/log-policies.json + case: [] + defaulted: [] + +- name: log action cancel_retries + sane_as: + - http://determined.ai/schemas/expconf/v0/log-legacy-action-cancel-retries.json + case: + type: cancel_retries + +- name: log action exclude_node + sane_as: + - http://determined.ai/schemas/expconf/v0/log-legacy-action-exclude-node.json + case: + type: exclude_node + +- name: recognize action field for backward compatibility + merge_as: http://determined.ai/schemas/expconf/v0/log-policies.json + case: [] + merge_src: + - pattern: a + action: + type: cancel_retries + - pattern: a + action: + type: exclude_node + - pattern: b + action: + type: exclude_node + merged: + - pattern: a + actions: + - cancel_retries + - exclude_node + - pattern: b + actions: + - exclude_node diff --git a/schemas/test_cases/v0/merging.yaml b/schemas/test_cases/v0/merging.yaml index e172e45aee5..c1aa5b88807 100644 --- a/schemas/test_cases/v0/merging.yaml +++ b/schemas/test_cases/v0/merging.yaml @@ -193,53 +193,3 @@ save_experiment_best: null save_trial_best: null save_trial_latest: null - -- name: unique actions when merged - merge_as: http://determined.ai/schemas/expconf/v0/log-policies.json - case: - - pattern: a - actions: - - cancel_retries - merge_src: - - pattern: a - actions: - - exclude_node - merged: - - pattern: a - actions: - - exclude_node - - cancel_retries - -- name: no duplicated actions when merged - merge_as: http://determined.ai/schemas/expconf/v0/log-policies.json - case: - - pattern: a - actions: - - cancel_retries - - exclude_node - merge_src: - - pattern: a - actions: - - exclude_node - merged: - - pattern: a - actions: - - exclude_node - - cancel_retries - -- name: no duplicated actions when merged 2 - merge_as: http://determined.ai/schemas/expconf/v0/log-policies.json - case: - - pattern: a - actions: - - exclude_node - merge_src: - - pattern: a - actions: - - cancel_retries - - exclude_node - merged: - - pattern: a - actions: - - cancel_retries - - exclude_node diff --git a/schemas/test_cases/v0/misc.yaml b/schemas/test_cases/v0/misc.yaml index 38002ae7262..51c386a902e 100644 --- a/schemas/test_cases/v0/misc.yaml +++ b/schemas/test_cases/v0/misc.yaml @@ -331,51 +331,3 @@ - ".shm_size: must be a valid memory size" case: shm_size: 1 i - -- name: valid log policy - sane_as: - - http://determined.ai/schemas/expconf/v0/log-policy.json - case: - pattern: a - actions: - - signal: sig - - cancel_retries - - exclude_node - -- name: log policy is invalid with both action and actions - sanity_errors: - http://determined.ai/schemas/expconf/v0/log-policy.json: - - ": both the depreated \"action\" field and the newer \"actions\" field are detected" - case: - pattern: a - actions: - - signal: sig - action: - type: cancel_retries - -- name: invalid log policy 1 - sanity_errors: - http://determined.ai/schemas/expconf/v0/log-policy.json: - - "expect one of the followings: cancel_retries, exclude_node, or an object with a single \"signal\" field" - case: - pattern: a - actions: - - notallowedstring - -- name: invalid log policy 2 - sanity_errors: - http://determined.ai/schemas/expconf/v0/log-policy.json: - - "expect one of the followings: cancel_retries, exclude_node, or an object with a single \"signal\" field" - case: - pattern: a - actions: - - notallowedstring - -- name: invalid log policy 3 - sanity_errors: - http://determined.ai/schemas/expconf/v0/log-policy.json: - - "expect one of the followings: cancel_retries, exclude_node, or an object with a single \"signal\" field" - case: - pattern: a - actions: - - 3 diff --git a/schemas/test_cases/v0/unions.yaml b/schemas/test_cases/v0/unions.yaml index eab13af4e3e..20218b3a02f 100644 --- a/schemas/test_cases/v0/unions.yaml +++ b/schemas/test_cases/v0/unions.yaml @@ -61,18 +61,6 @@ type: directory container_path: /path/on/disk -- name: log action cancel_retries - sane_as: - - http://determined.ai/schemas/expconf/v0/log-legacy-action-cancel-retries.json - case: - type: cancel_retries - -- name: log action exclude_node - sane_as: - - http://determined.ai/schemas/expconf/v0/log-legacy-action-exclude-node.json - case: - type: exclude_node - - name: records length (valid) sane_as: - http://determined.ai/schemas/expconf/v0/length.json From d66a9a2c892aa865d1f8ab7d8e53e5675d74ef82 Mon Sep 17 00:00:00 2001 From: Jerry Gong Date: Fri, 18 Oct 2024 18:41:48 -0400 Subject: [PATCH 14/27] fix tests --- master/internal/api_tasks_intg_test.go | 18 ++--- master/internal/trial.go | 9 --- master/internal/trial_intg_test.go | 79 ------------------- .../schemas/expconf/experiment_config_test.go | 26 ------ schemas/test_cases/v0/log_policies.yaml | 1 + 5 files changed, 9 insertions(+), 124 deletions(-) diff --git a/master/internal/api_tasks_intg_test.go b/master/internal/api_tasks_intg_test.go index 9f2aea9a756..15f06fdb838 100644 --- a/master/internal/api_tasks_intg_test.go +++ b/master/internal/api_tasks_intg_test.go @@ -218,13 +218,9 @@ func TestPostTaskLogsLogPattern(t *testing.T) { activeConfig, err := api.m.db.ActiveExperimentConfig(trial.ExperimentID) require.NoError(t, err) - activeConfig.RawLogPolicies = &expconf.LogPoliciesConfig{ - expconf.LogPolicy{RawPattern: "sub", RawActions: []expconf.LogActionV0{{ - RawCancelRetries: &expconf.LogActionCancelRetries{}, - }}}, - expconf.LogPolicy{RawPattern: `\d{5}$`, RawActions: []expconf.LogActionV0{{ - RawExcludeNode: &expconf.LogActionExcludeNode{}, - }}}, + activeConfig.RawLogPolicies = expconf.LogPoliciesConfig{ + expconf.LogPolicy{RawPattern: "sub", RawActions: expconf.LogActionsV0{expconf.LogActionV0{Type: expconf.LogActionTypeCancelRetries}}}, + expconf.LogPolicy{RawPattern: `\d{5}$`, RawActions: expconf.LogActionsV0{expconf.LogActionV0{Type: expconf.LogActionTypeExcludeNode}}}, } v, err := json.Marshal(activeConfig) @@ -452,9 +448,11 @@ func TestPostTaskLogsLogSignalDataSaving(t *testing.T) { require.NoError(t, err) signal := "sub" - activeConfig.RawLogPolicies = &expconf.LogPoliciesConfig{ - expconf.LogPolicy{RawPattern: "sub", RawSignal: &signal}, - expconf.LogPolicy{RawPattern: `\d{5}$`}, + activeConfig.RawLogPolicies = expconf.LogPoliciesConfig{ + expconf.LogPolicy{ + RawPattern: "sub", + RawActions: expconf.LogActionsV0{expconf.LogActionV0{Type: expconf.LogActionTypeSignal, Signal: &signal}}, + }, } v, err := json.Marshal(activeConfig) diff --git a/master/internal/trial.go b/master/internal/trial.go index a724ed4c4fd..90711bd73d5 100644 --- a/master/internal/trial.go +++ b/master/internal/trial.go @@ -676,15 +676,6 @@ func (t *trial) handleAllocationExit(exit *task.AllocationExited) error { } } - // Clear the stored signal before trial restarts, as the log in the new trial - // may be different. - if err := logpattern.ClearSignal(context.TODO(), t.taskID); err != nil { - return t.transition(model.StateWithReason{ - State: model.ErrorState, - InformationalReason: err.Error(), - }) - } - case exit.UserRequestedStop: return t.transition(model.StateWithReason{ State: model.CompletedState, diff --git a/master/internal/trial_intg_test.go b/master/internal/trial_intg_test.go index d7000f3a22a..5681a1065ab 100644 --- a/master/internal/trial_intg_test.go +++ b/master/internal/trial_intg_test.go @@ -11,8 +11,6 @@ import ( "testing" "time" - "github.com/uptrace/bun" - "github.com/determined-ai/determined/master/pkg/ptrs" "github.com/determined-ai/determined/master/pkg/schemas" @@ -187,80 +185,3 @@ func setup(t *testing.T) ( require.NoError(t, err) return a.m.db, rID, tr, &as, done } - -func TestClearLogSignalAfterTrialRestarts(t *testing.T) { - _, rID, tr, _, done := setup(t) - // Pre-scheduled stage. - require.NoError(t, tr.PatchState( - model.StateWithReason{State: model.ActiveState})) - require.NoError(t, tr.PatchSearcherState(experiment.TrialSearcherState{ - Create: searcher.Create{RequestID: rID}, - Op: searcher.ValidateAfter{ - RequestID: rID, - Length: 10, - }, - Complete: false, - Closed: true, - })) - - signal := "Test signal" - _, err := internaldb.Bun().NewUpdate().Model(&model.Task{}). - Table("run_id_task_id"). - Set("log_signal = ?", signal). - Where("run_id_task_id.run_id = ?", tr.id). - Where("task.task_id = run_id_task_id.task_id"). - Exec(context.Background()) - require.NoError(t, err) - - _, err = internaldb.Bun().NewUpdate().Model(&model.Run{}). - Set("log_signal = ?", signal). - Where("id = ?", tr.id). - Exec(context.Background()) - require.NoError(t, err) - - // Restart - tr.AllocationExitedCallback(&task.AllocationExited{Err: fmt.Errorf("bad stuff went down")}) - - // Verity log signal is reset after restart - runsOut := struct { - bun.BaseModel `bun:"table:runs"` - LogSignal *string `db:"log_signal"` - }{} - - err = internaldb.Bun().NewSelect().Model(&runsOut). - Where("id = ?", tr.id). - Scan(context.Background()) - require.NoError(t, err) - require.NotNil(t, runsOut) - require.Nil(t, runsOut.LogSignal) - - tasksOut := struct { - bun.BaseModel `bun:"table:tasks"` - LogSignal *string `db:"log_signal"` - }{} - err = internaldb.Bun().NewSelect().Model(&tasksOut). - Join("LEFT JOIN run_id_task_id AS rt on tasks.task_id = rt.task_id"). - Where("run_id = ?", tr.id). - Scan(context.Background()) - require.NoError(t, err) - require.NotNil(t, tasksOut) - require.Nil(t, tasksOut.LogSignal) - - // Running and Terminating stage. - require.NoError(t, tr.PatchSearcherState(experiment.TrialSearcherState{ - Create: searcher.Create{RequestID: rID}, - Op: searcher.ValidateAfter{ - RequestID: rID, - Length: 10, - }, - Complete: true, - Closed: true, - })) - tr.AllocationExitedCallback(&task.AllocationExited{}) - select { - case <-done: // success - case <-time.After(5 * time.Second): - require.Error(t, fmt.Errorf("timed out waiting for trial to terminate")) - } - require.True(t, model.TerminalStates[tr.state]) -} diff --git a/master/pkg/schemas/expconf/experiment_config_test.go b/master/pkg/schemas/expconf/experiment_config_test.go index e33b078d18d..a69ff5adc0c 100644 --- a/master/pkg/schemas/expconf/experiment_config_test.go +++ b/master/pkg/schemas/expconf/experiment_config_test.go @@ -4,7 +4,6 @@ package expconf import ( "encoding/json" "testing" - "fmt" "github.com/stretchr/testify/require" "gotest.tools/assert" @@ -117,28 +116,3 @@ func TestName(t *testing.T) { assert.DeepEqual(t, newConfig.Name().String(), "my_name") } - -func TestLogPolicies(t *testing.T) { - newConfig := ExperimentConfig{} - bytes := []byte(`{ - "log_policies": [ - ] - }`) -/* - { - "pattern": "x", - "actions": [ - "cancel_retries" - ] - } -*/ - err := json.Unmarshal(bytes, &newConfig) - assert.NilError(t, err) - panic( - fmt.Sprintf( - "newConfig.RawLogPolicies: (nil?%v) %v\n", - newConfig.RawLogPolicies == nil, - newConfig.RawLogPolicies, - ), - ) -} diff --git a/schemas/test_cases/v0/log_policies.yaml b/schemas/test_cases/v0/log_policies.yaml index 6fa9677b2c9..5b57972aca7 100644 --- a/schemas/test_cases/v0/log_policies.yaml +++ b/schemas/test_cases/v0/log_policies.yaml @@ -118,6 +118,7 @@ - name: recognize action field for backward compatibility merge_as: http://determined.ai/schemas/expconf/v0/log-policies.json + comment: merge_src is legacy log policies, merged is modern log policies case: [] merge_src: - pattern: a From 1781b388b91677f790e1c61579ebe3cfd710ed12 Mon Sep 17 00:00:00 2001 From: Jerry Gong Date: Fri, 18 Oct 2024 19:24:18 -0400 Subject: [PATCH 15/27] lint --- master/internal/api_tasks_intg_test.go | 10 ++- master/pkg/model/task_container_defaults.go | 10 ++- .../pkg/model/task_container_defaults_test.go | 66 ++------------- .../pkg/schemas/expconf/log_pattern_config.go | 44 ++++++---- master/pkg/schemas/expconf/schema_test.go | 84 +++---------------- master/pkg/union/unmarshal.go | 2 - .../test_cases/v0/backward-compatibility.yaml | 20 ----- schemas/test_cases/v0/log_policies.yaml | 33 +++++++- 8 files changed, 94 insertions(+), 175 deletions(-) delete mode 100644 schemas/test_cases/v0/backward-compatibility.yaml diff --git a/master/internal/api_tasks_intg_test.go b/master/internal/api_tasks_intg_test.go index 15f06fdb838..8757e0f7070 100644 --- a/master/internal/api_tasks_intg_test.go +++ b/master/internal/api_tasks_intg_test.go @@ -219,8 +219,14 @@ func TestPostTaskLogsLogPattern(t *testing.T) { activeConfig, err := api.m.db.ActiveExperimentConfig(trial.ExperimentID) require.NoError(t, err) activeConfig.RawLogPolicies = expconf.LogPoliciesConfig{ - expconf.LogPolicy{RawPattern: "sub", RawActions: expconf.LogActionsV0{expconf.LogActionV0{Type: expconf.LogActionTypeCancelRetries}}}, - expconf.LogPolicy{RawPattern: `\d{5}$`, RawActions: expconf.LogActionsV0{expconf.LogActionV0{Type: expconf.LogActionTypeExcludeNode}}}, + expconf.LogPolicy{ + RawPattern: "sub", + RawActions: expconf.LogActionsV0{expconf.LogActionV0{Type: expconf.LogActionTypeCancelRetries}}, + }, + expconf.LogPolicy{ + RawPattern: `\d{5}$`, + RawActions: expconf.LogActionsV0{expconf.LogActionV0{Type: expconf.LogActionTypeExcludeNode}}, + }, } v, err := json.Marshal(activeConfig) diff --git a/master/pkg/model/task_container_defaults.go b/master/pkg/model/task_container_defaults.go index ed89cd46c23..4eeedd4258c 100644 --- a/master/pkg/model/task_container_defaults.go +++ b/master/pkg/model/task_container_defaults.go @@ -319,10 +319,12 @@ func (c TaskContainerDefaultsConfig) Merge( res.Pbs.SetSbatchArgs(tmp) } - if res.LogPolicies == nil { - res.LogPolicies = other.LogPolicies - } else { - res.LogPolicies = res.LogPolicies.Merge(other.LogPolicies) + if other.LogPolicies != nil { + if res.LogPolicies == nil { + res.LogPolicies = other.LogPolicies + } else { + res.LogPolicies = res.LogPolicies.Merge(other.LogPolicies) + } } if other.PreemptionTimeout > 0 { diff --git a/master/pkg/model/task_container_defaults_test.go b/master/pkg/model/task_container_defaults_test.go index 18071b20907..44a94356ff8 100644 --- a/master/pkg/model/task_container_defaults_test.go +++ b/master/pkg/model/task_container_defaults_test.go @@ -449,66 +449,18 @@ func TestLogPatternUnmarshal(t *testing.T) { ShmSizeBytes: 4294967296, NetworkMode: "bridge", PreemptionTimeout: DefaultPreemptionTimeout, - LogPolicies: &expconf.LogPoliciesConfig{ - expconf.LogPolicy{RawPattern: "test", RawActions: []expconf.LogAction{{ - RawExcludeNode: &expconf.LogActionExcludeNode{}, - }}}, - expconf.LogPolicy{RawPattern: "test2", RawActions: []expconf.LogAction{{ - RawCancelRetries: &expconf.LogActionCancelRetries{}, - }}}, - }, - } - require.Equal(t, expected, tcd) -} - -func TestLogPatternPoliciesMerging(t *testing.T) { - defaults := &TaskContainerDefaultsConfig{ - LogPolicies: &expconf.LogPoliciesConfig{ - expconf.LogPolicy{RawPattern: "a", RawActions: []expconf.LogAction{{ - RawCancelRetries: &expconf.LogActionCancelRetries{}, - }}}, - expconf.LogPolicy{RawPattern: "b", RawActions: []expconf.LogAction{ - { - RawExcludeNode: &expconf.LogActionExcludeNode{}, - }, - { - RawCancelRetries: &expconf.LogActionCancelRetries{}, - }, - }}, - }, - } - - conf := expconf.ExperimentConfig{ - RawLogPolicies: &expconf.LogPoliciesConfig{ - expconf.LogPolicy{RawPattern: "b", RawActions: []expconf.LogAction{{ - RawCancelRetries: &expconf.LogActionCancelRetries{}, - }}}, - expconf.LogPolicy{RawPattern: "c", RawActions: []expconf.LogAction{{ - RawExcludeNode: &expconf.LogActionExcludeNode{}, - }}}, - }, - } - - defaults.MergeIntoExpConfig(&conf) - - expected := &expconf.LogPoliciesConfig{ - expconf.LogPolicy{RawPattern: "a", RawActions: []expconf.LogAction{{ - RawCancelRetries: &expconf.LogActionCancelRetries{}, - }}}, - expconf.LogPolicy{RawPattern: "b", RawActions: []expconf.LogAction{ - { - RawExcludeNode: &expconf.LogActionExcludeNode{}, + LogPolicies: expconf.LogPoliciesConfig{ + expconf.LogPolicy{ + RawPattern: "test", + RawActions: expconf.LogActionsV0{expconf.LogActionV0{Type: expconf.LogActionTypeExcludeNode}}, }, - { - RawCancelRetries: &expconf.LogActionCancelRetries{}, + expconf.LogPolicy{ + RawPattern: "test2", + RawActions: expconf.LogActionsV0{expconf.LogActionV0{Type: expconf.LogActionTypeCancelRetries}}, }, - }}, - expconf.LogPolicy{RawPattern: "c", RawActions: []expconf.LogAction{{ - RawExcludeNode: &expconf.LogActionExcludeNode{}, - }}}, + }, } - - require.Equal(t, expected, conf.RawLogPolicies) + require.Equal(t, expected, tcd) } func TestPodSpecsDefaultMerging(t *testing.T) { diff --git a/master/pkg/schemas/expconf/log_pattern_config.go b/master/pkg/schemas/expconf/log_pattern_config.go index fb9e9c1a6e6..656f5d19826 100644 --- a/master/pkg/schemas/expconf/log_pattern_config.go +++ b/master/pkg/schemas/expconf/log_pattern_config.go @@ -16,7 +16,7 @@ import ( //go:generate ../gen.sh type LogPoliciesConfigV0 []LogPolicyV0 -// WithDefaults implements the Defaultable psuedointerface. +// WithDefaults implements the Defaultable pseudo-interface. func (b LogPoliciesConfigV0) WithDefaults() LogPoliciesConfigV0 { cudaOomPattern := CUDAOOMPattern cudaOomSignal := CUDAOOMSignal @@ -25,14 +25,20 @@ func (b LogPoliciesConfigV0) WithDefaults() LogPoliciesConfigV0 { if b == nil { return LogPoliciesConfigV0{ - LogPolicyV0{RawPattern: cudaOomPattern, RawActions: []LogActionV0{{Type: LogActionTypeSignal, Signal: &cudaOomSignal}}}, - LogPolicyV0{RawPattern: eccErrorPattern, RawActions: []LogActionV0{{Type: LogActionTypeSignal, Signal: &eccErrorSignal}}}, + LogPolicyV0{ + RawPattern: cudaOomPattern, + RawActions: []LogActionV0{{Type: LogActionTypeSignal, Signal: &cudaOomSignal}}, + }, + LogPolicyV0{ + RawPattern: eccErrorPattern, + RawActions: []LogActionV0{{Type: LogActionTypeSignal, Signal: &eccErrorSignal}}, + }, } } return b } -// Merge implements the Mergable psuedo-interface. +// Merge implements the Mergable pseudo-interface. // We appends all LogPolicyV0s to the output slice, but if there are any with the same pattern, we merge // their actions and save them as one LogPolicyV0. func (b LogPoliciesConfigV0) Merge( @@ -121,7 +127,7 @@ func (b *LogPoliciesConfigV0) UnmarshalJSON(data []byte) error { for _, lp := range patternToLp { temp = append(temp, lp) } - LogPoliciesConfigV0(temp).sort() + temp.sort() *b = temp return nil @@ -134,6 +140,7 @@ func (b LogPoliciesConfigV0) sort() { }) } +// LogActionsV0 is a list of log actions. type LogActionsV0 []LogActionV0 // LogPolicyV0 is an action to take if we match against trial logs. @@ -265,8 +272,10 @@ func (s LogActionsV0) sort() { }) } +// LogActionType is the type of an action. type LogActionType string +// LogActionType refers to the action user can take when a pattern is detected in the log. const ( LogActionTypeCancelRetries LogActionType = "cancel_retries" LogActionTypeExcludeNode LogActionType = "exclude_node" @@ -299,17 +308,20 @@ func (s LogActionV0) MarshalJSON() ([]byte, error) { // UnmarshalJSON implements the json.Unmarshaler interface. func (s *LogActionV0) UnmarshalJSON(data []byte) error { var action string - if err := json.Unmarshal(data, &action); err == nil { - switch LogActionType(action) { - case LogActionTypeCancelRetries: - *s = LogActionV0{Type: LogActionTypeCancelRetries} - return nil - case LogActionTypeExcludeNode: - *s = LogActionV0{Type: LogActionTypeExcludeNode} - return nil - } - } else { - return fmt.Errorf("failed to unmarshal log action type CancelRetries and ExcludeNode: %w, data: %q", err, string(data)) + if err := json.Unmarshal(data, &action); err != nil { + return fmt.Errorf( + "failed to unmarshal log action type CancelRetries and ExcludeNode: %w, data: %q", err, string(data), + ) + } + + // Handle all the types beside signal + switch LogActionType(action) { + case LogActionTypeCancelRetries: + *s = LogActionV0{Type: LogActionTypeCancelRetries} + return nil + case LogActionTypeExcludeNode: + *s = LogActionV0{Type: LogActionTypeExcludeNode} + return nil } // Handle Signal diff --git a/master/pkg/schemas/expconf/schema_test.go b/master/pkg/schemas/expconf/schema_test.go index 50f77a6eaab..c6040754379 100644 --- a/master/pkg/schemas/expconf/schema_test.go +++ b/master/pkg/schemas/expconf/schema_test.go @@ -13,7 +13,6 @@ import ( "github.com/pkg/errors" "github.com/santhosh-tekuri/jsonschema/v2" - tassert "github.com/stretchr/testify/assert" "gotest.tools/assert" "github.com/determined-ai/determined/master/pkg/schemas" @@ -22,20 +21,17 @@ import ( type JSON = interface{} type SchemaTestCase struct { - Name string `json:"name"` - SaneAs *[]string `json:"sane_as"` - CompleteAs *[]string `json:"complete_as"` - SanityErrors *map[string][]string `json:"sanity_errors"` - CompletenessErrors *map[string][]string `json:"completeness_errors"` - DefaultAs *string `json:"default_as"` - Defaulted *JSON `json:"defaulted"` - Case JSON `json:"case"` - MergeAs *string `json:"merge_as"` - MergeSrc *JSON `json:"merge_src"` - Merged *JSON `json:"merged"` - BackwardCompatibility *string `json:"backward_compatiblity"` - Legacy *JSON `json:"legacy"` - New *JSON `json:"new"` + Name string `json:"name"` + SaneAs *[]string `json:"sane_as"` + CompleteAs *[]string `json:"complete_as"` + SanityErrors *map[string][]string `json:"sanity_errors"` + CompletenessErrors *map[string][]string `json:"completeness_errors"` + DefaultAs *string `json:"default_as"` + Defaulted *JSON `json:"defaulted"` + Case JSON `json:"case"` + MergeAs *string `json:"merge_as"` + MergeSrc *JSON `json:"merge_src"` + Merged *JSON `json:"merged"` } func errorIn(expect string, errors []error) bool { @@ -318,12 +314,9 @@ func (tc SchemaTestCase) CheckRoundTrip(t *testing.T) { obj := objectForURL(url) err = json.Unmarshal(byts, &obj) assert.NilError(t, err) - v, _ := obj.(ExperimentConfigV0) - fmt.Printf("obj: %#v\n", v.RawLogPolicies) // Round-trip through json. jByts, err := json.Marshal(obj) - fmt.Printf("\njson output: %#v\n", string(jByts)) assert.NilError(t, err) cpy := objectForURL(url) err = json.Unmarshal(jByts, &cpy) @@ -343,60 +336,6 @@ func (tc SchemaTestCase) CheckRoundTrip(t *testing.T) { assert.DeepEqual(t, obj, cpy) } -func (tc SchemaTestCase) CheckBackwardCompatiblity(t *testing.T) { - if tc.BackwardCompatibility == nil && tc.Legacy == nil && tc.New == nil { - return - } - - if tc.BackwardCompatibility == nil || tc.Legacy == nil || tc.New == nil { - assert.NilError(t, errors.New( - "if any of backward_compatibility, legacy, or new are set in a test case, "+ - "they must all be set", - )) - } - - if *tc.BackwardCompatibility != "http://determined.ai/schemas/expconf/v0/log-policies.json" { - assert.NilError(t, errors.New("only log-policies.json is supported for now")) - } - - legacyBytes, err := json.Marshal(tc.Legacy) - assert.NilError(t, err) - newBytes, err := json.Marshal(tc.New) - assert.NilError(t, err) - - // Get an empty objects to unmarshal into. - legacy := LogPoliciesConfig{} - new := LogPoliciesConfig{} - - err = json.Unmarshal(legacyBytes, &legacy) - assert.NilError(t, err) - err = json.Unmarshal(newBytes, &new) - assert.NilError(t, err) - - legacyPatterns := make([]string, 0) - newPatternToActions := make(map[string][]LogAction) - newPatterns := make([]string, 0) - for _, lp := range new { - newPatternToActions[lp.Pattern()] = lp.Actions() - newPatterns = append(newPatterns, lp.Pattern()) - } - for _, lp := range legacy { - legacyPatterns = append(legacyPatterns, lp.Pattern()) - } - - assert := tassert.New(t) - assert.ElementsMatch(legacyPatterns, newPatterns, "", "legacy input was't translated to the expected output") - - for _, lp := range legacy { - pattern := lp.Pattern() - - legacyActions := lp.Actions() - newActions := newPatternToActions[pattern] - - assert.ElementsMatch(legacyActions, newActions, "legacy input was't translated to the expected output") - } -} - func RunCasesFile(t *testing.T, path string, displayPath string) { // Ignore the security error about including files as variables; this is just a test. byts, err := os.ReadFile(path) //nolint: gosec @@ -419,7 +358,6 @@ func RunCasesFile(t *testing.T, path string, displayPath string) { tc.CheckDefaulted(t) tc.CheckRoundTrip(t) tc.CheckMerged(t) - tc.CheckBackwardCompatiblity(t) }) } } diff --git a/master/pkg/union/unmarshal.go b/master/pkg/union/unmarshal.go index 0a8baf90772..36f8a8dd60a 100644 --- a/master/pkg/union/unmarshal.go +++ b/master/pkg/union/unmarshal.go @@ -2,7 +2,6 @@ package union import ( "encoding/json" - "fmt" "reflect" "strings" @@ -11,7 +10,6 @@ import ( // Unmarshal unmarshals the provided union type from a JSON byte array. func Unmarshal(data []byte, v interface{}) error { - fmt.Printf("\n\nunion unmarshal is called!\n\n") value := reflect.ValueOf(v) expectedFields := make(map[string]bool) unionTypes, err := parseUnionTypes(value.Type().Elem()) diff --git a/schemas/test_cases/v0/backward-compatibility.yaml b/schemas/test_cases/v0/backward-compatibility.yaml deleted file mode 100644 index 26a73184ed5..00000000000 --- a/schemas/test_cases/v0/backward-compatibility.yaml +++ /dev/null @@ -1,20 +0,0 @@ -- name: recognize action field for backward compatibility - backward_compatiblity: http://determined.ai/schemas/expconf/v0/log-policies.json - legacy: - - pattern: a - action: - type: cancel_retries - - pattern: a - action: - type: exclude_node - - pattern: b - action: - type: exclude_node - new: - - pattern: a - actions: - - exclude_node - - cancel_retries - - pattern: b - actions: - - exclude_node diff --git a/schemas/test_cases/v0/log_policies.yaml b/schemas/test_cases/v0/log_policies.yaml index 5b57972aca7..2e33ea24ddd 100644 --- a/schemas/test_cases/v0/log_policies.yaml +++ b/schemas/test_cases/v0/log_policies.yaml @@ -118,8 +118,8 @@ - name: recognize action field for backward compatibility merge_as: http://determined.ai/schemas/expconf/v0/log-policies.json - comment: merge_src is legacy log policies, merged is modern log policies case: [] + # merge_src is legacy log policies, merged is modern log policies merge_src: - pattern: a action: @@ -138,3 +138,34 @@ - pattern: b actions: - exclude_node + +- name: legacy log policies merging + merge_as: http://determined.ai/schemas/expconf/v0/log-policies.json + case: + - pattern: b + action: + type: cancel_retries + - pattern: b + action: + type: exclude_node + - pattern: c + action: + type: exclude_node + merge_src: + - pattern: a + action: + type: cancel_retries + - pattern: b + action: + type: exclude_node + merged: + - pattern: a + actions: + - cancel_retries + - pattern: b + actions: + - cancel_retries + - exclude_node + - pattern: c + actions: + - exclude_node From feb2e64b452581f5d5ea1f76adef6d62b24fe8b1 Mon Sep 17 00:00:00 2001 From: Jerry Gong Date: Fri, 18 Oct 2024 22:34:51 -0400 Subject: [PATCH 16/27] fix bugs detected by log policies tests --- e2e_tests/tests/cluster/test_log_policies.py | 15 ++-- .../db/postgres_experiments_intg_test.go | 18 ++--- master/internal/logpattern/logpattern.go | 78 +++++++++--------- master/pkg/model/task_container_defaults.go | 2 +- .../pkg/model/task_container_defaults_test.go | 4 +- .../pkg/schemas/expconf/log_pattern_config.go | 79 ++++++------------- master/pkg/schemas/zgen_schemas.go | 24 +++++- schemas/expconf/v0/experiment.json | 21 ++++- schemas/expconf/v0/log-policy.json | 3 +- schemas/test_cases/v0/experiment.yaml | 8 +- schemas/test_cases/v0/log_policies.yaml | 8 +- 11 files changed, 135 insertions(+), 125 deletions(-) diff --git a/e2e_tests/tests/cluster/test_log_policies.py b/e2e_tests/tests/cluster/test_log_policies.py index a500e4a341d..87f374a4917 100644 --- a/e2e_tests/tests/cluster/test_log_policies.py +++ b/e2e_tests/tests/cluster/test_log_policies.py @@ -17,7 +17,7 @@ def test_log_policy_cancel_retries(should_match: bool) -> None: regex = r"(.*) this should not match (.*)" config = { - "log_policies": [{"pattern": regex, "actions": [{"type": "cancel_retries"}]}], + "log_policies": [{"pattern": regex, "action": {"type": "cancel_retries"}}], "max_restarts": 1, } exp_ref = noop.create_experiment(sess, [noop.Exit(7)], config=config) @@ -48,7 +48,7 @@ def test_log_policy_exclude_node_k8s(should_match: bool) -> None: assert agents[0].slots is not None config = { - "log_policies": [{"pattern": regex, "actions": [{"type": "exclude_node"}]}], + "log_policies": [{"pattern": regex, "action": {"type": "exclude_node"}}], "resources": {"slots_per_trial": len(agents[0].slots)}, "max_restarts": 1, } @@ -91,7 +91,7 @@ def test_log_policy_exclude_node_single_agent(should_match: bool) -> None: assert agents[0].slots is not None config = { - "log_policies": [{"pattern": regex, "actions": [{"type": "exclude_node"}]}], + "log_policies": [{"pattern": regex, "action": {"type": "exclude_node"}}], "resources": {"slots_per_trial": len(agents[0].slots)}, "max_restarts": 1, } @@ -134,7 +134,7 @@ def test_log_policy_exclude_slurm(should_match: bool) -> None: regex = r"(.*) this should not match (.*)" config = { - "log_policies": [{"pattern": regex, "actions": [{"type": "exclude_node"}]}], + "log_policies": [{"pattern": regex, "action": {"type": "exclude_node"}}], "max_restarts": 1, } exp_ref = noop.create_experiment(sess, [noop.Exit(7)], config=config) @@ -165,7 +165,7 @@ def test_log_signal(should_match: bool) -> None: expected_signal = "Test Signal" config = { - "log_policies": [{"pattern": regex, "signal": expected_signal}], + "log_policies": [{"pattern": regex, "actions": [{"signal": expected_signal}]}], "max_restarts": 1, } @@ -193,7 +193,7 @@ def test_signal_clear_after_exp_continue() -> None: expected_signal = "Test Signal" config = { - "log_policies": [{"pattern": regex, "signal": expected_signal}], + "log_policies": [{"pattern": regex, "actions": [{"signal": expected_signal}]}], "max_restarts": 0, } @@ -216,8 +216,7 @@ def test_signal_clear_after_exp_continue() -> None: "e", "continue", str(exp_ref.id), - "--config", - "hyperparameters.crash_on_startup=false", + *noop.cli_config_overrides([noop.Exit(0)]), ], ) exp.wait_for_experiment_state(sess, exp_ref.id, bindings.experimentv1State.COMPLETED) diff --git a/master/internal/db/postgres_experiments_intg_test.go b/master/internal/db/postgres_experiments_intg_test.go index ef1a7157cd5..97c36d09db8 100644 --- a/master/internal/db/postgres_experiments_intg_test.go +++ b/master/internal/db/postgres_experiments_intg_test.go @@ -439,14 +439,14 @@ func TestActiveLogPatternPolicies(t *testing.T) { eccErrorSignal := "ECC Error" cudaOOMSignal := "CUDA OOM" expected := expconf.LogPoliciesConfig{ - expconf.LogPolicy{ - RawPattern: ".*uncorrectable ECC error encountered.*", - RawActions: expconf.LogActionsV0{expconf.LogActionV0{Type: expconf.LogActionTypeSignal, Signal: &eccErrorSignal}}, - }, expconf.LogPolicy{ RawPattern: ".*CUDA out of memory.*", RawActions: expconf.LogActionsV0{expconf.LogActionV0{Type: expconf.LogActionTypeSignal, Signal: &cudaOOMSignal}}, }, + expconf.LogPolicy{ + RawPattern: ".*uncorrectable ECC error encountered.*", + RawActions: expconf.LogActionsV0{expconf.LogActionV0{Type: expconf.LogActionTypeSignal, Signal: &eccErrorSignal}}, + }, } require.Equal(t, expected, policies) @@ -454,14 +454,14 @@ func TestActiveLogPatternPolicies(t *testing.T) { activeConfig, err := db.ActiveExperimentConfig(exp.ID) require.NoError(t, err) activeConfig.RawLogPolicies = expconf.LogPoliciesConfig{ - expconf.LogPolicy{ - RawPattern: "sub", - RawActions: expconf.LogActionsV0{expconf.LogActionV0{Type: expconf.LogActionTypeCancelRetries}}, - }, expconf.LogPolicy{ RawPattern: `\d{5}$`, RawActions: expconf.LogActionsV0{expconf.LogActionV0{Type: expconf.LogActionTypeExcludeNode}}, }, + expconf.LogPolicy{ + RawPattern: "sub", + RawActions: expconf.LogActionsV0{expconf.LogActionV0{Type: expconf.LogActionTypeCancelRetries}}, + }, } v, err := json.Marshal(activeConfig) @@ -476,7 +476,7 @@ func TestActiveLogPatternPolicies(t *testing.T) { policies, err = ActiveLogPolicies(ctx, exp.ID) require.NoError(t, err) - require.Equal(t, activeConfig.RawLogPolicies, &policies) + require.Equal(t, activeConfig.RawLogPolicies, policies) } func TestGetNonTerminalExperimentCount(t *testing.T) { diff --git a/master/internal/logpattern/logpattern.go b/master/internal/logpattern/logpattern.go index d400e1dbedc..8788d6f47cf 100644 --- a/master/internal/logpattern/logpattern.go +++ b/master/internal/logpattern/logpattern.go @@ -76,50 +76,48 @@ func (l *LogPatternPolicies) monitor(ctx context.Context, } if compiledRegex.MatchString(log.Log) { - if actions := policy.Actions(); len(actions) > 0 { - for _, a := range actions { - switch a.Type { - case expconf.LogActionTypeCancelRetries: - if err := addDontRetry( - ctx, model.TaskID(log.TaskID), *log.AgentID, policy.Pattern(), log.Log, - ); err != nil { - return fmt.Errorf("adding don't retry: %w", err) - } + for _, a := range policy.Actions() { + switch a.Type { + case expconf.LogActionTypeCancelRetries: + if err := addDontRetry( + ctx, model.TaskID(log.TaskID), *log.AgentID, policy.Pattern(), log.Log, + ); err != nil { + return fmt.Errorf("adding don't retry: %w", err) + } - case expconf.LogActionTypeExcludeNode: - if err := addRetryOnDifferentNode( - ctx, model.TaskID(log.TaskID), *log.AgentID, policy.Pattern(), log.Log, - ); err != nil { - return fmt.Errorf("adding retry on different node: %w", err) - } - case expconf.LogActionTypeSignal: - signal := a.Signal - if signal != nil { - err = db.Bun().RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error { - if _, err := tx.NewUpdate().Model(&model.Task{}). - Set("log_signal = ?", signal). - Where("task_id = ?", log.TaskID). - Exec(ctx); err != nil { - return fmt.Errorf("updating log signal of task %s: %w", log.TaskID, err) - } - if _, err := tx.NewUpdate().Model(&model.Run{}). - Table("run_id_task_id"). - Set("log_signal = ?", signal). - Where("run.id = run_id_task_id.run_id"). - Where("run_id_task_id.task_id = ?", log.TaskID). - Exec(ctx); err != nil { - return fmt.Errorf("updating log signal of task %s: %w", log.TaskID, err) - } - - return nil - }) - if err != nil { - return fmt.Errorf("updating log signal: %w", err) + case expconf.LogActionTypeExcludeNode: + if err := addRetryOnDifferentNode( + ctx, model.TaskID(log.TaskID), *log.AgentID, policy.Pattern(), log.Log, + ); err != nil { + return fmt.Errorf("adding retry on different node: %w", err) + } + case expconf.LogActionTypeSignal: + signal := a.Signal + if signal != nil { + err = db.Bun().RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error { + if _, err := tx.NewUpdate().Model(&model.Task{}). + Set("log_signal = ?", signal). + Where("task_id = ?", log.TaskID). + Exec(ctx); err != nil { + return fmt.Errorf("updating log signal of task %s: %w", log.TaskID, err) + } + if _, err := tx.NewUpdate().Model(&model.Run{}). + Table("run_id_task_id"). + Set("log_signal = ?", signal). + Where("run.id = run_id_task_id.run_id"). + Where("run_id_task_id.task_id = ?", log.TaskID). + Exec(ctx); err != nil { + return fmt.Errorf("updating log signal of task %s: %w", log.TaskID, err) } + + return nil + }) + if err != nil { + return fmt.Errorf("updating log signal: %w", err) } - default: - return fmt.Errorf("unrecognized log pattern policy type") } + default: + return fmt.Errorf("unrecognized log pattern policy type") } } } diff --git a/master/pkg/model/task_container_defaults.go b/master/pkg/model/task_container_defaults.go index 4eeedd4258c..f3579ff3882 100644 --- a/master/pkg/model/task_container_defaults.go +++ b/master/pkg/model/task_container_defaults.go @@ -323,7 +323,7 @@ func (c TaskContainerDefaultsConfig) Merge( if res.LogPolicies == nil { res.LogPolicies = other.LogPolicies } else { - res.LogPolicies = res.LogPolicies.Merge(other.LogPolicies) + res.LogPolicies = other.LogPolicies.Merge(res.LogPolicies) } } diff --git a/master/pkg/model/task_container_defaults_test.go b/master/pkg/model/task_container_defaults_test.go index 44a94356ff8..31d73f4a55e 100644 --- a/master/pkg/model/task_container_defaults_test.go +++ b/master/pkg/model/task_container_defaults_test.go @@ -440,8 +440,8 @@ func TestLogPatternUnmarshal(t *testing.T) { var tcd TaskContainerDefaultsConfig require.NoError(t, json.Unmarshal([]byte(string(`{ "log_policies": [ - {"pattern": "test", "actions": [{"type": "exclude_node"}]}, - {"pattern": "test2", "actions": [{"type": "cancel_retries"}]} + {"pattern": "test", "actions": ["exclude_node"]}, + {"pattern": "test2", "actions": ["cancel_retries"]} ] }`)), &tcd)) diff --git a/master/pkg/schemas/expconf/log_pattern_config.go b/master/pkg/schemas/expconf/log_pattern_config.go index 656f5d19826..4e886257d07 100644 --- a/master/pkg/schemas/expconf/log_pattern_config.go +++ b/master/pkg/schemas/expconf/log_pattern_config.go @@ -16,48 +16,26 @@ import ( //go:generate ../gen.sh type LogPoliciesConfigV0 []LogPolicyV0 -// WithDefaults implements the Defaultable pseudo-interface. -func (b LogPoliciesConfigV0) WithDefaults() LogPoliciesConfigV0 { - cudaOomPattern := CUDAOOMPattern - cudaOomSignal := CUDAOOMSignal - eccErrorPattern := ECCErrorPattern - eccErrorSignal := ECCErrorSignal - - if b == nil { - return LogPoliciesConfigV0{ - LogPolicyV0{ - RawPattern: cudaOomPattern, - RawActions: []LogActionV0{{Type: LogActionTypeSignal, Signal: &cudaOomSignal}}, - }, - LogPolicyV0{ - RawPattern: eccErrorPattern, - RawActions: []LogActionV0{{Type: LogActionTypeSignal, Signal: &eccErrorSignal}}, - }, - } - } - return b -} - // Merge implements the Mergable pseudo-interface. // We appends all LogPolicyV0s to the output slice, but if there are any with the same pattern, we merge // their actions and save them as one LogPolicyV0. func (b LogPoliciesConfigV0) Merge( - other LogPoliciesConfigV0, + src LogPoliciesConfigV0, ) LogPoliciesConfigV0 { var out LogPoliciesConfigV0 patternTosrcLp := make(map[string]LogPolicyV0) - for _, lp := range b { + for _, lp := range src { patternTosrcLp[lp.RawPattern] = lp } - for _, otherLp := range other { + for _, otherLp := range b { pattern := otherLp.RawPattern if srcLp, ok := patternTosrcLp[pattern]; ok { // Merge actions of two LogPolicies if they have the same pattern. patternTosrcLp[pattern] = LogPolicyV0{ RawPattern: pattern, - RawActions: srcLp.RawActions.merge(otherLp.RawActions), + RawActions: otherLp.RawActions.merge(srcLp.RawActions), } } else { // Source LogPoliciesConfig doesn't have this pattern. @@ -101,13 +79,8 @@ func (b *LogPoliciesConfigV0) UnmarshalJSON(data []byte) error { if err := json.Unmarshal(data, &jsonItems); err != nil { return errors.Wrapf(err, "failed to parse runtime items") } - // By distinguishing [] and nil input, user can override the default log policies and get empty. - // log policies. If a user provides [], the default values won't be applied. if jsonItems == nil { return nil - } else if len(jsonItems) == 0 { - *b = make([]LogPolicyV0, 0) - return nil } // Merge LogPolicyV0s with the same pattern into one. @@ -123,7 +96,8 @@ func (b *LogPoliciesConfigV0) UnmarshalJSON(data []byte) error { patternToLp[pattern] = LogPolicyV0{RawPattern: pattern, RawActions: mergedActions} } - var temp LogPoliciesConfigV0 + // if the input data is [] and we use `var temp LogPolicies`, function return will return nil + temp := make(LogPoliciesConfigV0, 0) for _, lp := range patternToLp { temp = append(temp, lp) } @@ -234,11 +208,11 @@ func (b *LogPolicyV0) UnmarshalJSON(data []byte) error { // Merge LogActionsV0. The value of LogActionTypeSignal from other takes precedence. // Union merge the other LogAction types. -func (s LogActionsV0) merge(other LogActionsV0) LogActionsV0 { +func (l LogActionsV0) merge(src LogActionsV0) LogActionsV0 { // Store unique actions except signal, and find source signal. actions := set.New[LogActionV0]() var srcSignal *LogActionV0 - for _, a := range s { + for _, a := range src { if a.Type == LogActionTypeSignal { srcSignal = &a continue @@ -248,7 +222,7 @@ func (s LogActionsV0) merge(other LogActionsV0) LogActionsV0 { // Store unique actions except signal, and find other signal. var otherSignal *LogActionV0 - for _, a := range other { + for _, a := range l { if a.Type == LogActionTypeSignal { otherSignal = &a continue @@ -266,9 +240,9 @@ func (s LogActionsV0) merge(other LogActionsV0) LogActionsV0 { } // Sort LogActionsV0 by type so the output is in deterministic state. Testing will be easier. -func (s LogActionsV0) sort() { - sort.Slice(s, func(i, j int) bool { - return s[i].Type < s[j].Type +func (l LogActionsV0) sort() { + sort.Slice(l, func(i, j int) bool { + return l[i].Type < l[j].Type }) } @@ -308,28 +282,25 @@ func (s LogActionV0) MarshalJSON() ([]byte, error) { // UnmarshalJSON implements the json.Unmarshaler interface. func (s *LogActionV0) UnmarshalJSON(data []byte) error { var action string - if err := json.Unmarshal(data, &action); err != nil { - return fmt.Errorf( - "failed to unmarshal log action type CancelRetries and ExcludeNode: %w, data: %q", err, string(data), - ) - } - - // Handle all the types beside signal - switch LogActionType(action) { - case LogActionTypeCancelRetries: - *s = LogActionV0{Type: LogActionTypeCancelRetries} - return nil - case LogActionTypeExcludeNode: - *s = LogActionV0{Type: LogActionTypeExcludeNode} - return nil + // err is not nil means input data is not cancel_retries or exclude_node. + if err := json.Unmarshal(data, &action); err == nil { + // Handle all the types beside signal + switch LogActionType(action) { + case LogActionTypeCancelRetries: + *s = LogActionV0{Type: LogActionTypeCancelRetries} + return nil + case LogActionTypeExcludeNode: + *s = LogActionV0{Type: LogActionTypeExcludeNode} + return nil + } } // Handle Signal temp := struct { Signal *string `json:"signal"` }{} - if err := json.Unmarshal(data, &temp); err != nil { - return fmt.Errorf("failed to unmarshal log action type signal: %w, data: %q", err, string(data)) + if err := json.Unmarshal(data, &temp); err != nil || temp.Signal == nil { + return fmt.Errorf("failed to unmarshal log action: %w, data: %q", err, string(data)) } *s = LogActionV0{Type: LogActionTypeSignal, Signal: temp.Signal} diff --git a/master/pkg/schemas/zgen_schemas.go b/master/pkg/schemas/zgen_schemas.go index 235472f75d2..18432310d00 100644 --- a/master/pkg/schemas/zgen_schemas.go +++ b/master/pkg/schemas/zgen_schemas.go @@ -848,8 +848,25 @@ var ( "array", "null" ], - "$comment": "The default value doesn't matter here. It's controlled by WithDefaults()", - "default": [], + "$comment": "If ther's no default field or the default is null, LogPoliciesConfigV0 WithDefaults() won't be called.", + "default": [ + { + "pattern": ".*CUDA out of memory.*", + "actions": [ + { + "signal": "CUDA OOM" + } + ] + }, + { + "pattern": ".*uncorrectable ECC error encountered.*", + "actions": [ + { + "signal": "ECC Error" + } + ] + } + ], "optionalRef": "http://determined.ai/schemas/expconf/v0/log-policies.json" }, "retention_policy": { @@ -1617,7 +1634,6 @@ var ( "default": null }, "actions": { - "$comment": "\"default\": [] is for passing lint. The actual default can be found in const.go", "type": [ "array", "null" @@ -1637,7 +1653,7 @@ var ( } }, "checks": { - "both the depreated \"action\" field and the newer \"actions\" field are detected": { + "require either \"pattern\" with the depreated \"action\" field or \"pattern\" with the newer \"actions\" field": { "oneOf": [ { "required": [ diff --git a/schemas/expconf/v0/experiment.json b/schemas/expconf/v0/experiment.json index 3885ec17b38..ee47c584b68 100644 --- a/schemas/expconf/v0/experiment.json +++ b/schemas/expconf/v0/experiment.json @@ -121,8 +121,25 @@ "array", "null" ], - "$comment": "The default value doesn't matter here. It's controlled by WithDefaults()", - "default": [], + "$comment": "If ther's no default field or the default is null, LogPoliciesConfigV0 WithDefaults() won't be called.", + "default": [ + { + "pattern": ".*CUDA out of memory.*", + "actions": [ + { + "signal": "CUDA OOM" + } + ] + }, + { + "pattern": ".*uncorrectable ECC error encountered.*", + "actions": [ + { + "signal": "ECC Error" + } + ] + } + ], "optionalRef": "http://determined.ai/schemas/expconf/v0/log-policies.json" }, "retention_policy": { diff --git a/schemas/expconf/v0/log-policy.json b/schemas/expconf/v0/log-policy.json index fbabe397236..2ffcab97de9 100644 --- a/schemas/expconf/v0/log-policy.json +++ b/schemas/expconf/v0/log-policy.json @@ -14,7 +14,6 @@ "default": null }, "actions": { - "$comment": "\"default\": [] is for passing lint. The actual default can be found in const.go", "type": [ "array", "null" @@ -34,7 +33,7 @@ } }, "checks": { - "both the depreated \"action\" field and the newer \"actions\" field are detected": { + "require either \"pattern\" with the depreated \"action\" field or \"pattern\" with the newer \"actions\" field": { "oneOf": [ { "required": [ diff --git a/schemas/test_cases/v0/experiment.yaml b/schemas/test_cases/v0/experiment.yaml index 6fb85311e27..1bb61889c37 100644 --- a/schemas/test_cases/v0/experiment.yaml +++ b/schemas/test_cases/v0/experiment.yaml @@ -202,7 +202,13 @@ add_capabilities: [] drop_capabilities: [] hyperparameters: {} - log_policies: [] + log_policies: + - pattern: "*" + actions: + - signal: "*" + - pattern: "*" + actions: + - signal: "*" labels: [] max_restarts: 5 min_checkpoint_period: diff --git a/schemas/test_cases/v0/log_policies.yaml b/schemas/test_cases/v0/log_policies.yaml index 2e33ea24ddd..9c4169d88a9 100644 --- a/schemas/test_cases/v0/log_policies.yaml +++ b/schemas/test_cases/v0/log_policies.yaml @@ -11,7 +11,7 @@ - name: log policy is invalid with both action and actions sanity_errors: http://determined.ai/schemas/expconf/v0/log-policy.json: - - ": both the depreated \"action\" field and the newer \"actions\" field are detected" + - "require either \"pattern\" with the depreated \"action\" field or \"pattern\" with the newer \"actions\" field" case: pattern: a actions: @@ -85,16 +85,19 @@ - pattern: a actions: - exclude_node + - signal: sig1 merge_src: - pattern: a actions: - cancel_retries - exclude_node + - signal: sig2 merged: - pattern: a actions: - cancel_retries - exclude_node + - signal: sig1 - name: user can override default values sane_as: @@ -104,6 +107,7 @@ case: [] defaulted: [] + - name: log action cancel_retries sane_as: - http://determined.ai/schemas/expconf/v0/log-legacy-action-cancel-retries.json @@ -139,7 +143,7 @@ actions: - exclude_node -- name: legacy log policies merging +- name: legacy log policies merging merge_as: http://determined.ai/schemas/expconf/v0/log-policies.json case: - pattern: b From efa27ec79e8a1dc21e01a5c1233c7ef198ecafbe Mon Sep 17 00:00:00 2001 From: Jerry Gong Date: Sat, 19 Oct 2024 00:36:39 -0400 Subject: [PATCH 17/27] clean up --- master/pkg/schemas/expconf/const.go | 8 -------- 1 file changed, 8 deletions(-) diff --git a/master/pkg/schemas/expconf/const.go b/master/pkg/schemas/expconf/const.go index e2ea45cd140..77b73cd0132 100644 --- a/master/pkg/schemas/expconf/const.go +++ b/master/pkg/schemas/expconf/const.go @@ -12,11 +12,3 @@ const ( CUDAImage = "determinedai/pytorch-ngc-dev:0736b6d" ROCMImage = "determinedai/environments:rocm-5.6-pytorch-1.3-tf-2.10-rocm-mpich-0736b6d" ) - -// Default log policy values. -const ( - ECCErrorPattern = ".*uncorrectable ECC error encountered.*" - ECCErrorSignal = "ECC Error" - CUDAOOMPattern = ".*CUDA out of memory.*" - CUDAOOMSignal = "CUDA OOM" -) From 531b78ec3a1e148665679093b8f92137ce600794 Mon Sep 17 00:00:00 2001 From: Jerry Gong Date: Sat, 19 Oct 2024 01:11:34 -0400 Subject: [PATCH 18/27] update tests after rebase --- .../configpolicy/postgres_task_config_policy.go | 15 ++++++++++++++- .../configpolicy/task_config_policy_intg_test.go | 15 ++++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/master/internal/configpolicy/postgres_task_config_policy.go b/master/internal/configpolicy/postgres_task_config_policy.go index dea01bf7e42..496585811c8 100644 --- a/master/internal/configpolicy/postgres_task_config_policy.go +++ b/master/internal/configpolicy/postgres_task_config_policy.go @@ -22,7 +22,20 @@ const ( "resources": {"slots": 4, "max_slots": 8}, "log_policies": [ { - "pattern": "nonrepeat" + "pattern": ".*CUDA out of memory.*", + "actions": [ + { + "signal": "CUDA OOM" + } + ] + }, + { + "pattern": ".*uncorrectable ECC error encountered.*", + "actions": [ + { + "signal": "ECC Error" + } + ] } ] }` diff --git a/master/internal/configpolicy/task_config_policy_intg_test.go b/master/internal/configpolicy/task_config_policy_intg_test.go index ae19901868e..b0c5391e793 100644 --- a/master/internal/configpolicy/task_config_policy_intg_test.go +++ b/master/internal/configpolicy/task_config_policy_intg_test.go @@ -473,7 +473,20 @@ func TestMergeWithInvariantExperimentConfigs(t *testing.T) { ], "log_policies": [ { - "pattern": "nonrepeat" + "pattern": ".*CUDA out of memory.*", + "actions": [ + { + "signal": "CUDA OOM" + } + ] + }, + { + "pattern": ".*uncorrectable ECC error encountered.*", + "actions": [ + { + "signal": "ECC Error" + } + ] } ] }` From a5285b773dfa1bf88691e0a3ca32a2506bec3909 Mon Sep 17 00:00:00 2001 From: Jerry Gong Date: Sat, 19 Oct 2024 09:53:27 -0400 Subject: [PATCH 19/27] bug fix --- master/pkg/schemas/expconf/log_pattern_config.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/master/pkg/schemas/expconf/log_pattern_config.go b/master/pkg/schemas/expconf/log_pattern_config.go index 4e886257d07..6a5e0ebf7be 100644 --- a/master/pkg/schemas/expconf/log_pattern_config.go +++ b/master/pkg/schemas/expconf/log_pattern_config.go @@ -22,6 +22,10 @@ type LogPoliciesConfigV0 []LogPolicyV0 func (b LogPoliciesConfigV0) Merge( src LogPoliciesConfigV0, ) LogPoliciesConfigV0 { + // src is nil and b is an empty slice. + if src == nil && b != nil && len(b) == 0 { + return make(LogPoliciesConfigV0, 0) + } var out LogPoliciesConfigV0 patternTosrcLp := make(map[string]LogPolicyV0) From 0075d869ba1bcd81ff8205d815df1bd47a2d6bb6 Mon Sep 17 00:00:00 2001 From: Jerry Gong Date: Sun, 20 Oct 2024 18:30:24 -0400 Subject: [PATCH 20/27] comments --- .../pkg/schemas/expconf/log_pattern_config.go | 24 +++++++++++-------- master/pkg/schemas/zgen_schemas.go | 1 - schemas/expconf/v0/experiment.json | 1 - schemas/test_cases/v0/misc.yaml | 1 + 4 files changed, 15 insertions(+), 12 deletions(-) diff --git a/master/pkg/schemas/expconf/log_pattern_config.go b/master/pkg/schemas/expconf/log_pattern_config.go index 6a5e0ebf7be..86294f61a0f 100644 --- a/master/pkg/schemas/expconf/log_pattern_config.go +++ b/master/pkg/schemas/expconf/log_pattern_config.go @@ -59,23 +59,27 @@ func (b LogPoliciesConfigV0) Merge( // can be associated with a pattern. For example: // // - pattern: a -// action: cancel_retries +// action: +// type: cancel_retries // - pattern: a -// action: exclude_node +// action: +// type: exclude_node // // All legacy policies become mordern policies after shimming: // - pattern: a -// actions: [cancel_retries] +// actions: +// - cancel_retries // - pattern: a -// actions: [exclude_node] +// actions: +// - exclude_node // // The modern log policy has an actions field. Multiple entires can be associated with a pattern. No more // duplicated pattern in log policies config. For example the policies above will be combined into: // // - pattern: a // actions: -// - cancel_retries -// - exclude_node +// - cancel_retries +// - exclude_node func (b *LogPoliciesConfigV0) UnmarshalJSON(data []byte) error { // jsonItems may have duplicated patterns after applying shim to the legacy policy. type DefaultParser LogPoliciesConfigV0 @@ -100,7 +104,7 @@ func (b *LogPoliciesConfigV0) UnmarshalJSON(data []byte) error { patternToLp[pattern] = LogPolicyV0{RawPattern: pattern, RawActions: mergedActions} } - // if the input data is [] and we use `var temp LogPolicies`, function return will return nil + // Can't use `var temp LogPolicies`. If the input data is [], function will return nil temp := make(LogPoliciesConfigV0, 0) for _, lp := range patternToLp { temp = append(temp, lp) @@ -111,7 +115,7 @@ func (b *LogPoliciesConfigV0) UnmarshalJSON(data []byte) error { return nil } -// Sort LogPolicyV0 by pattern so the output is in deterministic state. Testing will be easier. +// Sort LogPoliciesConfigV0 by pattern so the output is in deterministic state. Testing will be easier. func (b LogPoliciesConfigV0) sort() { sort.Slice(b, func(i, j int) bool { return b[i].RawPattern < b[j].RawPattern @@ -210,8 +214,8 @@ func (b *LogPolicyV0) UnmarshalJSON(data []byte) error { return nil } -// Merge LogActionsV0. The value of LogActionTypeSignal from other takes precedence. -// Union merge the other LogAction types. +// Merge LogActionsV0. The value of LogActionTypeSignal from l takes precedence. +// Union merge other LogAction types from l and src. func (l LogActionsV0) merge(src LogActionsV0) LogActionsV0 { // Store unique actions except signal, and find source signal. actions := set.New[LogActionV0]() diff --git a/master/pkg/schemas/zgen_schemas.go b/master/pkg/schemas/zgen_schemas.go index 18432310d00..1a27e7ae2b8 100644 --- a/master/pkg/schemas/zgen_schemas.go +++ b/master/pkg/schemas/zgen_schemas.go @@ -848,7 +848,6 @@ var ( "array", "null" ], - "$comment": "If ther's no default field or the default is null, LogPoliciesConfigV0 WithDefaults() won't be called.", "default": [ { "pattern": ".*CUDA out of memory.*", diff --git a/schemas/expconf/v0/experiment.json b/schemas/expconf/v0/experiment.json index ee47c584b68..e863ff7b1b3 100644 --- a/schemas/expconf/v0/experiment.json +++ b/schemas/expconf/v0/experiment.json @@ -121,7 +121,6 @@ "array", "null" ], - "$comment": "If ther's no default field or the default is null, LogPoliciesConfigV0 WithDefaults() won't be called.", "default": [ { "pattern": ".*CUDA out of memory.*", diff --git a/schemas/test_cases/v0/misc.yaml b/schemas/test_cases/v0/misc.yaml index 51c386a902e..0dbfef58eea 100644 --- a/schemas/test_cases/v0/misc.yaml +++ b/schemas/test_cases/v0/misc.yaml @@ -331,3 +331,4 @@ - ".shm_size: must be a valid memory size" case: shm_size: 1 i + From ef6d5f361655e61f8834d2a93310dc00823f4194 Mon Sep 17 00:00:00 2001 From: Jerry Gong Date: Wed, 23 Oct 2024 12:49:59 -0400 Subject: [PATCH 21/27] Change Log Signal feature behaviors --- e2e_tests/tests/cluster/test_log_policies.py | 63 +-- harness/determined/common/api/bindings.py | 32 +- master/internal/api_experiment.go | 3 +- master/internal/api_runs.go | 2 +- master/internal/api_tasks_intg_test.go | 29 +- .../postgres_task_config_policy.go | 15 +- .../task_config_policy_intg_test.go | 21 +- .../db/postgres_experiments_intg_test.go | 18 +- master/internal/logpattern/logpattern.go | 89 ++-- master/internal/run/postgres_run_intg_test.go | 2 +- master/pkg/model/experiment.go | 2 +- master/pkg/model/task.go | 4 +- .../pkg/model/task_container_defaults_test.go | 25 +- master/pkg/schemas/expconf/const.go | 8 + .../pkg/schemas/expconf/log_pattern_config.go | 386 ++++++++---------- .../pkg/schemas/expconf/zgen_log_policy_v0.go | 20 +- master/pkg/schemas/zgen_schemas.go | 130 +----- ...experiment-add-log-signal-labels.tx.up.sql | 2 - ...023105802_add-log-policy-matched.tx.up.sql | 5 + master/static/srv/proto_get_trials_plus.sql | 2 +- .../static/views_and_triggers/up/metrics.sql | 2 +- proto/buf.image.bin | Bin 669818 -> 673209 bytes proto/pkg/runv1/run.pb.go | 347 ++++++++-------- proto/pkg/trialv1/trial.pb.go | 368 ++++++++--------- proto/src/determined/run/v1/run.proto | 4 +- proto/src/determined/trial/v1/trial.proto | 4 +- schemas/expconf/v0/experiment.json | 20 +- schemas/expconf/v0/log-action.json | 20 +- schemas/expconf/v0/log-legacy-action.json | 32 -- schemas/expconf/v0/log-policy.json | 33 +- schemas/test_cases/v0/experiment.yaml | 18 +- schemas/test_cases/v0/log_policies.yaml | 250 +++++++----- webui/react/src/services/api-ts-sdk/api.ts | 8 +- 33 files changed, 854 insertions(+), 1110 deletions(-) delete mode 100644 master/static/migrations/20240916170654_experiment-add-log-signal-labels.tx.up.sql create mode 100644 master/static/migrations/20241023105802_add-log-policy-matched.tx.up.sql delete mode 100644 schemas/expconf/v0/log-legacy-action.json diff --git a/e2e_tests/tests/cluster/test_log_policies.py b/e2e_tests/tests/cluster/test_log_policies.py index 87f374a4917..a9921bad969 100644 --- a/e2e_tests/tests/cluster/test_log_policies.py +++ b/e2e_tests/tests/cluster/test_log_policies.py @@ -157,15 +157,15 @@ def test_log_policy_exclude_slurm(should_match: bool) -> None: @pytest.mark.e2e_cpu @pytest.mark.parametrize("should_match", [True, False]) -def test_log_signal(should_match: bool) -> None: +def test_log_policy_matched(should_match: bool) -> None: sess = api_utils.user_session() regex = r"executing.*action.*exit.*code.*7" if not should_match: regex = r"(.*) this should not match (.*)" - expected_signal = "Test Signal" + expected_policy = "Test" config = { - "log_policies": [{"pattern": regex, "actions": [{"signal": expected_signal}]}], + "log_policies": [{"name": expected_policy, "pattern": regex}], "max_restarts": 1, } @@ -173,59 +173,14 @@ def test_log_signal(should_match: bool) -> None: assert exp_ref.wait(interval=0.01) == client.ExperimentState.ERROR searchRes = utils.get_run_by_exp_id(sess, exp_ref.id) - runSignal = searchRes.runs[0].logSignal + runPolicyMatched = searchRes.runs[0].logPolicyMatched trialRes = bindings.get_GetTrial(sess, trialId=searchRes.runs[0].id) - trialSignal = trialRes.trial.logSignal + trialPolicyMatched = trialRes.trial.logPolicyMatched if should_match: - assert runSignal == expected_signal - assert trialSignal == expected_signal + assert runPolicyMatched == expected_policy + assert trialPolicyMatched == expected_policy else: - assert runSignal is None - assert trialSignal is None - - -@pytest.mark.e2e_cpu -def test_signal_clear_after_exp_continue() -> None: - sess = api_utils.user_session() - regex = r"executing.*action.*exit.*code.*7" - - expected_signal = "Test Signal" - config = { - "log_policies": [{"pattern": regex, "actions": [{"signal": expected_signal}]}], - "max_restarts": 0, - } - - exp_ref = noop.create_experiment(sess, [noop.Exit(7)], config=config) - assert exp_ref.wait(interval=0.01) == client.ExperimentState.ERROR - - searchRes = utils.get_run_by_exp_id(sess, exp_ref.id) - runSignal = searchRes.runs[0].logSignal - - trialRes = bindings.get_GetTrial(sess, trialId=searchRes.runs[0].id) - trialSignal = trialRes.trial.logSignal - - assert runSignal == expected_signal - assert trialSignal == expected_signal - - detproc.check_call( - sess, - [ - "det", - "e", - "continue", - str(exp_ref.id), - *noop.cli_config_overrides([noop.Exit(0)]), - ], - ) - exp.wait_for_experiment_state(sess, exp_ref.id, bindings.experimentv1State.COMPLETED) - - searchRes = utils.get_run_by_exp_id(sess, exp_ref.id) - runSignal = searchRes.runs[0].logSignal - - trialRes = bindings.get_GetTrial(sess, trialId=searchRes.runs[0].id) - trialSignal = trialRes.trial.logSignal - - assert runSignal is None - assert trialSignal is None + assert runPolicyMatched is None + assert trialPolicyMatched is None diff --git a/harness/determined/common/api/bindings.py b/harness/determined/common/api/bindings.py index 9121c2a8937..662ce773fc2 100644 --- a/harness/determined/common/api/bindings.py +++ b/harness/determined/common/api/bindings.py @@ -726,8 +726,8 @@ class trialv1Trial(Printable): checkpointCount: "typing.Optional[int]" = None endTime: "typing.Optional[str]" = None latestValidation: "typing.Optional[v1MetricsWorkload]" = None + logPolicyMatched: "typing.Optional[str]" = None logRetentionDays: "typing.Optional[int]" = None - logSignal: "typing.Optional[str]" = None metadata: "typing.Optional[typing.Dict[str, typing.Any]]" = None runnerState: "typing.Optional[str]" = None searcherMetricValue: "typing.Optional[float]" = None @@ -753,8 +753,8 @@ def __init__( checkpointCount: "typing.Union[int, None, Unset]" = _unset, endTime: "typing.Union[str, None, Unset]" = _unset, latestValidation: "typing.Union[v1MetricsWorkload, None, Unset]" = _unset, + logPolicyMatched: "typing.Union[str, None, Unset]" = _unset, logRetentionDays: "typing.Union[int, None, Unset]" = _unset, - logSignal: "typing.Union[str, None, Unset]" = _unset, metadata: "typing.Union[typing.Dict[str, typing.Any], None, Unset]" = _unset, runnerState: "typing.Union[str, None, Unset]" = _unset, searcherMetricValue: "typing.Union[float, None, Unset]" = _unset, @@ -782,10 +782,10 @@ def __init__( self.endTime = endTime if not isinstance(latestValidation, Unset): self.latestValidation = latestValidation + if not isinstance(logPolicyMatched, Unset): + self.logPolicyMatched = logPolicyMatched if not isinstance(logRetentionDays, Unset): self.logRetentionDays = logRetentionDays - if not isinstance(logSignal, Unset): - self.logSignal = logSignal if not isinstance(metadata, Unset): self.metadata = metadata if not isinstance(runnerState, Unset): @@ -826,10 +826,10 @@ def from_json(cls, obj: Json) -> "trialv1Trial": kwargs["endTime"] = obj["endTime"] if "latestValidation" in obj: kwargs["latestValidation"] = v1MetricsWorkload.from_json(obj["latestValidation"]) if obj["latestValidation"] is not None else None + if "logPolicyMatched" in obj: + kwargs["logPolicyMatched"] = obj["logPolicyMatched"] if "logRetentionDays" in obj: kwargs["logRetentionDays"] = obj["logRetentionDays"] - if "logSignal" in obj: - kwargs["logSignal"] = obj["logSignal"] if "metadata" in obj: kwargs["metadata"] = obj["metadata"] if "runnerState" in obj: @@ -870,10 +870,10 @@ def to_json(self, omit_unset: bool = False) -> typing.Dict[str, typing.Any]: out["endTime"] = self.endTime if not omit_unset or "latestValidation" in vars(self): out["latestValidation"] = None if self.latestValidation is None else self.latestValidation.to_json(omit_unset) + if not omit_unset or "logPolicyMatched" in vars(self): + out["logPolicyMatched"] = self.logPolicyMatched if not omit_unset or "logRetentionDays" in vars(self): out["logRetentionDays"] = self.logRetentionDays - if not omit_unset or "logSignal" in vars(self): - out["logSignal"] = self.logSignal if not omit_unset or "metadata" in vars(self): out["metadata"] = self.metadata if not omit_unset or "runnerState" in vars(self): @@ -4589,7 +4589,7 @@ class v1FlatRun(Printable): hyperparameters: "typing.Optional[typing.Dict[str, typing.Any]]" = None labels: "typing.Optional[typing.Sequence[str]]" = None localId: "typing.Optional[str]" = None - logSignal: "typing.Optional[str]" = None + logPolicyMatched: "typing.Optional[str]" = None metadata: "typing.Optional[typing.Dict[str, typing.Any]]" = None searcherMetricValue: "typing.Optional[float]" = None summaryMetrics: "typing.Optional[typing.Dict[str, typing.Any]]" = None @@ -4616,7 +4616,7 @@ def __init__( hyperparameters: "typing.Union[typing.Dict[str, typing.Any], None, Unset]" = _unset, labels: "typing.Union[typing.Sequence[str], None, Unset]" = _unset, localId: "typing.Union[str, None, Unset]" = _unset, - logSignal: "typing.Union[str, None, Unset]" = _unset, + logPolicyMatched: "typing.Union[str, None, Unset]" = _unset, metadata: "typing.Union[typing.Dict[str, typing.Any], None, Unset]" = _unset, searcherMetricValue: "typing.Union[float, None, Unset]" = _unset, summaryMetrics: "typing.Union[typing.Dict[str, typing.Any], None, Unset]" = _unset, @@ -4647,8 +4647,8 @@ def __init__( self.labels = labels if not isinstance(localId, Unset): self.localId = localId - if not isinstance(logSignal, Unset): - self.logSignal = logSignal + if not isinstance(logPolicyMatched, Unset): + self.logPolicyMatched = logPolicyMatched if not isinstance(metadata, Unset): self.metadata = metadata if not isinstance(searcherMetricValue, Unset): @@ -4687,8 +4687,8 @@ def from_json(cls, obj: Json) -> "v1FlatRun": kwargs["labels"] = obj["labels"] if "localId" in obj: kwargs["localId"] = obj["localId"] - if "logSignal" in obj: - kwargs["logSignal"] = obj["logSignal"] + if "logPolicyMatched" in obj: + kwargs["logPolicyMatched"] = obj["logPolicyMatched"] if "metadata" in obj: kwargs["metadata"] = obj["metadata"] if "searcherMetricValue" in obj: @@ -4727,8 +4727,8 @@ def to_json(self, omit_unset: bool = False) -> typing.Dict[str, typing.Any]: out["labels"] = self.labels if not omit_unset or "localId" in vars(self): out["localId"] = self.localId - if not omit_unset or "logSignal" in vars(self): - out["logSignal"] = self.logSignal + if not omit_unset or "logPolicyMatched" in vars(self): + out["logPolicyMatched"] = self.logPolicyMatched if not omit_unset or "metadata" in vars(self): out["metadata"] = self.metadata if not omit_unset or "searcherMetricValue" in vars(self): diff --git a/master/internal/api_experiment.go b/master/internal/api_experiment.go index 7dc7eabcfae..9fe3daa6c81 100644 --- a/master/internal/api_experiment.go +++ b/master/internal/api_experiment.go @@ -1616,10 +1616,9 @@ func (a *apiServer) ContinueExperiment( if _, err := tx.NewUpdate().Table("runs"). // TODO(nick-runs) call runs package. Set("restarts = 0"). Set("end_time = null"). - Set("log_signal = null"). Where("id IN (?)", bun.In(trialIDs)). Exec(ctx); err != nil { - return fmt.Errorf("zeroing out trial stats: %w", err) + return fmt.Errorf("zeroing out trial restarts: %w", err) } } diff --git a/master/internal/api_runs.go b/master/internal/api_runs.go index 7f6322b2958..807540947fc 100644 --- a/master/internal/api_runs.go +++ b/master/internal/api_runs.go @@ -170,7 +170,7 @@ func getRunsColumns(q *bun.SelectQuery) *bun.SelectQuery { 'pachyderm_integration', NULLIF(e.config#>'{integrations,pachyderm}', 'null'), 'id', e.id) AS experiment`). ColumnExpr("rm.metadata AS metadata"). - ColumnExpr("r.log_signal AS log_signal"). + ColumnExpr("r.log_policy_matched AS log_policy_matched"). Join("LEFT JOIN experiments AS e ON r.experiment_id=e.id"). Join("LEFT JOIN runs_metadata AS rm ON r.id=rm.run_id"). Join("LEFT JOIN users u ON e.owner_id = u.id"). diff --git a/master/internal/api_tasks_intg_test.go b/master/internal/api_tasks_intg_test.go index 8757e0f7070..157d95edf4d 100644 --- a/master/internal/api_tasks_intg_test.go +++ b/master/internal/api_tasks_intg_test.go @@ -220,12 +220,12 @@ func TestPostTaskLogsLogPattern(t *testing.T) { require.NoError(t, err) activeConfig.RawLogPolicies = expconf.LogPoliciesConfig{ expconf.LogPolicy{ - RawPattern: "sub", - RawActions: expconf.LogActionsV0{expconf.LogActionV0{Type: expconf.LogActionTypeCancelRetries}}, + RawPattern: ptrs.Ptr("sub"), + RawAction: &expconf.LogActionV0{Type: expconf.LogActionTypeCancelRetries}, }, expconf.LogPolicy{ - RawPattern: `\d{5}$`, - RawActions: expconf.LogActionsV0{expconf.LogActionV0{Type: expconf.LogActionTypeExcludeNode}}, + RawPattern: ptrs.Ptr(`\d{5}$`), + RawAction: &expconf.LogActionV0{Type: expconf.LogActionTypeExcludeNode}, }, } @@ -453,11 +453,10 @@ func TestPostTaskLogsLogSignalDataSaving(t *testing.T) { activeConfig, err := api.m.db.ActiveExperimentConfig(trial.ExperimentID) require.NoError(t, err) - signal := "sub" activeConfig.RawLogPolicies = expconf.LogPoliciesConfig{ expconf.LogPolicy{ - RawPattern: "sub", - RawActions: expconf.LogActionsV0{expconf.LogActionV0{Type: expconf.LogActionTypeSignal, Signal: &signal}}, + RawName: ptrs.Ptr("test"), + RawPattern: ptrs.Ptr("sub"), }, } @@ -490,8 +489,8 @@ func TestPostTaskLogsLogSignalDataSaving(t *testing.T) { require.NoError(t, err) runsOut := struct { - bun.BaseModel `bun:"table:runs"` - LogSignal *string `db:"log_signal"` + bun.BaseModel `bun:"table:runs"` + LogPolicyMatched *string `db:"log_policy_matched"` }{} err = db.Bun().NewSelect().Model(&runsOut). @@ -499,13 +498,13 @@ func TestPostTaskLogsLogSignalDataSaving(t *testing.T) { Scan(ctx) require.NoError(t, err) require.NotNil(t, runsOut) - require.NotNil(t, runsOut.LogSignal) + require.NotNil(t, runsOut.LogPolicyMatched) - require.Equal(t, "sub", *runsOut.LogSignal) + require.Equal(t, "test", *runsOut.LogPolicyMatched) tasksOut := struct { - bun.BaseModel `bun:"table:tasks"` - LogSignal *string `db:"log_signal"` + bun.BaseModel `bun:"table:tasks"` + LogPolicyMatched *string `db:"log_policy_matched"` }{} err = db.Bun().NewSelect().Model(&tasksOut). Join("LEFT JOIN run_id_task_id AS rt on tasks.task_id = rt.task_id"). @@ -513,7 +512,7 @@ func TestPostTaskLogsLogSignalDataSaving(t *testing.T) { Scan(ctx) require.NoError(t, err) require.NotNil(t, tasksOut) - require.NotNil(t, tasksOut.LogSignal) + require.NotNil(t, tasksOut.LogPolicyMatched) - require.Equal(t, "sub", *tasksOut.LogSignal) + require.Equal(t, "test", *tasksOut.LogPolicyMatched) } diff --git a/master/internal/configpolicy/postgres_task_config_policy.go b/master/internal/configpolicy/postgres_task_config_policy.go index 496585811c8..dea01bf7e42 100644 --- a/master/internal/configpolicy/postgres_task_config_policy.go +++ b/master/internal/configpolicy/postgres_task_config_policy.go @@ -22,20 +22,7 @@ const ( "resources": {"slots": 4, "max_slots": 8}, "log_policies": [ { - "pattern": ".*CUDA out of memory.*", - "actions": [ - { - "signal": "CUDA OOM" - } - ] - }, - { - "pattern": ".*uncorrectable ECC error encountered.*", - "actions": [ - { - "signal": "ECC Error" - } - ] + "pattern": "nonrepeat" } ] }` diff --git a/master/internal/configpolicy/task_config_policy_intg_test.go b/master/internal/configpolicy/task_config_policy_intg_test.go index b0c5391e793..faf063cf0f0 100644 --- a/master/internal/configpolicy/task_config_policy_intg_test.go +++ b/master/internal/configpolicy/task_config_policy_intg_test.go @@ -472,23 +472,10 @@ func TestMergeWithInvariantExperimentConfigs(t *testing.T) { } ], "log_policies": [ - { - "pattern": ".*CUDA out of memory.*", - "actions": [ - { - "signal": "CUDA OOM" - } - ] - }, - { - "pattern": ".*uncorrectable ECC error encountered.*", - "actions": [ - { - "signal": "ECC Error" - } - ] - } - ] + { + "pattern": "nonrepeat" + } + ] }` var defaultInvariantConfig expconf.ExperimentConfigV0 diff --git a/master/internal/db/postgres_experiments_intg_test.go b/master/internal/db/postgres_experiments_intg_test.go index 97c36d09db8..68cab82424c 100644 --- a/master/internal/db/postgres_experiments_intg_test.go +++ b/master/internal/db/postgres_experiments_intg_test.go @@ -436,16 +436,14 @@ func TestActiveLogPatternPolicies(t *testing.T) { policies, err := ActiveLogPolicies(ctx, exp.ID) require.NoError(t, err) require.NotEmpty(t, policies) - eccErrorSignal := "ECC Error" - cudaOOMSignal := "CUDA OOM" expected := expconf.LogPoliciesConfig{ expconf.LogPolicy{ - RawPattern: ".*CUDA out of memory.*", - RawActions: expconf.LogActionsV0{expconf.LogActionV0{Type: expconf.LogActionTypeSignal, Signal: &cudaOOMSignal}}, + RawName: ptrs.Ptr(expconf.CUDAOOM), + RawPattern: ptrs.Ptr(expconf.CUDAOOMPattern), }, expconf.LogPolicy{ - RawPattern: ".*uncorrectable ECC error encountered.*", - RawActions: expconf.LogActionsV0{expconf.LogActionV0{Type: expconf.LogActionTypeSignal, Signal: &eccErrorSignal}}, + RawName: ptrs.Ptr(expconf.ECCError), + RawPattern: ptrs.Ptr(expconf.ECCErrorPattern), }, } @@ -455,12 +453,12 @@ func TestActiveLogPatternPolicies(t *testing.T) { require.NoError(t, err) activeConfig.RawLogPolicies = expconf.LogPoliciesConfig{ expconf.LogPolicy{ - RawPattern: `\d{5}$`, - RawActions: expconf.LogActionsV0{expconf.LogActionV0{Type: expconf.LogActionTypeExcludeNode}}, + RawPattern: ptrs.Ptr(`\d{5}$`), + RawAction: &expconf.LogActionV0{Type: expconf.LogActionTypeExcludeNode}, }, expconf.LogPolicy{ - RawPattern: "sub", - RawActions: expconf.LogActionsV0{expconf.LogActionV0{Type: expconf.LogActionTypeCancelRetries}}, + RawPattern: ptrs.Ptr("sub"), + RawAction: &expconf.LogActionV0{Type: expconf.LogActionTypeCancelRetries}, }, } diff --git a/master/internal/logpattern/logpattern.go b/master/internal/logpattern/logpattern.go index 8788d6f47cf..45f06a2eb92 100644 --- a/master/internal/logpattern/logpattern.go +++ b/master/internal/logpattern/logpattern.go @@ -59,7 +59,11 @@ func (l *LogPatternPolicies) monitor(ctx context.Context, ) error { // TODO when we add rm specific log grabbing we will need to also monitor them. for _, policy := range policies { - compiledRegex, err := l.getCompiledRegex(policy.Pattern()) + pattern := policy.Pattern() + if pattern == nil { + continue + } + compiledRegex, err := l.getCompiledRegex(*pattern) if err != nil { return err } @@ -76,50 +80,49 @@ func (l *LogPatternPolicies) monitor(ctx context.Context, } if compiledRegex.MatchString(log.Log) { - for _, a := range policy.Actions() { - switch a.Type { + if policy.Action() != nil { + switch policy.Action().Type { case expconf.LogActionTypeCancelRetries: if err := addDontRetry( - ctx, model.TaskID(log.TaskID), *log.AgentID, policy.Pattern(), log.Log, + ctx, model.TaskID(log.TaskID), *log.AgentID, *pattern, log.Log, ); err != nil { return fmt.Errorf("adding don't retry: %w", err) } case expconf.LogActionTypeExcludeNode: if err := addRetryOnDifferentNode( - ctx, model.TaskID(log.TaskID), *log.AgentID, policy.Pattern(), log.Log, + ctx, model.TaskID(log.TaskID), *log.AgentID, *pattern, log.Log, ); err != nil { return fmt.Errorf("adding retry on different node: %w", err) } - case expconf.LogActionTypeSignal: - signal := a.Signal - if signal != nil { - err = db.Bun().RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error { - if _, err := tx.NewUpdate().Model(&model.Task{}). - Set("log_signal = ?", signal). - Where("task_id = ?", log.TaskID). - Exec(ctx); err != nil { - return fmt.Errorf("updating log signal of task %s: %w", log.TaskID, err) - } - if _, err := tx.NewUpdate().Model(&model.Run{}). - Table("run_id_task_id"). - Set("log_signal = ?", signal). - Where("run.id = run_id_task_id.run_id"). - Where("run_id_task_id.task_id = ?", log.TaskID). - Exec(ctx); err != nil { - return fmt.Errorf("updating log signal of task %s: %w", log.TaskID, err) - } - - return nil - }) - if err != nil { - return fmt.Errorf("updating log signal: %w", err) - } - } default: return fmt.Errorf("unrecognized log pattern policy type") } } + + if policy.Name() != nil { + err = db.Bun().RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error { + if _, err := tx.NewUpdate().Model(&model.Task{}). + Set("log_policy_matched = ?", *policy.Name()). + Where("task_id = ?", log.TaskID). + Exec(ctx); err != nil { + return fmt.Errorf("updating log signal of task %s: %w", log.TaskID, err) + } + if _, err := tx.NewUpdate().Model(&model.Run{}). + Table("run_id_task_id"). + Set("log_policy_matched = ?", *policy.Name()). + Where("run.id = run_id_task_id.run_id"). + Where("run_id_task_id.task_id = ?", log.TaskID). + Exec(ctx); err != nil { + return fmt.Errorf("updating log signal of task %s: %w", log.TaskID, err) + } + + return nil + }) + if err != nil { + return fmt.Errorf("updating log_policy_matched: %w", err) + } + } } } } @@ -257,29 +260,3 @@ func TaskLogsFromDontRetryTriggers(taskID model.TaskID, t []DontRetryTrigger) [] return taskLogs } - -// ClearSignal resets the log_signal. -func ClearSignal(ctx context.Context, taskID model.TaskID) error { - if err := db.Bun().RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error { - if _, err := tx.NewUpdate().Model(&model.Task{}). - Set("log_signal = null"). - Where("task_id = ?", taskID). - Exec(ctx); err != nil { - return fmt.Errorf("resetting log signal of task %s: %w", taskID, err) - } - if _, err := tx.NewUpdate().Model(&model.Run{}). - Table("run_id_task_id"). - Set("log_signal = null"). - Where("run.id = run_id_task_id.run_id"). - Where("run_id_task_id.task_id = ?", taskID). - Exec(ctx); err != nil { - return fmt.Errorf("resetting log signal of task %s: %w", taskID, err) - } - - return nil - }); err != nil { - return fmt.Errorf("resetting log signal: %w", err) - } - - return nil -} diff --git a/master/internal/run/postgres_run_intg_test.go b/master/internal/run/postgres_run_intg_test.go index 36df531103b..634d044f488 100644 --- a/master/internal/run/postgres_run_intg_test.go +++ b/master/internal/run/postgres_run_intg_test.go @@ -29,7 +29,7 @@ func TestMigrateTrials(t *testing.T) { } // get all trial info, excluding additional fields added after the transition to runs. require.NoError(t, db.Bun().NewSelect().Table("trials"). - ColumnExpr("to_jsonb(trials.*)-'metadata'-'log_signal' AS trial_data"). + ColumnExpr("to_jsonb(trials.*)-'metadata'-'log_policy_matched' AS trial_data"). Order("id"). Scan(ctx, ¤tTrialsViewData), ) diff --git a/master/pkg/model/experiment.go b/master/pkg/model/experiment.go index a14ef5f9a9e..85b9d9ae89d 100644 --- a/master/pkg/model/experiment.go +++ b/master/pkg/model/experiment.go @@ -522,7 +522,7 @@ type Run struct { LogRetentionDays *int16 `db:"log_retention_days"` Metadata map[string]any `db:"metadata" bun:"metadata,scanonly"` LocalID int `db:"local_id"` - LogSignal *string `db:"log_signal"` + LogPolicyMatched *string `db:"log_policy_matched"` } // RunTaskID represents a row from the `run_id_task_id` table. diff --git a/master/pkg/model/task.go b/master/pkg/model/task.go index 5a87070a4d2..4152ee909b5 100644 --- a/master/pkg/model/task.go +++ b/master/pkg/model/task.go @@ -91,8 +91,8 @@ type Task struct { Config *string `db:"config"` - NoPause *bool `db:"no_pause"` - LogSignal *string `db:"log_signal"` + NoPause *bool `db:"no_pause"` + LogPolicyMatched *string `db:"log_policy_matched"` } // AllocationID is the ID of an allocation of a task. It is usually of the form diff --git a/master/pkg/model/task_container_defaults_test.go b/master/pkg/model/task_container_defaults_test.go index 31d73f4a55e..28d0e8ec915 100644 --- a/master/pkg/model/task_container_defaults_test.go +++ b/master/pkg/model/task_container_defaults_test.go @@ -436,12 +436,12 @@ func TestTaskContainerDefaultsConfigMerging(t *testing.T) { } } -func TestLogPatternUnmarshal(t *testing.T) { +func TestLegacyLogPatternUnmarshal(t *testing.T) { var tcd TaskContainerDefaultsConfig require.NoError(t, json.Unmarshal([]byte(string(`{ "log_policies": [ - {"pattern": "test", "actions": ["exclude_node"]}, - {"pattern": "test2", "actions": ["cancel_retries"]} + {"pattern": "test", "action": {"type": "exclude_node"}}, + {"pattern": "test2", "action": {"type": "cancel_retries"}} ] }`)), &tcd)) @@ -451,18 +451,29 @@ func TestLogPatternUnmarshal(t *testing.T) { PreemptionTimeout: DefaultPreemptionTimeout, LogPolicies: expconf.LogPoliciesConfig{ expconf.LogPolicy{ - RawPattern: "test", - RawActions: expconf.LogActionsV0{expconf.LogActionV0{Type: expconf.LogActionTypeExcludeNode}}, + RawPattern: ptrs.Ptr("test"), + RawAction: &expconf.LogActionV0{Type: expconf.LogActionTypeExcludeNode}, }, expconf.LogPolicy{ - RawPattern: "test2", - RawActions: expconf.LogActionsV0{expconf.LogActionV0{Type: expconf.LogActionTypeCancelRetries}}, + RawPattern: ptrs.Ptr("test2"), + RawAction: &expconf.LogActionV0{Type: expconf.LogActionTypeCancelRetries}, }, }, } require.Equal(t, expected, tcd) } +func TestLogPatternUnmarshal(t *testing.T) { + var tcd TaskContainerDefaultsConfig + err := json.Unmarshal([]byte(string(`{ + "log_policies": [ + {"name": "policy name", "pattern": "a", "action": "exclude_node"}, + {"name": "policy name", "pattern": "b", "action": "exclude_node"} + ] + }`)), &tcd) + require.ErrorContains(t, err, "log_policies have duplicated names \"policy name\" but different patterns") +} + func TestPodSpecsDefaultMerging(t *testing.T) { defaults := &TaskContainerDefaultsConfig{ CPUPodSpec: &k8sV1.Pod{ diff --git a/master/pkg/schemas/expconf/const.go b/master/pkg/schemas/expconf/const.go index 77b73cd0132..48c3d0a20ec 100644 --- a/master/pkg/schemas/expconf/const.go +++ b/master/pkg/schemas/expconf/const.go @@ -12,3 +12,11 @@ const ( CUDAImage = "determinedai/pytorch-ngc-dev:0736b6d" ROCMImage = "determinedai/environments:rocm-5.6-pytorch-1.3-tf-2.10-rocm-mpich-0736b6d" ) + +// Default log policies values. +const ( + CUDAOOM = "CUDA OOM" + CUDAOOMPattern = ".*CUDA out of memory.*" + ECCError = "ECC Error" + ECCErrorPattern = ".*uncorrectable ECC error encountered.*" +) diff --git a/master/pkg/schemas/expconf/log_pattern_config.go b/master/pkg/schemas/expconf/log_pattern_config.go index 86294f61a0f..3d05e9282bd 100644 --- a/master/pkg/schemas/expconf/log_pattern_config.go +++ b/master/pkg/schemas/expconf/log_pattern_config.go @@ -5,8 +5,11 @@ import ( "fmt" "sort" + log "github.com/sirupsen/logrus" + "github.com/pkg/errors" + "github.com/determined-ai/determined/master/pkg/ptrs" "github.com/determined-ai/determined/master/pkg/schemas" "github.com/determined-ai/determined/master/pkg/set" ) @@ -16,9 +19,31 @@ import ( //go:generate ../gen.sh type LogPoliciesConfigV0 []LogPolicyV0 +// WithDefaults implements the Defaultable pseudo-interface. +func (b LogPoliciesConfigV0) WithDefaults() LogPoliciesConfigV0 { + defaultPolicies := LogPoliciesConfigV0{ + LogPolicyV0{ + RawName: ptrs.Ptr(CUDAOOM), + RawPattern: ptrs.Ptr(CUDAOOMPattern), + }, + LogPolicyV0{ + RawName: ptrs.Ptr(ECCError), + RawPattern: ptrs.Ptr(ECCErrorPattern), + }, + } + + return schemas.Merge(b, defaultPolicies) +} + // Merge implements the Mergable pseudo-interface. -// We appends all LogPolicyV0s to the output slice, but if there are any with the same pattern, we merge -// their actions and save them as one LogPolicyV0. +// Union merge log policies. +// Special cases: +// 1. We may see policies with different names same patterns. We keep both of them, but which name +// will be shown in the UI is undefined. +// 2. There could be policies with the same name different patterns. We save the one with the higher priority. +// Unsetting default values depends on this behavior. +// 2.1 Polices that don't have name but have same patterns is a special case. These are legacy +// policies. func (b LogPoliciesConfigV0) Merge( src LogPoliciesConfigV0, ) LogPoliciesConfigV0 { @@ -27,231 +52,152 @@ func (b LogPoliciesConfigV0) Merge( return make(LogPoliciesConfigV0, 0) } var out LogPoliciesConfigV0 + var policiesWoName LogPoliciesConfigV0 + seenPolicies := set.New[string]() + nameToLp := make(map[string]LogPolicyV0) - patternTosrcLp := make(map[string]LogPolicyV0) for _, lp := range src { - patternTosrcLp[lp.RawPattern] = lp + if lp.RawName == nil { + policiesWoName.appendLegacyPolicies(lp, seenPolicies) + continue + } + nameToLp[*lp.RawName] = lp } for _, otherLp := range b { - pattern := otherLp.RawPattern - if srcLp, ok := patternTosrcLp[pattern]; ok { - // Merge actions of two LogPolicies if they have the same pattern. - patternTosrcLp[pattern] = LogPolicyV0{ - RawPattern: pattern, - RawActions: otherLp.RawActions.merge(srcLp.RawActions), - } + if otherLp.RawName == nil { + policiesWoName.appendLegacyPolicies(otherLp, seenPolicies) + continue + } + name := *otherLp.RawName + if srcLp, ok := nameToLp[name]; ok { + // Merge two LogPolicies if they have the same name. + nameToLp[name] = schemas.Merge(otherLp, srcLp) } else { - // Source LogPoliciesConfig doesn't have this pattern. - patternTosrcLp[pattern] = otherLp + nameToLp[name] = otherLp } } - for _, lp := range patternTosrcLp { + for _, lp := range nameToLp { out = append(out, lp) } + out = append(out, policiesWoName...) out.sort() return out } +// Only keep unique legacy policies. +func (b *LogPoliciesConfigV0) appendLegacyPolicies(policy LogPolicyV0, seenPolicies set.Set[string]) { + // policy without name is legacy policy. + if policy.RawName == nil { + json, err := json.Marshal(policy) + if err != nil { + log.Errorf("marshaling error %+v %v", policy, err) + } + if seenPolicies.Contains(string(json)) { + return + } + seenPolicies.Insert(string(json)) + *b = append(*b, policy) + } +} + // UnmarshalJSON implements the json.Unmarshaler interface. -// The legacy log policies config allows entries with duplicated pattern because only single action -// can be associated with a pattern. For example: -// -// - pattern: a -// action: -// type: cancel_retries -// - pattern: a -// action: -// type: exclude_node -// -// All legacy policies become mordern policies after shimming: -// - pattern: a -// actions: -// - cancel_retries -// - pattern: a -// actions: -// - exclude_node -// -// The modern log policy has an actions field. Multiple entires can be associated with a pattern. No more -// duplicated pattern in log policies config. For example the policies above will be combined into: -// -// - pattern: a -// actions: -// - cancel_retries -// - exclude_node +// Special cases: +// 1. User may submit policies that have different names but same patterns. We keep both of them, but which name +// will be shown in the UI is undefined. +// 2. User can't submit policies that have the same names but different patterns. func (b *LogPoliciesConfigV0) UnmarshalJSON(data []byte) error { - // jsonItems may have duplicated patterns after applying shim to the legacy policy. type DefaultParser LogPoliciesConfigV0 var jsonItems DefaultParser if err := json.Unmarshal(data, &jsonItems); err != nil { return errors.Wrapf(err, "failed to parse runtime items") } - if jsonItems == nil { - return nil - } - // Merge LogPolicyV0s with the same pattern into one. - patternToLp := make(map[string]LogPolicyV0) + // Detect policies that have the same names but different patterns + names := set.New[string]() for _, jsonItem := range jsonItems { - pattern := jsonItem.RawPattern - if _, ok := patternToLp[pattern]; !ok { - patternToLp[pattern] = jsonItem + n := jsonItem.RawName + if n == nil { continue } - mergedActions := patternToLp[pattern].RawActions.merge(jsonItem.RawActions) - patternToLp[pattern] = LogPolicyV0{RawPattern: pattern, RawActions: mergedActions} + if names.Contains(*n) { + return fmt.Errorf("log_policies have duplicated names %q but different patterns", *n) + } + names.Insert(*n) } - // Can't use `var temp LogPolicies`. If the input data is [], function will return nil - temp := make(LogPoliciesConfigV0, 0) - for _, lp := range patternToLp { - temp = append(temp, lp) - } - temp.sort() - *b = temp + out := LogPoliciesConfig(jsonItems) + out.sort() + *b = out return nil } -// Sort LogPoliciesConfigV0 by pattern so the output is in deterministic state. Testing will be easier. +// Sort LogPoliciesConfigV0 so the output is in deterministic state. Testing will be easier. func (b LogPoliciesConfigV0) sort() { sort.Slice(b, func(i, j int) bool { - return b[i].RawPattern < b[j].RawPattern + // Sort policies by name first. + var iName string + var jName string + if b[i].RawName != nil { + iName = *b[i].RawName + } + if b[j].RawName != nil { + jName = *b[j].RawName + } + + // Sort policies by pattern if their names are the same. + if iName == jName { + var iPattern string + var jPattern string + if b[i].RawPattern != nil { + iPattern = *b[i].RawPattern + } + if b[j].RawPattern != nil { + jPattern = *b[j].RawPattern + } + // Sort by action if patterns and names are the same. + if iPattern == jPattern { + var iAction string + var jAction string + if b[i].RawAction != nil { + iAction = string(b[i].RawAction.Type) + } + if b[j].RawAction != nil { + jAction = string(b[j].RawAction.Type) + } + return iAction < jAction + } + + return iPattern < jPattern + } + + return iName < jName }) } -// LogActionsV0 is a list of log actions. -type LogActionsV0 []LogActionV0 - // LogPolicyV0 is an action to take if we match against trial logs. // //go:generate ../gen.sh type LogPolicyV0 struct { - RawPattern string `json:"pattern"` - RawActions LogActionsV0 `json:"actions,omitempty"` -} - -// UnmarshalJSON implements the json.Unmarshaler interface. -// It applies shim to the legacy policy. -// For the modern policy, if a user provides multiple LogActionTypeSignal, only -// the last one will be stored. As for the other action types, the unique ones -// will be stored. For example: -// -// actions: -// - cancel_retries -// - exclude_node -// - cancel_retries -// - signal: a -// - signal: b -// -// We store: -// -// []RawActionV0{ -// {Type: LogActionTypeCancelRetries}, -// {Type: LogActionTypeExcludeNode}, -// {Type: LogActionTypeSignal, Signal: "b"} -// } -func (b *LogPolicyV0) UnmarshalJSON(data []byte) error { - // First, check if we can unmarshal the log policy item as a legacy item. - type LegacyAction struct { - Type LogActionType `json:"type"` - } - - type LegacyPolicy struct { - Pattern string `json:"pattern"` - Action *LegacyAction `json:"action"` - } - - var legacy LegacyPolicy - err := json.Unmarshal(data, &legacy) - - // For this to be a valid LegacyPolicy, the Action must have been provided. - if err == nil && legacy.Action != nil { - // Apply shim to bring legacy policy into the current format. - var action LogActionV0 - switch legacy.Action.Type { - case LogActionTypeCancelRetries: - action = LogActionV0{Type: LogActionTypeCancelRetries} - case LogActionTypeExcludeNode: - action = LogActionV0{Type: LogActionTypeExcludeNode} - default: - return fmt.Errorf("unregonized legacy action type: %s, data: %q", legacy.Action.Type, string(data)) - } - *b = LogPolicyV0{ - RawPattern: legacy.Pattern, - RawActions: []LogActionV0{action}, - } - return nil - } - // Modern policy doesn't need shimming. - type DefaultParser *LogPolicyV0 - var lp LogPolicyV0 - if err := json.Unmarshal(data, DefaultParser(&lp)); err != nil { - return fmt.Errorf("failed to parse LogPolicyV0: %w, data: %q", err, string(data)) - } - - // Get the last LogActionTypeSignal, and get the unique values of the other types. - var signal *LogActionV0 - otherActions := set.New[LogActionV0]() - for _, a := range lp.RawActions { - if a.Type == LogActionTypeSignal { - signal = &a - } else { - otherActions.Insert(a) - } - } - - // Prepare output. - if signal != nil { - b.RawActions = []LogActionV0{*signal} - } - b.RawActions = append(b.RawActions, otherActions.ToSlice()...) - b.RawActions.sort() - b.RawPattern = lp.RawPattern - - return nil + // Legacy log policy doesn't have a name. Legacy log policy will be deprecated. + RawName *string `json:"name,omitempty"` + // Pattern can be nil. So user can override it to disable the default log polices. + RawPattern *string `json:"pattern,omitempty"` + RawAction *LogActionV0 `json:"action,omitempty"` } -// Merge LogActionsV0. The value of LogActionTypeSignal from l takes precedence. -// Union merge other LogAction types from l and src. -func (l LogActionsV0) merge(src LogActionsV0) LogActionsV0 { - // Store unique actions except signal, and find source signal. - actions := set.New[LogActionV0]() - var srcSignal *LogActionV0 - for _, a := range src { - if a.Type == LogActionTypeSignal { - srcSignal = &a - continue - } - actions.Insert(a) +// Merge LogPolicyV0. +func (l LogPolicyV0) Merge(src LogPolicyV0) LogPolicyV0 { + // Merging only applies to the LogActionV0 with the same name. + if src.RawName == nil || l.RawName == nil || *src.RawName != *l.RawName { + log.Errorf("the names of %+v and %+v are not the same", l, src) + return src } - // Store unique actions except signal, and find other signal. - var otherSignal *LogActionV0 - for _, a := range l { - if a.Type == LogActionTypeSignal { - otherSignal = &a - continue - } - actions.Insert(a) - } - // Other signal takes precedence. - if otherSignal != nil || srcSignal != nil { - actions.Insert(*schemas.Merge(otherSignal, srcSignal)) - } - - out := LogActionsV0(actions.ToSlice()) - out.sort() - return out -} - -// Sort LogActionsV0 by type so the output is in deterministic state. Testing will be easier. -func (l LogActionsV0) sort() { - sort.Slice(l, func(i, j int) bool { - return l[i].Type < l[j].Type - }) + return l } // LogActionType is the type of an action. @@ -261,56 +207,58 @@ type LogActionType string const ( LogActionTypeCancelRetries LogActionType = "cancel_retries" LogActionTypeExcludeNode LogActionType = "exclude_node" - LogActionTypeSignal LogActionType = "signal" ) // LogActionV0 is a policy to take after matching. type LogActionV0 struct { Type LogActionType - - // Only used by the "signal" action. - Signal *string -} - -// MarshalJSON implements the json.Marshaler interface. -func (s LogActionV0) MarshalJSON() ([]byte, error) { - switch s.Type { - case LogActionTypeSignal: - return json.Marshal(struct { - Signal *string `json:"signal"` - }{Signal: s.Signal}) - case LogActionTypeCancelRetries: - return json.Marshal(LogActionTypeCancelRetries) - case LogActionTypeExcludeNode: - return json.Marshal(LogActionTypeExcludeNode) - } - return nil, fmt.Errorf("failed to marshal LogActionV0: %+v", s) } // UnmarshalJSON implements the json.Unmarshaler interface. +// It applies a shim to the legacy actions. For example we have a legacy action: +// +// action: +// type: cancel_retries +// +// It will become: +// +// action: cancel_retries func (s *LogActionV0) UnmarshalJSON(data []byte) error { - var action string - // err is not nil means input data is not cancel_retries or exclude_node. - if err := json.Unmarshal(data, &action); err == nil { - // Handle all the types beside signal - switch LogActionType(action) { - case LogActionTypeCancelRetries: - *s = LogActionV0{Type: LogActionTypeCancelRetries} - return nil - case LogActionTypeExcludeNode: - *s = LogActionV0{Type: LogActionTypeExcludeNode} - return nil + // First, check if we can unmarshal the log policy item as a legacy item. + type LegacyAction struct { + Type LogActionType `json:"type"` + } + + var legacy LegacyAction + err := json.Unmarshal(data, &legacy) + if err == nil { + // Apply shim to bring legacy policy into the current format. + switch legacy.Type { + case LogActionTypeCancelRetries, LogActionTypeExcludeNode: + *s = LogActionV0(legacy) + default: + return fmt.Errorf("unrecognized legacy action type: %s, data: %q", legacy.Type, string(data)) } + return nil } - // Handle Signal - temp := struct { - Signal *string `json:"signal"` - }{} - if err := json.Unmarshal(data, &temp); err != nil || temp.Signal == nil { - return fmt.Errorf("failed to unmarshal log action: %w, data: %q", err, string(data)) + // It is not a legacy item. Try to unmarshal it as a modern item. + var lat LogActionType + if err := json.Unmarshal(data, &lat); err == nil { + switch lat { + case LogActionTypeCancelRetries, LogActionTypeExcludeNode: + *s = LogActionV0{Type: lat} + return nil + } } - *s = LogActionV0{Type: LogActionTypeSignal, Signal: temp.Signal} - return nil + return fmt.Errorf("failed to unmarshal log action: %w, data: %q", err, string(data)) +} + +// MarshalJSON implements the json.Marshaler interface. +func (s LogActionV0) MarshalJSON() ([]byte, error) { + if s.Type == LogActionTypeCancelRetries || s.Type == LogActionTypeExcludeNode { + return json.Marshal(s.Type) + } + return nil, fmt.Errorf("failed to marshal LogActionV0: %+v", s) } diff --git a/master/pkg/schemas/expconf/zgen_log_policy_v0.go b/master/pkg/schemas/expconf/zgen_log_policy_v0.go index b774f07e918..73034950a8c 100644 --- a/master/pkg/schemas/expconf/zgen_log_policy_v0.go +++ b/master/pkg/schemas/expconf/zgen_log_policy_v0.go @@ -8,20 +8,28 @@ import ( "github.com/determined-ai/determined/master/pkg/schemas" ) -func (l LogPolicyV0) Pattern() string { +func (l LogPolicyV0) Name() *string { + return l.RawName +} + +func (l *LogPolicyV0) SetName(val *string) { + l.RawName = val +} + +func (l LogPolicyV0) Pattern() *string { return l.RawPattern } -func (l *LogPolicyV0) SetPattern(val string) { +func (l *LogPolicyV0) SetPattern(val *string) { l.RawPattern = val } -func (l LogPolicyV0) Actions() LogActionsV0 { - return l.RawActions +func (l LogPolicyV0) Action() *LogActionV0 { + return l.RawAction } -func (l *LogPolicyV0) SetActions(val LogActionsV0) { - l.RawActions = val +func (l *LogPolicyV0) SetAction(val *LogActionV0) { + l.RawAction = val } func (l LogPolicyV0) ParsedSchema() interface{} { diff --git a/master/pkg/schemas/zgen_schemas.go b/master/pkg/schemas/zgen_schemas.go index 1a27e7ae2b8..a467a7d320f 100644 --- a/master/pkg/schemas/zgen_schemas.go +++ b/master/pkg/schemas/zgen_schemas.go @@ -848,24 +848,8 @@ var ( "array", "null" ], - "default": [ - { - "pattern": ".*CUDA out of memory.*", - "actions": [ - { - "signal": "CUDA OOM" - } - ] - }, - { - "pattern": ".*uncorrectable ECC error encountered.*", - "actions": [ - { - "signal": "ECC Error" - } - ] - } - ], + "$comment": "setting default to [] lets the actual default always comes from WithDefaults()", + "default": [], "optionalRef": "http://determined.ai/schemas/expconf/v0/log-policies.json" }, "retention_policy": { @@ -1516,18 +1500,8 @@ var ( "$id": "http://determined.ai/schemas/expconf/v0/log-action.json", "title": "LogAction", "union": { - "defaultMessage": "expect one of the followings: cancel_retries, exclude_node, or an object with a single \"signal\" field", + "defaultMessage": "expect one of the followings: cancel_retries, exclude_node, or an legacy object where object[\"type\"] is one of 'cancel_retries' or 'exclude_node'", "items": [ - { - "unionKey": "singleproperty:signal", - "properties": { - "signal": { - "type": "string" - } - }, - "type": "object", - "additionalProperties": false - }, { "unionKey": "never", "const": "cancel_retries" @@ -1535,6 +1509,14 @@ var ( { "unionKey": "never", "const": "exclude_node" + }, + { + "unionKey": "const:type=cancel_retries", + "$ref": "http://determined.ai/schemas/expconf/v0/log-legacy-action-cancel-retries.json" + }, + { + "unionKey": "const:type=exclude_node", + "$ref": "http://determined.ai/schemas/expconf/v0/log-legacy-action-exclude-node.json" } ] } @@ -1573,39 +1555,6 @@ var ( } } } -`) - textLogLegacyActionV0 = []byte(`{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://determined.ai/schemas/expconf/v0/log-legacy-action.json", - "title": "LogLegacyAction", - "if": { - "required": [ - "type" - ] - }, - "then": { - "union": { - "defaultMessage": "is not an object where object[\"type\"] is one of 'cancel_retries' or 'exclude_node'", - "items": [ - { - "unionKey": "const:type=cancel_retries", - "$ref": "http://determined.ai/schemas/expconf/v0/log-legacy-action-cancel-retries.json" - }, - { - "unionKey": "const:type=exclude_node", - "$ref": "http://determined.ai/schemas/expconf/v0/log-legacy-action-exclude-node.json" - } - ] - } - }, - "additionalProperties": false, - "eventuallyRequired": [ - "type" - ], - "properties": { - "type": true - } -} `) textLogPoliciesConfigV0 = []byte(`{ "$schema": "http://json-schema.org/draft-07/schema#", @@ -1624,50 +1573,31 @@ var ( "additionalProperties": false, "type": "object", "properties": { - "pattern": { - "$comment": "default doesn't apply here because pattern is a required field", + "name": { "type": [ "string", "null" ], + "$comment": "Comment can be found in type LogPolicyV0", "default": null }, - "actions": { + "pattern": { "type": [ - "array", + "string", "null" ], - "items": { - "$ref": "http://determined.ai/schemas/expconf/v0/log-action.json" - }, + "$comment": "Comment can be found in type LogPolicyV0", "default": null }, "action": { "type": [ + "string", "object", "null" ], - "optionalRef": "http://determined.ai/schemas/expconf/v0/log-legacy-action.json", + "optionalRef": "http://determined.ai/schemas/expconf/v0/log-action.json", "default": null } - }, - "checks": { - "require either \"pattern\" with the depreated \"action\" field or \"pattern\" with the newer \"actions\" field": { - "oneOf": [ - { - "required": [ - "pattern", - "actions" - ] - }, - { - "required": [ - "pattern", - "action" - ] - } - ] - } } } `) @@ -3542,8 +3472,6 @@ var ( schemaLogLegacyActionExcludeNodeV0 interface{} - schemaLogLegacyActionV0 interface{} - schemaLogPoliciesConfigV0 interface{} schemaLogPolicyV0 interface{} @@ -4241,26 +4169,6 @@ func ParsedLogLegacyActionExcludeNodeV0() interface{} { return schemaLogLegacyActionExcludeNodeV0 } -func ParsedLogLegacyActionV0() interface{} { - cacheLock.RLock() - if schemaLogLegacyActionV0 != nil { - cacheLock.RUnlock() - return schemaLogLegacyActionV0 - } - cacheLock.RUnlock() - - cacheLock.Lock() - defer cacheLock.Unlock() - if schemaLogLegacyActionV0 != nil { - return schemaLogLegacyActionV0 - } - err := json.Unmarshal(textLogLegacyActionV0, &schemaLogLegacyActionV0) - if err != nil { - panic("invalid embedded json for LogLegacyActionV0") - } - return schemaLogLegacyActionV0 -} - func ParsedLogPoliciesConfigV0() interface{} { cacheLock.RLock() if schemaLogPoliciesConfigV0 != nil { @@ -5038,8 +4946,6 @@ func schemaBytesMap() map[string][]byte { cachedSchemaBytesMap[url] = textLogLegacyActionCancelRetriesV0 url = "http://determined.ai/schemas/expconf/v0/log-legacy-action-exclude-node.json" cachedSchemaBytesMap[url] = textLogLegacyActionExcludeNodeV0 - url = "http://determined.ai/schemas/expconf/v0/log-legacy-action.json" - cachedSchemaBytesMap[url] = textLogLegacyActionV0 url = "http://determined.ai/schemas/expconf/v0/log-policies.json" cachedSchemaBytesMap[url] = textLogPoliciesConfigV0 url = "http://determined.ai/schemas/expconf/v0/log-policy.json" diff --git a/master/static/migrations/20240916170654_experiment-add-log-signal-labels.tx.up.sql b/master/static/migrations/20240916170654_experiment-add-log-signal-labels.tx.up.sql deleted file mode 100644 index 7112682ef4e..00000000000 --- a/master/static/migrations/20240916170654_experiment-add-log-signal-labels.tx.up.sql +++ /dev/null @@ -1,2 +0,0 @@ -ALTER table public.tasks - ADD COLUMN log_signal text; \ No newline at end of file diff --git a/master/static/migrations/20241023105802_add-log-policy-matched.tx.up.sql b/master/static/migrations/20241023105802_add-log-policy-matched.tx.up.sql new file mode 100644 index 00000000000..3899b4291fb --- /dev/null +++ b/master/static/migrations/20241023105802_add-log-policy-matched.tx.up.sql @@ -0,0 +1,5 @@ +ALTER table public.tasks + ADD COLUMN log_policy_matched text; + +ALTER TABLE public.runs + RENAME COLUMN log_signal TO log_policy_matched; \ No newline at end of file diff --git a/master/static/srv/proto_get_trials_plus.sql b/master/static/srv/proto_get_trials_plus.sql index 7e89acc8c8b..b38eb65a39a 100644 --- a/master/static/srv/proto_get_trials_plus.sql +++ b/master/static/srv/proto_get_trials_plus.sql @@ -113,7 +113,7 @@ SELECT t.start_time, t.end_time, t.hparams, - t.log_signal, + t.log_policy_matched, new_ckpt.uuid AS warm_start_checkpoint_uuid, ( SELECT tt.task_id FROM run_id_task_id tt diff --git a/master/static/views_and_triggers/up/metrics.sql b/master/static/views_and_triggers/up/metrics.sql index 023af9cf5b4..ca68ea108a1 100644 --- a/master/static/views_and_triggers/up/metrics.sql +++ b/master/static/views_and_triggers/up/metrics.sql @@ -24,7 +24,7 @@ CREATE VIEW trials AS r.experiment_id, r.warm_start_checkpoint_id, r.runner_state, - r.log_signal, + r.log_policy_matched, rm.metadata AS metadata FROM trials_v2 t JOIN runs r ON t.run_id = r.id diff --git a/proto/buf.image.bin b/proto/buf.image.bin index 7110db10061d3296ad615fdfcb33a09741ea200c..b731e53fa1db1edab57ed514cf1a70fd4e6ac1e0 100644 GIT binary patch delta 19246 zcmZ{s2bfjW)%Q7P_MDk}n0wFd7lw90+R&?jjfjmTSYi^BH}O#kOcVo*0mPK|&7~?G z1ZC5k6zQFzH$egEAX23%g7hXG-v8Qr-*x@+e#!IX3HyK7S?jmg?)#j{?Mu~{^)Kkw z%UWdDeZd-ET<-;YOqMmRc-yb+*G1j>Zn%BXo3Fo9)V^JtzrWt0b(c186}Bx^Drr^s zwX|Oj;_8(pAM)4!9?W*xg54l3vc04RbSayy4 zSh7{on@Q~}nRb?X&#n`!?E2kovdkjWY6Tyxde6pxv+!S0yz0H|FFv%+7Wei!t+TAH z#oy;SJG1@+bV5Pr{$J?gzn6B(TJmbhDf^$$%Y~h&&o};AHz%B3qQrUmVfXBq+_}hU zB&YAs@`{I+abjM@N~n>A8fE>1E2Q(5G^QQ20-mqC|9$gd*X&!}w6+FhNR zEg%X~UDZ8M%0{VVFkOP{S3_meC3Kxv%U5ulDrflVHMB}i)hgsH_u;e?3r=3MbIfBF zS>}e*B{+F47I%}%i5<>*Lq7Ryr>yi|ahylrU=ABD-Y6L|57}_>Mz~bE1Q&0VDeF@6 zqdT=J(GJZoyI07H$hY5f+(&-e*tvMJO>TMB6?2iCw)UTOa&lF2RPj-u=f#2Yxt7!_9 z;G-*=q5-P!hO_@sqPr#6{@tl9-}rAw)c8kk1(kTr8seY!|L6p`Dgmna7?@ooKy;H& zrktivQ_c5rN2R4`` z`dMfR0zeTF`dMfSQv^>x3vJO@ImC|xb`S$BG=#Q9L!gM3Fhx8#z(P}0 zR1ex=G038}C?#L&?g%;lUyk$mpbP~X1O{jk{WMwxMKp;i!r34TZPFN7$qPF}tr8<; zz5jM<$+d9SFb~#4t=x*R&JuaZ$}r3R*kmP7WyJ{8IC+$n5!5(O$YwLM!!>nFK!%qQ z>6SndF-K5Kv_wpQ*coG$m?lg9kMmSRKMcoMxn)&2Kf;Z*?C~a?UxhOQO{#thH%WH? zALscN_&55(q;#!OM6D;KbB`h%PfF(=MG#D)+&`uK;K#`__%EkGqH3L9tpPEiPEa7KrI_)UJ&s0i#eWV}|c4k;5=F1cRa%wdU zz;K3z37Rq%Q(dPcBbK!7r%h@^qe2 z#KX(ec}5XYmQ$Xe0jx(kSZQGuXd>s2&yLG)x;a%FL4^y*y0@dKtW0l55iVAyGwxBw zS7$OVo`QmgU!4J>ysu8@9lyfc>U7>wgtyg{cbbBNVQ0NnVzZRpF$G;Z>WI4QElk3c z_#hJhnPqP@i4Us88-WrZq?Bw@iEo5|17TAd0YzkfQ#$h~0%21+^C$vg(+6#OII>NT zoVV?9&5L*R$oW}ri%LUnH`%K7yB*uS^_#+uor^mCsi<|Qw*R*3hs(ImpWC(XQrM}K zrM8MCUN+@EmL-qhu=)t2ZmJw`<$E zl#|S=p8c{;27~B0 zG{-wVtr9~lEB`4uWNA(;)g@E0@ncJFlOK@lz2n>tSog@PYHuw>DQoQh4gGYjqhfo1Ed)m0t& z1FLie6N)E3s2;Cb%cZpp?GzRRE!k^i&hsy5XSBT? zm>!Y%9ZwIma--&H+P@F95~bYc>S@>xrj@Y1X%W+;Nu*i+^;M_xbFQ6@2(U?KIa@tq z)tBamQhb5)<`+`zp~5K8JTWd ztk9&U+ZIJUGLqW1wt56@JKB6F)4EGx z+eea57bQCub;0N=EIzm_=fsVy4+lE(jSe`r?U|oDI?LX6uK1nBxf5HLUuC(a|Ks=$ z%ZgibqjGi497m3e`$~rXZ63;RBzN7mORL|SWPZ~}s!ENAKIyu-KktZXs$=W+J@$M% z%&t_aT;101w5!{-eqBA=6n0*c@C9B74Y%BYItHc>IC8?d+_{!o_}^H~Ked>>Hwo}3$YK5J% zdG@uuto&b?Nfr~$Pp6|l7j;S&bu3Kce>xO(${d@_JoTq{+y5oqv-R9UlSsgIlyfxV z**vELm(c;v<|TiIa!MPZSrRp{eb^9^TzK^<{Rn=$`|ZAdx%jZJdg>$c(cSNqHqU%SKDzt!8*v#vdicE>rjj37sANyS z=U`?nc%ml`Lpu#<&({GB699`zCcP6)r1TcCOj%Lm829+$WTe5n&=mwFclO{WmHY2 z>?ZrgRHlMLR7Pu{s76qVru*&r^39{ZTB#8PGu`iaCMCE}qjE6GWx!1L*L=*RXavE` z@Qc|D14Y9aRl_K|IesyZVW1F<(P}7|kp#2QZ?BN0kNGO6MiR_IzY{dUF|SiOREf)g zS?I6b+`x<^m_>e3%rHdZ$Nv6l26JG$I*pW z`JJ$dK8k2o`6E@i44PH``p+1eQAD%aFV->*6is7PO{47A`Nd~U1BGaeR$0-E2F&OF zl)Vi!k>qbX(iK>{u7#K!I~27oY@eRuMpMc@_d8Jog$0euu^L(V9Inh|z#Q<`ZDBGqmS7J0#bJhlqG61xVU*nwzc|V;Pzc6o)m37~(cnMfx6h#Y z60&uEb~Ke)m&Os=iA9NC4HeV`YQ1ZI`>x#nt*>@!0>NDKJ2BH*6A0#-KVFN=fVuAX zs+!iDKrlc0#Z88RqG61xVU*o1zqrFNPzc6oEfmZ|YQ6h@`~5&xetlWCdUh!}_B&tl z`H4hz-|xf?#YCdI?@!d>GN>N-y=sPPB2jebqmNvr+hU7dLo>Jczpu) zhw`aYmB1jhvwm7~?U445T>hAj=uRDvlEh-C}| zMZ*|X!zjDu0kM){pb(7F>Zrs_Bbc=Td$Zhj20O%Q1hY24yUb|>vo^rH%xTo0)&;!U z24)(;tPhBd3(_5PA~^DBW*gt91Msf3a_3SNAWhT&0pRrYS*z#dLJ>9VqXe46;1!dT_%+)JE?oK4Am-?InH#ut2z9-d7w?|akPZ0xe1(4wAevadM@C`dG=Jf>!RE zJn76OxbYddxdb=fi`O)8a|v#OCnhl*6b)xo4X5lr@x&B{gQDP!_=bX;2e=uYy+Cfi zLAXWDYmtROZahBe8fLyNecoAPfF4uUxh%X=)Ydx=;+(a&(P*e-yVvA?*mTzA5H7!_3 zF1C2QqAesBTRdLT7LtoEJ+G1cRtD zU=g|c$}8KH%W$>N^V0ov5oP;pPaNQEL($nbs?N5uJLrkSoNXvHHjNgjqb^p+OdRn< zIlQ*Z!F37B6^-zks`z41ANTAtvf_2$?^BD3_PFPy4}Ob@_PF=ZvxastRs4kKH8dS} zG0~p%#CJ>!MbjEp(<-}Do;c04P>9xO`hH~z%_QeM`>K3!-B*xWLNMntePap1oXhl$ zB?NQM<8$B=f;sPriwpxr!x&Y=D7#CZxWX_{2*zlSsl*f$%yrMcC+Ga=dpK20FxNdN z-(;egV6J;*9_BJ&e)PQbkS->e8=knuFiS)0bN@z290!Bz=NB^(`ZkKEZ}9W%e!Ke~Dp1d$e4M zr&7zQ3d4dtq2m=Am3dlPP7e+X@&S4|RpFzckX!{Qx(Y_sRZw;#f?^a`0SZ;YXu1k3 zf~dmSpgkp+g|C?C!w^~FrZ16NK~!UdeEeBKRAYmD{8>R%P&ABDHH@-b5)?}r1`5F#?ID$!)p!Z(tO(lc(O%)?OYJ(! ziZ^|+)M|oS5#;3quRW>E%g1VhS`p+aVl_dn42snZ1w}&{RYNJeH9@hCp`Z|y(W)q@ zHI$(ZLHkR2=Z>#JY7N0`$n@wn1hXO2qt_73hD?uMLogeI;tPg>qG61xVU*pbpxDAN zPzc6o`W(NOV73QwB{bnK&hcvrW_ysAu(brUJ;+PgT7uaT7DDz?dhQSfvXEeSJ$Yzy2|csP@L!LLZRv!t+VO~pHc3w1noQW=zU*@ z)Mo^9CFs;O!SR|rEUpHf^jh&5!CVV^KQl0&5zO_VxWO<`G>lO-jIz5K6t@`$3c(mn z9}YHz0CPWR_X**eDcPc3$F`m2_!n~@epzowHW1wXpp(8j*+6g)f=*)tyMf@kg}n4# z?*@YF9umDm>7@gThU+fNSZ=LA6P1IFh}u775BqqWTfbl<1?eAh(pR<{DMjD?+E;-F9xjaFN+d_fmc6GQf=NMc#_4k7hUr=qtD+t62# zI+0%x;KUGLntwrn6GMDy{sjR}4DqG;7X&ydBqlQ;6b)!p4XEsnfm{t5H|JwvfRg0n+ax7h{yP5f>{*eF}|5#7KeC@Zzh-}A+eNUplBGQ zY8Yj=EF@Mi3>1Pfnw|)^5X|b3{keQzxV2^ZY&ZAm^wrcBB3T{cS#S%HtO@ZfxP?g8 zhQxX%fuc#)%D1!8+4W0;FA-vM$lf(P+by5^lB&Bo#24OQQjpCdzVQB%f@}`)h4+^f zWJ^eF;~-FU5TokqD!c6=v6HI{g{o^b{kCf>Rd-LwJ|rh+yHyId63m_uZ>zQv%$^W$ ztF{u%o>1doO6g$a>4j_?TpS76r{s%{TOqZLTpY>trETQmNTx4sBNyLf`qDP4&(V-L z!S#Wn>tj@1A7ytkB);SNK%x2=O{aD{?aWSx>`NGVc>nTF+tzYoj+>u49pY7fI{}>z z@hZQafKG>amETT4XF}o}13}S1M%6&d?tDmGWFRO6WHb%513XtkwtY*E$U$rGpzK@; z@mYTd!CVRPhIa?STnX{kdk4W>4T0cM{Frkhsq@P&AEEHI1@+5E9+P=~)sA(HKqd zS9ek0>lL;K$m6+g#ndi>=@sVv>Mnxm73TfwE`sSD=Kbm}f_Wdu0|e1`GEg*(Q8kRR z>x%`1VW1F<(e$|AO)!JP_Hfz9=hjZ`CYV8C9+kTZW>A<%$1_L7T9VV>dll8Z@Up5gbBi%-Hl!|$c~OvZVE>jOpC z$EdnK%I;Gv7F-`FR3D@1_at9YeddJi#d20jw{okm2xd+uxnB{?oJ?}RBA7Xu~+KZ|Qzosg02-{ob?L7DA&Auj>4Vfx`O)wiWRsNb_ zHe{;&HNkAemyI$RaAQq3A(oD=e%OrrBaRM*q8m-mI{OiPN7&vcH~W!^{SKuxgfr1amTM zpOc>F)=M2En3G|?mpMo-n%jk6oN6D zo{kR@%*C+%qul7Z6-Vwk^sIYcm*uv{<<6b*Apo(;OS zyhl}?4o4Nz(=nL zrs^so_X&xwiT`F6|8MZa;D{~dnUGsS&Ir3rUlYFM4IKeE4papf1;{>xXA{H|Y z6b)n4pV%#lh@}hzJAyIVlM3bp!K{qf8|Cq+`)KL}!K{q%OnHJ}Rz@P}neqg|tc>tX zd4gb8MZ{W$fudoIs$rDf`iR)TFi;4_X!`Q}B*AQo*t_JUnA=_uG7J=gF<&i6VTOT1Fh={Cg87bM zjzw_2ULoN=Qt%zY9ECE`qUNwlrMmD}nFf;zq=NAZM3ypG|#FE^cH-;`ij@MrI^_ zPcCj?B$||bPbs+>5qCHxP;^R+s#Buu?ncCYP6-rBiP3(is&krB(j#j3mwzwq{wj5v zV0uLPtazGWdPMoGc$#2(M)|CGnqYcG#Rt*!6a_`Y7*)e4yFO9T4|aNrfr+L9Qy-B86D-1V}GD1W1?c5tWXv^iE#*~ z>!^Mh9~Be1j!>wMM$_NaoJH^{QG1S@QO12Nb(Vrp$>jSi1)q}1_gM-)HIwhN6nt7# z%-||Q(N#36W>I!CqhdC*Kp_^R=}U`ql<)abd%0|17Pl7X2xfi;<{ZJy&%m4`m<1V_ za|E+6DwZ$|6b)lk4WsOeqhcAuKp_~THBvqCJi)Ar;-a8@Ik!&gJi)Ar^6GM)U{*zW zbvaKktE0TSoF|wyQL&z3plBGQY8YkrSyXIf7$^i|G(BxyAehZjyn)x6wae-jAMEOPB1%ml9Dz-BW6b)lk4WsOKM8z(KfkH4wYppVKkzn>k?Qi6V z<=sEZMHR4FXsLd=gddJaF`sp+;I=5ZM3l#)Jh@&X%HvUT$J~3m&wKXDDU4cQ$;UC#U-vN6kSoH>WV76%TaNa zD++}wYBasezCy?88&SJkOdd(P<>YvL(fHhrC?B$}5Y3G!AF{3x&CMtuvaS%#t*E%m zG*C3nEqS&gCIG!pyoxYAW0>>0Rdg#9T%|hqjPW#cm7?^F@icRlqV$U44Rui^q)nf- zuffHj7*4lUD&fzNu91sDF@76!ja&?h@!Obd=A~G?tz=p=i({vgtzr z)$hfwBg)7a_H0`#xygd-6lG+LuQ;z$l#wyM;=E2#M#gG4mkSRKpERd(MG zlvADM2As`}*~JonJ)bDJLC)sJcyoM%oXw5#=J*CVn;R==CNES*tt%)OH{oJ=%w8{N zRd!1k+$0yvGdwrR#qtc#O>(iEcm`F0i==XK3obUs>}~RwRowi7TjXM6jJKh;$i>DO zZ$od9i;XdCL+e%LE_)j;cE;@e@@y6CWN(v;oiRR)-6j`1V|*C9O)hrE@XGTFxzMxl z9k@6Yvro!_Ro%zrz-n%8#q_7&cj);;Fh=W$HWb$fJBe4nDvqZO)7_kd>5K5@Jv zf33iMJ@tTK`owvc^MGLb#CezVfMEK@d6)BmVEVQR`>Z7<<-=x2r4VaJP zVg$oL(J)5UFv@OZT#RNICX={Av#yxwRICoW4-(2^UM^_G;O+7TzPgS{p+)eTwKw&o9YP_N3>FK`DFJ#`DcoO?%<_ zb#Z(PT2R|t(gk-CU4s^ZP=UAwmmL(a@wHiv~8CU>bTAISE>BU-|a|%K5l`T{f%hmPz%Y_~+w%;Os;3&&T<|*@xnvkMn`E55>O_ z7neCc6dm8Fx(_M4D{*m+`w$fBLq^kI1N6-W%+0vnEg=up#U`RJ(cFylOw*TWZpL}0 z=}R;><2=*!C7N4tahGYJXd0tx8fAAcE*>xq6rwSjo(}sFP0xfqAR+%%&n=hgM>IVX zJoEG;nw|-sdHNAeuLRFL{fMS_Li9_1&c0fsJ?N!3myu`V-9H1kXJE31&#bt1fRga9<2Rt9tGLxEP(VKS^ZeH?F^MY% zg(_w=z4aVO6`PvCL1thBw?%3o!AwnXn-3(IsR?fLfdn%(!EHW}V5TL+42FTCVT`I_ zl- zplBGQY8Yi#oDj$ZeDwOfai5FoOwZRR(4-!K}`}3?`U0 z39*i0plBGQY8Yj=J|Q+R3>1PfnuZxdFq;y%4Emg4h7ioA1n+f+5X`0o?{$X|%;p3i ssfQ5EmW0^GFiM3 zNUuV+bdX*`hf57zdR2-bN{~(h0YV87_}1Ec-*so+c^;o(|L;0${r1{@pOaOCGw+l) z%*ppG)0%hl6vSF|(l(&~J>Pw+*Zc2&+~dQC9!mE#^u3_H;sfnf zA@RTbkCq=WjT&zy%<7I$G9KnFC+UHh`6umZ!&4D^xrbIiTqoUNOw+@Kij|TrG!GA1vq)+v2BR#uglYM%RB+v4g z$JF;H{V&Lffb{+cRHyqsQ~H$ z(4>C>r6D_tlqvmnpw< zKG92)GSdu=LA9DzYwt<5(_ei5!w=JYW%u~>Z|`OIYG%Oyl30?*O={|`$v5=YYx!RW zj3lj^o7756Pkn}W&eiMi5Blhhc)QQ^HvabFSFmrRMN%8_1Fz`;KKL`;;+1`Llh64~ z&*{`0;xU(XsMgt&`bhdK*}XpL`Ejr8^xi#1>{nIjxULWwxa{CfP#EMWo<{f@W% zm)?f!pX)iPEy1bovToJ>=t+I_zp(h?|K|()6^IUxb@SHd8$Sn-#_&wC%l=gDZC?C^ z-XJ*{e*6>s_*Gv3c@?50Bgh*7@|*-|_iw!kAO4!IXT6~rqQStTI+)b3<0WwMhEp?M zg2sHKb{(UQ@C84;#ho|wCT*~(lQj=K`IeTfY(4n1*NvCp$+tpb1G{&!rnyNy_|<&< zAwHs)&Yl8+fkFrxIOa`JcWJthW?H+d91CW;+_#fusFfG;l4Tk{S7)EUqDLf1Qfpitmf z9e%D+O9Y1!c>SREqwx};e9$n{XbLDkm-VdnFHdR)Z}hsJ%DaE1ds|^&^}!Q8z3GC( z2OOVhpGpp&;1I>vfWwCzpYh>e>3`sF{#!S)K2r$r17zbf)gVv;ho7kiff68mrW(YD ziRvR7gzp9yjrypzfFJM|)fP~Khx(|t@L^l@O|(T95TGskQgm#KzN#(YH*nNfwFQ*G zQD10_Hjn_rt^Q@Te2*u!0k0Ea;ldois>{ zMghrVeBqmVD_)eRo2_fW6a772oH&zMjbv9J;L!#Fhz0IzAV^%m`J@Jt4CQ5c&=@Db z0+OL|68Hs1`cMxv2Zn|s_#Ntj_E1G28R~%sd0M!EABTHj=4s1w^Yy0uZZ4AGI6*mC zJp+vcMQ9!A0$K-(&^#H!h32x6{OX_eM;;%UfPh9}BR$Y6@)2kiC_=NSB6t|-QDY>< z)yH_MP2l7E=}lUvfZ-UA7t@%;QXmV39_?F|g%ptmC187`U>3%U_IMut4Rhr9H~|!) zLB_`$1d8Bze7r%R2n6G?L4FSnV!HZdPqn$wAf0&k9x%U&1~K7@$#E1DP^Nga?-YtD zP?UfTVj{{6*&usg*Y!>_;w113ia;_08{}Ex27a8y|DCTt)!G!z zFe}~+P{iNj%>YGsa8|q-Oc*6>4mLxrr{Uj_ia7}YY>YYa#(-bJ&zyK;KoR`R!NzC> zsqnjck*C@+{!)S7wD3)xH81jbYm3J4Lk1Rjv_&cdevttsU}N|(14~6?w1IyE!O}PZ z6rnMe#v22QK(I947*GU)rTx>s)_L03$^S^Y+0x*4^QDEyh$&{dTvQxeoI^3KUROO zcTtj7aUphey?1JlSckg4sz1c`ed25Dt1)j_l61W`A~p{kHV-vo-Tvik=kctK6@1}q z_o2UuQC+X5jqH z^|BOpB}F%ah9RC{i*~N9OX`2vm8Y<4VCNcMX@|pKY0U-8H?Vx@H&e9xDg5ONzJ{6K zAkEDb{b3dT8xPRjOi67+WuUp0Vm0E|&-=RjvxJk;;3L=62Ji?;gEAz)h7*@fai=Z0de01L0uPs!e4e9BgKGP=sTUaEQr< z5g{m&P^pqo*fkw)vXO)c5+W+Cu|O;Y!ZD^c0SHsM=Zr7FdtUPSqlHK{CP4)g6qbpq z5c4)BK~;!Ug(mx!s6dfaN|jW?Zmh}16BH;2rLs2n-zxK^MaLq{5>t0o@UaN9#0=D@GQcb`TRy8` z#v;s8lPxD0P$Y~}C5*6JVX~D30}8?@t)74xhcIhQZ4>ug_SKG#Lzp$D9#p}{Aul*fHT2fQ#&MP zf@iWn%NAmLuqtw&`b+-o-PqS7@13HO=o01<*B z36&}dg@2~6f-p*JE%GxNHnaK#Q@aXHSXWM!Z#Ar5 zoqv7R=R_wX+67Y&skWMoXcx?I6DkARMboOU+HNwUT{2l2p@AaNlq%7L-DQ)N6B;Op zrnD{sZ3-@&*G=uN$!lNpb&O6yn(L+>R?%Tkhvi5!Dg#Z0X*EzZQ;_C{$!-x1D3V60 zl1A9wHrXAb0R?H4)=1Dy#er94YG3nTulee=pNcS537Dw}Qv*QK^}jyXAhZn7@D1 z*DyL0VV3)8*EthmmiuYfITL%x3P0^SXCh3@&sGx*C=y1g5=Pjq@w0UV0}8?@t)+mO zg)kfa+D;z12!tC+WK%R{-d;K(!XCur$KRZY;phy^{N*H1H zgP$ED7*G&KY4run9EADFubtvUZ~M}ta}egI#CU_l7?x?g%|V!BiSafEVUGLRF9ZXM zgi)%55q7`&SqZ^_f-p*ZMZnC(>GzCZD}$c=6hB|#YsWvT^toN(gzK_1emz6=+_^|~ z#$WdtDg)J7ztu|hux{2x0nLv^Oh4 znEn>+&5As5x*cFy@dd32;RaZ+pvj;`2shATg9#242{(|xd>7`gk3^@bFzOhKaf%9xjH*;gC+xnp*f>fF zD5Ja)S6AX+1>8b_n`~*b_&=(Ae~vChxXFo>EJV1;iIglvxG4#^g$Oq#k&=Z7H`QX( z2o4kpr&I|i?5106Cc%Ls;FS1B0k;U?=33ff{^$F?ZqY>uH`k(#{UU^$YthDj5yH*2 zTDDi+ei6dWwX(XZ%)u20ECd!SA~;YaoKhv6uv=iUMFabC@3thcmnW0H)l%q8eyy`{(3wk7Cc zy+v!=67;aag0*et1Mu({;bAFw*lB4$jM-?smbnx??6mZnDgkg&gXOd=Duahzmepj; zJ4W~B@*KGolEMyJtQK5ICd0uF${9*{Q$$z>5e{40FMOTH_&?EQ7~-&{$4_?4FvMZ& zk>^y1We9o1vRbR0FTGtf^rTD4q&Cp+mYpPsjZh-+IcI9 zUv2Hp;KiDuN0%d0NupmYN2ZcQzgUh;B^Dj{mLtw$^di8vf{&e1;UhC>@vZCB4Ly& zVT9cki(MrcP!L9G@+vEa397KPyO5wek9%vYBTNjTDiU)}452C#b50C1RAJFpCx%cr zEOv{aK#@>Nl~BU&w#Duc6etL#wE7{DqLlzuWoZR=QfdgVGWcJs8xKWSB2-lZY9&Hd zC7@O!R8<0MB|_b|Sgsvk3qX-jN|jK;uAj~F!A`CPpdgge_GyX3dWozU3{pv<~ht>GtD4UKvtMS26HXV6ZY@BcHK zPxTt1=o$o>VAJfg20JlGY%|B%4j4T!JEVsZ_}&?55gm8gYSwTuPJY zfVDUTX4={U{$jH6NOUd2%(Uqouohuv+H?+Bi!ifnItQ#pnAtX)OE92F7^O-WVK>ia zMFay1!YJ(#k)(BSzppQ{wHRc{O@E0`_8O7sIz(Dz)6%gHkrvssbgV<9MK(kw(N z&6W}pC=yAj5=q!Cv)Kwl0tJzj_NYKA#{8_ZwGEtmjXKd{gjtp7Zp8?*D$(7F5oQ&1 zH`O_c5oWc`))EXT5=N;KM%b;h*?NKj1!0sX&+zLJX0r`rtS0w48Gr{7NbmZ7uipHnSG|w$MP5Vix)ITi+j{)wX(OT?w;%nzn#VUH+RwJtR#kr^ zqMfkWuY?AQL_5I;8b%X;FVPn^LCkZuR?feuVSE(bgi+4fdUNG<6Gl02>+#iM6GpjU zTTiMen=r~no0U=&P-GOP$^;0zGMinY1b~7GP@3G|Zx(kZwpL~Ho+-w&(alJ6-G&bX zMQ?8c-ySQYDSByJNF^L<0)aD6PH7%ogmJIS#zG zc+50fcHM$7IgTE`w%vj-IZlIa3T6wYCfBjrtJG{kn0^k+cjC(fC=#Y$p$Yl_9}#6M z1R3mT-|(xZ@lxhi3^LfElhalVGT5P$(^d>J#IaiO(WcSeZ7N)R4=xHFZK9KunkMfP ze(@W2^n3JG=+Gtj_vow8p-b@Z(O02Em*C$cFn8Ek0tQ6_D^+Gs*o|}81hA9q87P=N zrOA7tZJ52u4s7ImTSk-UHiVh%(8{z8VJ17YGHpYcDGsen+Yn}|!=@1oC=y1g5=Pif zci2pV0R>@{_O!^*c7&PZz{b74Z9E>`jxci^8s^&(W{yL{d^^I-b!eDxN0@mID}cQf364>_>rR9zcIdu)C&Cmvbl<%b zVTv8P@7{?p>m9a-pOb^m6%1YZpY>?r8h@ldkb-bQeb1?$G{b7e?9c(Eer@ zM%nJrg0l;w>~Pp_iUNv^qEuOBVYkO&`>4vGV3n07SHIm@<%5oPoL_Q{I+?o>=3t^{ z?nan{iJrL|VSa#~$%nZ{cfU;A9&mBO(a!J@0i$+w54t$v&}zO1U7T=eHQ$3SPB^rh z@4@=~;;<5`4=A!eN|p5ycBdS6n(6}z)<KSFG8MkXwSVDAb{RmU(&~9}X-t~Q1r2^-C$2N7l{eEX;txq}EZ)TNPn z5MhS8G*S;D%rN-qhG0OEFiMp$!fq7YBM}TJ2%|K4l>Gr=xT{U!Ln21I%pVYjyR?S< zfH2&pHRK0``PPMz+B{-(x8<9UAHl^mSDWi5@lw~DrVgM#qN{0%B>#x6rX`a6Bf6TF zNb-+Zujz1Vpn8EK>!nm#FJU(e789x$C|ED0)r*QO9|EW%S6lAlh<)voci>$*ym{=C z{q}o3dwxJ8_7FlAxiqaDLdYUF5Z{3uLdYVQrj z!$7gh)i(0NhmGGw4`Z}diIg11XsZ$_IgHU(B~o%2qpfz?I*JC0jHXnX5@A;iH${{Z zP%tG*latsHoW!=c+Afh2n#BHhN{%4p7MCWmBM7<0rAh1vLT+(s5<7yBTV1w|AVHCk zN|lgDJWarGJKRnYB^V;5(&U@{qX4TaU(=L6gatvWkyYvCgF@!k-%MZbTB4Ly&VT9dzxW6J8P!L9G9Yto2<1ym0tKD>Y zaxJ4x*W*ZY*`-fYjw8)wmp(H%jx?8D`ZVP@(p+)bHKGAU(p=%Y(_mBcjEM6ygsF64 z5!qPFcp~#>j8d8C4L@U)%0zGY8KYD}Z{Y9Lf_;R{+X--yAJB&Ig4#wM{-BoexnK6Q zU*Lz~0j-cXs$-<_>9vilZleP63I3Nj&WHd*RVD{ixYrM`F`&qOIVhMMrTsxb{|b)B z2DGVscOB#T%wG|DY=CZPe?{oA0lK066`{uktR}o?9e{pExHt(eW(44NWo)|fX7nVw zm=Vz9Uyz(c7c&ETOSNR2L>IFHR!4<-5;127*j%a(C^EZBmDLe;^8&1hssjpEM`=$9 zmJ;lEiv!wf{$+;IFmJnt+!8^Y`eXb1Vdx<-fSZwRv^px08(@*BeJ2-NOGWq{ck zu$rl6`VC=r1=t>f0Y$%kb1R~2W$MQK2*;J!pL-mc9`S3>#Z#4ed#5sKaR)7wq=kWR40XmSL z!+cZ**geVzC^8>PmH7~MRRQ*Z@&O9wLuqo0bRIzRg4&QEFRKsNrRT9Sc|qC@pGTOy zAnk_FBTRmfcEjfprXa`$1mp7)C=y1g5=Ph!46?ysC+8dJ(HUE=U&w7ct7XAm#reR(X7o@_!MdObD{c6a^F+MX9pN!fr~CeMeOW z1*@zyc~x==t2{HPE#%%zxGK4XFf$V{mk?%V0_GCJ%u2vqLYUb>HjiLHkuXY?Fv4zr zkS!n>P!L9GZA7muMVKW)xSd#?X*7$LBFvH?EfA#$vm{6hL@B~74O;Pc(4`2oEXZO6 z1B!%Es)P}CD}!t`!GMA=N|SR~8Nw6?wQc<4Mlgq!Axv?Qrj0U$DGt)KQHC(4*XH!#ZB5beKjV3e_;%np2UbK`A>e_F^R zEsU>Usw14;0%r?DS}c^5>W^P2efrtQi4Wv&q5p-6THHea3lp`th5i>pE%>DtM)#M~ zh1c8Qb#+MF1YWK9+rsq!a(f%yt`5;MdK=xY4$(4t8{MuB!7@6irO`bs`+Ft0-5S#N zfLka1wU2x7vX(|#M*JhXN_4R`Ll-b(olz9g! z_J?Rca0gxN57B<$4!YPMg8jg5aN)|cz+G^0Jf!^w6s`IG&PG$7*2XY1@1n2ciA3E+ zU&j-Px{JP!W1@z&1{^ui?t!nfA+3xrYy&rC_t3@J5M9CCLl@Dx*`f>G$5CGu(hB&Uw#Hl0 z`v_ANqGkI&!c>K5*}ji3_d~R7-$$4SA(k7C4<=9~j8Y|xuA<7B^Xd7j8Y|x zup1U;BM1f*gi+e#A~QM38obF5!>6y4+Z*kpIrz6%qr*RzZkU2_p;N+L742s0&- znOuaKn#fEp!h9EI(+LI?3G*GVn`JZ!%Zt-~5M^FiTN;Lszj3AGi`yHI@*!D4%}_CFzxmdHioqw{Hw=|7Mb~2?Tv|Q=c9{_iE8Jgi%n2%p7EH`Ju3ew zp#WU$4r@OGi=F<5p8w4L&p-K=Z$S&t@9r?2aSPDz?l7Hk3()WGFr9G=Fr9nCY(J$F z6q!z?V%u`o(P%4Ig2y`=6M5~&jfeQPj>hL|^&bFWWnt}Rn6G;rR{sGAQ5L3oU;sjt zg=robfDo6%G!G0wh$~@sjUYgg5K5IDOxRryvm4aGK*0{CG`Xe@#12*&*81_kJ^_=` zK!m9b(>yQ`VJgEk4-7<@J7Jm!1|rPeFuPAMphy^{N*H1HAk1TEdCK}b~)p}WdKNL3J_rDzaR6-4N+au8B|9bp3@n!Lyd zMN%nMQVF|35jKRVKtU>{$qnLQOwovl_ATGt88(PHa6?gVM1)q6!3Z-VLaWGNgc%Wm zPm1O|0sUFN0UrV`#z(a2eDsq>%jgiS&-e&6`4Dt5K0-}C1YJysP?HbA`b>>m%%@M%Xl}4=7k4rOEGhhhlwZMc|P1;!{SO=um{2m4F$FFtZXcLlI_n0%j<} z%!#mh1OtkMQL2OycJm`_0l|QRFiMj!!w_ak1P)2ny8z5Egjo`y6Z0^HSrVZW^Du;2 v8le;OFoanaVKIUMMZzdm!U(&S5w@COKtUL(y{*GLzTQ1s$5wVRJW2l_n}r}i diff --git a/proto/pkg/runv1/run.pb.go b/proto/pkg/runv1/run.pb.go index 6d550de0c92..75cc2e146d7 100644 --- a/proto/pkg/runv1/run.pb.go +++ b/proto/pkg/runv1/run.pb.go @@ -224,8 +224,8 @@ type FlatRun struct { Archived bool `protobuf:"varint,21,opt,name=archived,proto3" json:"archived,omitempty"` // Project level local id of run. LocalId string `protobuf:"bytes,22,opt,name=local_id,json=localId,proto3" json:"local_id,omitempty"` - // Log signal. - LogSignal *string `protobuf:"bytes,23,opt,name=log_signal,json=logSignal,proto3,oneof" json:"log_signal,omitempty"` + // Log policy matched. + LogPolicyMatched *string `protobuf:"bytes,23,opt,name=log_policy_matched,json=logPolicyMatched,proto3,oneof" json:"log_policy_matched,omitempty"` } func (x *FlatRun) Reset() { @@ -414,9 +414,9 @@ func (x *FlatRun) GetLocalId() string { return "" } -func (x *FlatRun) GetLogSignal() string { - if x != nil && x.LogSignal != nil { - return *x.LogSignal +func (x *FlatRun) GetLogPolicyMatched() string { + if x != nil && x.LogPolicyMatched != nil { + return *x.LogPolicyMatched } return "" } @@ -904,7 +904,7 @@ var file_determined_run_v1_run_proto_rawDesc = []byte{ 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x5f, 0x65, 0x78, 0x70, 0x65, 0x72, 0x69, 0x6d, 0x65, 0x6e, 0x74, 0x5f, 0x69, 0x64, 0x42, 0x18, 0x0a, 0x16, 0x5f, 0x70, 0x61, 0x63, 0x68, 0x79, 0x64, 0x65, 0x72, 0x6d, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x67, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x22, - 0xb5, 0x0a, 0x0a, 0x07, 0x46, 0x6c, 0x61, 0x74, 0x52, 0x75, 0x6e, 0x12, 0x0e, 0x0a, 0x02, 0x69, + 0xcc, 0x0a, 0x0a, 0x07, 0x46, 0x6c, 0x61, 0x74, 0x52, 0x75, 0x6e, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x02, 0x69, 0x64, 0x12, 0x39, 0x0a, 0x0a, 0x73, 0x74, 0x61, 0x72, 0x74, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, @@ -965,175 +965,176 @@ var file_determined_run_v1_run_proto_rawDesc = []byte{ 0x68, 0x69, 0x76, 0x65, 0x64, 0x18, 0x15, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x61, 0x72, 0x63, 0x68, 0x69, 0x76, 0x65, 0x64, 0x12, 0x19, 0x0a, 0x08, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x18, 0x16, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x49, 0x64, - 0x12, 0x22, 0x0a, 0x0a, 0x6c, 0x6f, 0x67, 0x5f, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x6c, 0x18, 0x17, - 0x20, 0x01, 0x28, 0x09, 0x48, 0x08, 0x52, 0x09, 0x6c, 0x6f, 0x67, 0x53, 0x69, 0x67, 0x6e, 0x61, - 0x6c, 0x88, 0x01, 0x01, 0x3a, 0xa6, 0x01, 0x92, 0x41, 0xa2, 0x01, 0x0a, 0x9f, 0x01, 0xd2, 0x01, - 0x02, 0x69, 0x64, 0xd2, 0x01, 0x0a, 0x73, 0x74, 0x61, 0x72, 0x74, 0x5f, 0x74, 0x69, 0x6d, 0x65, - 0xd2, 0x01, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0xd2, 0x01, 0x04, 0x74, 0x61, 0x67, 0x73, 0xd2, - 0x01, 0x0f, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x5f, 0x73, 0x69, 0x7a, - 0x65, 0xd2, 0x01, 0x10, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x5f, 0x63, - 0x6f, 0x75, 0x6e, 0x74, 0xd2, 0x01, 0x0a, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x5f, 0x69, - 0x64, 0xd2, 0x01, 0x0c, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x5f, 0x6e, 0x61, 0x6d, 0x65, - 0xd2, 0x01, 0x0c, 0x77, 0x6f, 0x72, 0x6b, 0x73, 0x70, 0x61, 0x63, 0x65, 0x5f, 0x69, 0x64, 0xd2, - 0x01, 0x0e, 0x77, 0x6f, 0x72, 0x6b, 0x73, 0x70, 0x61, 0x63, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, - 0xd2, 0x01, 0x0f, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x5f, 0x61, 0x72, 0x63, 0x68, 0x69, 0x76, - 0x65, 0x64, 0xd2, 0x01, 0x08, 0x61, 0x72, 0x63, 0x68, 0x69, 0x76, 0x65, 0x64, 0x42, 0x18, 0x0a, - 0x16, 0x5f, 0x73, 0x65, 0x61, 0x72, 0x63, 0x68, 0x65, 0x72, 0x5f, 0x6d, 0x65, 0x74, 0x72, 0x69, - 0x63, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x42, 0x12, 0x0a, 0x10, 0x5f, 0x65, 0x78, 0x74, 0x65, - 0x72, 0x6e, 0x61, 0x6c, 0x5f, 0x72, 0x75, 0x6e, 0x5f, 0x69, 0x64, 0x42, 0x12, 0x0a, 0x10, 0x5f, - 0x68, 0x79, 0x70, 0x65, 0x72, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x73, 0x42, - 0x12, 0x0a, 0x10, 0x5f, 0x73, 0x75, 0x6d, 0x6d, 0x61, 0x72, 0x79, 0x5f, 0x6d, 0x65, 0x74, 0x72, - 0x69, 0x63, 0x73, 0x42, 0x0a, 0x0a, 0x08, 0x5f, 0x75, 0x73, 0x65, 0x72, 0x5f, 0x69, 0x64, 0x42, - 0x0b, 0x0a, 0x09, 0x5f, 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x42, 0x0d, 0x0a, 0x0b, - 0x5f, 0x65, 0x78, 0x70, 0x65, 0x72, 0x69, 0x6d, 0x65, 0x6e, 0x74, 0x42, 0x0b, 0x0a, 0x09, 0x5f, - 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x42, 0x0d, 0x0a, 0x0b, 0x5f, 0x6c, 0x6f, 0x67, - 0x5f, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x6c, 0x22, 0xd9, 0x11, 0x0a, 0x03, 0x52, 0x75, 0x6e, 0x12, - 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x02, 0x69, 0x64, 0x12, - 0x39, 0x0a, 0x0a, 0x73, 0x74, 0x61, 0x72, 0x74, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x02, 0x20, + 0x12, 0x31, 0x0a, 0x12, 0x6c, 0x6f, 0x67, 0x5f, 0x70, 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x5f, 0x6d, + 0x61, 0x74, 0x63, 0x68, 0x65, 0x64, 0x18, 0x17, 0x20, 0x01, 0x28, 0x09, 0x48, 0x08, 0x52, 0x10, + 0x6c, 0x6f, 0x67, 0x50, 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x64, + 0x88, 0x01, 0x01, 0x3a, 0xa6, 0x01, 0x92, 0x41, 0xa2, 0x01, 0x0a, 0x9f, 0x01, 0xd2, 0x01, 0x02, + 0x69, 0x64, 0xd2, 0x01, 0x0a, 0x73, 0x74, 0x61, 0x72, 0x74, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0xd2, + 0x01, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0xd2, 0x01, 0x04, 0x74, 0x61, 0x67, 0x73, 0xd2, 0x01, + 0x0f, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x5f, 0x73, 0x69, 0x7a, 0x65, + 0xd2, 0x01, 0x10, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x5f, 0x63, 0x6f, + 0x75, 0x6e, 0x74, 0xd2, 0x01, 0x0a, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x5f, 0x69, 0x64, + 0xd2, 0x01, 0x0c, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0xd2, + 0x01, 0x0c, 0x77, 0x6f, 0x72, 0x6b, 0x73, 0x70, 0x61, 0x63, 0x65, 0x5f, 0x69, 0x64, 0xd2, 0x01, + 0x0e, 0x77, 0x6f, 0x72, 0x6b, 0x73, 0x70, 0x61, 0x63, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0xd2, + 0x01, 0x0f, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x5f, 0x61, 0x72, 0x63, 0x68, 0x69, 0x76, 0x65, + 0x64, 0xd2, 0x01, 0x08, 0x61, 0x72, 0x63, 0x68, 0x69, 0x76, 0x65, 0x64, 0x42, 0x18, 0x0a, 0x16, + 0x5f, 0x73, 0x65, 0x61, 0x72, 0x63, 0x68, 0x65, 0x72, 0x5f, 0x6d, 0x65, 0x74, 0x72, 0x69, 0x63, + 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x42, 0x12, 0x0a, 0x10, 0x5f, 0x65, 0x78, 0x74, 0x65, 0x72, + 0x6e, 0x61, 0x6c, 0x5f, 0x72, 0x75, 0x6e, 0x5f, 0x69, 0x64, 0x42, 0x12, 0x0a, 0x10, 0x5f, 0x68, + 0x79, 0x70, 0x65, 0x72, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x73, 0x42, 0x12, + 0x0a, 0x10, 0x5f, 0x73, 0x75, 0x6d, 0x6d, 0x61, 0x72, 0x79, 0x5f, 0x6d, 0x65, 0x74, 0x72, 0x69, + 0x63, 0x73, 0x42, 0x0a, 0x0a, 0x08, 0x5f, 0x75, 0x73, 0x65, 0x72, 0x5f, 0x69, 0x64, 0x42, 0x0b, + 0x0a, 0x09, 0x5f, 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x42, 0x0d, 0x0a, 0x0b, 0x5f, + 0x65, 0x78, 0x70, 0x65, 0x72, 0x69, 0x6d, 0x65, 0x6e, 0x74, 0x42, 0x0b, 0x0a, 0x09, 0x5f, 0x6d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x42, 0x15, 0x0a, 0x13, 0x5f, 0x6c, 0x6f, 0x67, 0x5f, + 0x70, 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x5f, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x64, 0x22, 0xd9, + 0x11, 0x0a, 0x03, 0x52, 0x75, 0x6e, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x05, 0x52, 0x02, 0x69, 0x64, 0x12, 0x39, 0x0a, 0x0a, 0x73, 0x74, 0x61, 0x72, 0x74, 0x5f, + 0x74, 0x69, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, + 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, + 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x52, 0x09, 0x73, 0x74, 0x61, 0x72, 0x74, 0x54, 0x69, 0x6d, + 0x65, 0x12, 0x35, 0x0a, 0x08, 0x65, 0x6e, 0x64, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x52, - 0x09, 0x73, 0x74, 0x61, 0x72, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x12, 0x35, 0x0a, 0x08, 0x65, 0x6e, - 0x64, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, - 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, - 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x52, 0x07, 0x65, 0x6e, 0x64, 0x54, 0x69, 0x6d, - 0x65, 0x12, 0x30, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0e, - 0x32, 0x1a, 0x2e, 0x64, 0x65, 0x74, 0x65, 0x72, 0x6d, 0x69, 0x6e, 0x65, 0x64, 0x2e, 0x74, 0x72, - 0x69, 0x61, 0x6c, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, - 0x61, 0x74, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x18, 0x05, 0x20, - 0x03, 0x28, 0x09, 0x52, 0x06, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x12, 0x27, 0x0a, 0x0f, 0x63, - 0x68, 0x65, 0x63, 0x6b, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x5f, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x06, - 0x20, 0x01, 0x28, 0x03, 0x52, 0x0e, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x70, 0x6f, 0x69, 0x6e, 0x74, - 0x53, 0x69, 0x7a, 0x65, 0x12, 0x29, 0x0a, 0x10, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x70, 0x6f, 0x69, - 0x6e, 0x74, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0f, - 0x63, 0x68, 0x65, 0x63, 0x6b, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x12, - 0x37, 0x0a, 0x15, 0x73, 0x65, 0x61, 0x72, 0x63, 0x68, 0x65, 0x72, 0x5f, 0x6d, 0x65, 0x74, 0x72, - 0x69, 0x63, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x08, 0x20, 0x01, 0x28, 0x01, 0x48, 0x00, - 0x52, 0x13, 0x73, 0x65, 0x61, 0x72, 0x63, 0x68, 0x65, 0x72, 0x4d, 0x65, 0x74, 0x72, 0x69, 0x63, - 0x56, 0x61, 0x6c, 0x75, 0x65, 0x88, 0x01, 0x01, 0x12, 0x2b, 0x0a, 0x0f, 0x65, 0x78, 0x74, 0x65, - 0x72, 0x6e, 0x61, 0x6c, 0x5f, 0x72, 0x75, 0x6e, 0x5f, 0x69, 0x64, 0x18, 0x09, 0x20, 0x01, 0x28, - 0x09, 0x48, 0x01, 0x52, 0x0d, 0x65, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x52, 0x75, 0x6e, - 0x49, 0x64, 0x88, 0x01, 0x01, 0x12, 0x46, 0x0a, 0x0f, 0x68, 0x79, 0x70, 0x65, 0x72, 0x70, 0x61, - 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x73, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x17, - 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, - 0x2e, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x48, 0x02, 0x52, 0x0f, 0x68, 0x79, 0x70, 0x65, 0x72, - 0x70, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x73, 0x88, 0x01, 0x01, 0x12, 0x45, 0x0a, - 0x0f, 0x73, 0x75, 0x6d, 0x6d, 0x61, 0x72, 0x79, 0x5f, 0x6d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, - 0x18, 0x0b, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x48, - 0x03, 0x52, 0x0e, 0x73, 0x75, 0x6d, 0x6d, 0x61, 0x72, 0x79, 0x4d, 0x65, 0x74, 0x72, 0x69, 0x63, - 0x73, 0x88, 0x01, 0x01, 0x12, 0x1c, 0x0a, 0x07, 0x75, 0x73, 0x65, 0x72, 0x5f, 0x69, 0x64, 0x18, - 0x0c, 0x20, 0x01, 0x28, 0x05, 0x48, 0x04, 0x52, 0x06, 0x75, 0x73, 0x65, 0x72, 0x49, 0x64, 0x88, - 0x01, 0x01, 0x12, 0x1f, 0x0a, 0x08, 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x0d, - 0x20, 0x01, 0x28, 0x05, 0x48, 0x05, 0x52, 0x08, 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, - 0x88, 0x01, 0x01, 0x12, 0x1d, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x5f, 0x69, - 0x64, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x05, 0x52, 0x09, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, - 0x49, 0x64, 0x12, 0x21, 0x0a, 0x0c, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x5f, 0x6e, 0x61, - 0x6d, 0x65, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, - 0x74, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x21, 0x0a, 0x0c, 0x77, 0x6f, 0x72, 0x6b, 0x73, 0x70, 0x61, - 0x63, 0x65, 0x5f, 0x69, 0x64, 0x18, 0x10, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x77, 0x6f, 0x72, - 0x6b, 0x73, 0x70, 0x61, 0x63, 0x65, 0x49, 0x64, 0x12, 0x25, 0x0a, 0x0e, 0x77, 0x6f, 0x72, 0x6b, - 0x73, 0x70, 0x61, 0x63, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x11, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x0d, 0x77, 0x6f, 0x72, 0x6b, 0x73, 0x70, 0x61, 0x63, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, - 0x27, 0x0a, 0x0f, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x5f, 0x61, 0x72, 0x63, 0x68, 0x69, 0x76, - 0x65, 0x64, 0x18, 0x12, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0e, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, - 0x41, 0x72, 0x63, 0x68, 0x69, 0x76, 0x65, 0x64, 0x12, 0x38, 0x0a, 0x08, 0x6d, 0x65, 0x74, 0x61, - 0x64, 0x61, 0x74, 0x61, 0x18, 0x13, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x67, 0x6f, 0x6f, - 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x53, 0x74, 0x72, - 0x75, 0x63, 0x74, 0x48, 0x06, 0x52, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x88, - 0x01, 0x01, 0x12, 0x1a, 0x0a, 0x08, 0x61, 0x72, 0x63, 0x68, 0x69, 0x76, 0x65, 0x64, 0x18, 0x14, - 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x61, 0x72, 0x63, 0x68, 0x69, 0x76, 0x65, 0x64, 0x12, 0x1b, - 0x0a, 0x09, 0x73, 0x65, 0x61, 0x72, 0x63, 0x68, 0x5f, 0x69, 0x64, 0x18, 0x15, 0x20, 0x01, 0x28, - 0x05, 0x52, 0x08, 0x73, 0x65, 0x61, 0x72, 0x63, 0x68, 0x49, 0x64, 0x12, 0x23, 0x0a, 0x0d, 0x73, - 0x65, 0x61, 0x72, 0x63, 0x68, 0x65, 0x72, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x16, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x0c, 0x73, 0x65, 0x61, 0x72, 0x63, 0x68, 0x65, 0x72, 0x54, 0x79, 0x70, 0x65, - 0x12, 0x27, 0x0a, 0x0f, 0x73, 0x65, 0x61, 0x72, 0x63, 0x68, 0x65, 0x72, 0x5f, 0x6d, 0x65, 0x74, - 0x72, 0x69, 0x63, 0x18, 0x17, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x73, 0x65, 0x61, 0x72, 0x63, - 0x68, 0x65, 0x72, 0x4d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x12, 0x24, 0x0a, 0x0b, 0x66, 0x6f, 0x72, - 0x6b, 0x65, 0x64, 0x5f, 0x66, 0x72, 0x6f, 0x6d, 0x18, 0x18, 0x20, 0x01, 0x28, 0x05, 0x48, 0x07, - 0x52, 0x0a, 0x66, 0x6f, 0x72, 0x6b, 0x65, 0x64, 0x46, 0x72, 0x6f, 0x6d, 0x88, 0x01, 0x01, 0x12, - 0x31, 0x0a, 0x12, 0x65, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x5f, 0x73, 0x65, 0x61, 0x72, - 0x63, 0x68, 0x5f, 0x69, 0x64, 0x18, 0x19, 0x20, 0x01, 0x28, 0x09, 0x48, 0x08, 0x52, 0x10, 0x65, - 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x49, 0x64, 0x88, - 0x01, 0x01, 0x12, 0x23, 0x0a, 0x0d, 0x72, 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x5f, 0x70, - 0x6f, 0x6f, 0x6c, 0x18, 0x1a, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x72, 0x65, 0x73, 0x6f, 0x75, - 0x72, 0x63, 0x65, 0x50, 0x6f, 0x6f, 0x6c, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x72, 0x6f, 0x67, 0x72, - 0x65, 0x73, 0x73, 0x18, 0x1b, 0x20, 0x01, 0x28, 0x02, 0x52, 0x08, 0x70, 0x72, 0x6f, 0x67, 0x72, - 0x65, 0x73, 0x73, 0x12, 0x20, 0x0a, 0x0b, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, - 0x6f, 0x6e, 0x18, 0x1c, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, - 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1f, 0x0a, 0x0b, 0x73, 0x65, 0x61, 0x72, 0x63, 0x68, 0x5f, - 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x1d, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x73, 0x65, 0x61, 0x72, - 0x63, 0x68, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x75, 0x6e, 0x6d, 0x61, 0x6e, 0x61, - 0x67, 0x65, 0x64, 0x18, 0x1e, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x75, 0x6e, 0x6d, 0x61, 0x6e, - 0x61, 0x67, 0x65, 0x64, 0x12, 0x1f, 0x0a, 0x0b, 0x69, 0x73, 0x5f, 0x6d, 0x75, 0x6c, 0x74, 0x69, - 0x72, 0x75, 0x6e, 0x18, 0x1f, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x69, 0x73, 0x4d, 0x75, 0x6c, - 0x74, 0x69, 0x72, 0x75, 0x6e, 0x12, 0x51, 0x0a, 0x15, 0x70, 0x61, 0x63, 0x68, 0x79, 0x64, 0x65, - 0x72, 0x6d, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x67, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x20, - 0x20, 0x01, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x48, 0x09, 0x52, - 0x14, 0x70, 0x61, 0x63, 0x68, 0x79, 0x64, 0x65, 0x72, 0x6d, 0x49, 0x6e, 0x74, 0x65, 0x67, 0x72, - 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x88, 0x01, 0x01, 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, 0x73, 0x74, - 0x61, 0x72, 0x74, 0x73, 0x18, 0x21, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x72, 0x65, 0x73, 0x74, - 0x61, 0x72, 0x74, 0x73, 0x12, 0x36, 0x0a, 0x17, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x62, 0x61, - 0x74, 0x63, 0x68, 0x65, 0x73, 0x5f, 0x70, 0x72, 0x6f, 0x63, 0x65, 0x73, 0x73, 0x65, 0x64, 0x18, - 0x22, 0x20, 0x01, 0x28, 0x05, 0x52, 0x15, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x42, 0x61, 0x74, 0x63, - 0x68, 0x65, 0x73, 0x50, 0x72, 0x6f, 0x63, 0x65, 0x73, 0x73, 0x65, 0x64, 0x12, 0x4d, 0x0a, 0x0f, - 0x62, 0x65, 0x73, 0x74, 0x5f, 0x76, 0x61, 0x6c, 0x69, 0x64, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, - 0x23, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x24, 0x2e, 0x64, 0x65, 0x74, 0x65, 0x72, 0x6d, 0x69, 0x6e, - 0x65, 0x64, 0x2e, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x2e, 0x76, 0x31, 0x2e, 0x4d, 0x65, 0x74, 0x72, - 0x69, 0x63, 0x73, 0x57, 0x6f, 0x72, 0x6b, 0x6c, 0x6f, 0x61, 0x64, 0x52, 0x0e, 0x62, 0x65, 0x73, - 0x74, 0x56, 0x61, 0x6c, 0x69, 0x64, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x51, 0x0a, 0x11, 0x6c, - 0x61, 0x74, 0x65, 0x73, 0x74, 0x5f, 0x76, 0x61, 0x6c, 0x69, 0x64, 0x61, 0x74, 0x69, 0x6f, 0x6e, - 0x18, 0x24, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x24, 0x2e, 0x64, 0x65, 0x74, 0x65, 0x72, 0x6d, 0x69, - 0x6e, 0x65, 0x64, 0x2e, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x2e, 0x76, 0x31, 0x2e, 0x4d, 0x65, 0x74, - 0x72, 0x69, 0x63, 0x73, 0x57, 0x6f, 0x72, 0x6b, 0x6c, 0x6f, 0x61, 0x64, 0x52, 0x10, 0x6c, 0x61, - 0x74, 0x65, 0x73, 0x74, 0x56, 0x61, 0x6c, 0x69, 0x64, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x50, - 0x0a, 0x0f, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x70, 0x6f, 0x69, 0x6e, - 0x74, 0x18, 0x25, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x64, 0x65, 0x74, 0x65, 0x72, 0x6d, - 0x69, 0x6e, 0x65, 0x64, 0x2e, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x2e, 0x76, 0x31, 0x2e, 0x43, 0x68, - 0x65, 0x63, 0x6b, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x57, 0x6f, 0x72, 0x6b, 0x6c, 0x6f, 0x61, 0x64, - 0x52, 0x0e, 0x62, 0x65, 0x73, 0x74, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x70, 0x6f, 0x69, 0x6e, 0x74, - 0x12, 0x21, 0x0a, 0x0c, 0x72, 0x75, 0x6e, 0x6e, 0x65, 0x72, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x65, - 0x18, 0x26, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x72, 0x75, 0x6e, 0x6e, 0x65, 0x72, 0x53, 0x74, - 0x61, 0x74, 0x65, 0x12, 0x26, 0x0a, 0x0f, 0x77, 0x61, 0x6c, 0x6c, 0x5f, 0x63, 0x6c, 0x6f, 0x63, - 0x6b, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x27, 0x20, 0x01, 0x28, 0x01, 0x52, 0x0d, 0x77, 0x61, - 0x6c, 0x6c, 0x43, 0x6c, 0x6f, 0x63, 0x6b, 0x54, 0x69, 0x6d, 0x65, 0x12, 0x3b, 0x0a, 0x1a, 0x77, - 0x61, 0x72, 0x6d, 0x5f, 0x73, 0x74, 0x61, 0x72, 0x74, 0x5f, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x70, - 0x6f, 0x69, 0x6e, 0x74, 0x5f, 0x75, 0x75, 0x69, 0x64, 0x18, 0x28, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x17, 0x77, 0x61, 0x72, 0x6d, 0x53, 0x74, 0x61, 0x72, 0x74, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x70, - 0x6f, 0x69, 0x6e, 0x74, 0x55, 0x75, 0x69, 0x64, 0x12, 0x19, 0x0a, 0x08, 0x74, 0x61, 0x73, 0x6b, - 0x5f, 0x69, 0x64, 0x73, 0x18, 0x29, 0x20, 0x03, 0x28, 0x09, 0x52, 0x07, 0x74, 0x61, 0x73, 0x6b, - 0x49, 0x64, 0x73, 0x12, 0x31, 0x0a, 0x12, 0x6c, 0x6f, 0x67, 0x5f, 0x72, 0x65, 0x74, 0x65, 0x6e, - 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x64, 0x61, 0x79, 0x73, 0x18, 0x2a, 0x20, 0x01, 0x28, 0x05, 0x48, - 0x0a, 0x52, 0x10, 0x6c, 0x6f, 0x67, 0x52, 0x65, 0x74, 0x65, 0x6e, 0x74, 0x69, 0x6f, 0x6e, 0x44, - 0x61, 0x79, 0x73, 0x88, 0x01, 0x01, 0x3a, 0xa6, 0x01, 0x92, 0x41, 0xa2, 0x01, 0x0a, 0x9f, 0x01, - 0xd2, 0x01, 0x02, 0x69, 0x64, 0xd2, 0x01, 0x0a, 0x73, 0x74, 0x61, 0x72, 0x74, 0x5f, 0x74, 0x69, - 0x6d, 0x65, 0xd2, 0x01, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0xd2, 0x01, 0x04, 0x74, 0x61, 0x67, - 0x73, 0xd2, 0x01, 0x0f, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x5f, 0x73, - 0x69, 0x7a, 0x65, 0xd2, 0x01, 0x10, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x70, 0x6f, 0x69, 0x6e, 0x74, - 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0xd2, 0x01, 0x0a, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, - 0x5f, 0x69, 0x64, 0xd2, 0x01, 0x0c, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x5f, 0x6e, 0x61, - 0x6d, 0x65, 0xd2, 0x01, 0x0c, 0x77, 0x6f, 0x72, 0x6b, 0x73, 0x70, 0x61, 0x63, 0x65, 0x5f, 0x69, - 0x64, 0xd2, 0x01, 0x0e, 0x77, 0x6f, 0x72, 0x6b, 0x73, 0x70, 0x61, 0x63, 0x65, 0x5f, 0x6e, 0x61, - 0x6d, 0x65, 0xd2, 0x01, 0x0f, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x5f, 0x61, 0x72, 0x63, 0x68, - 0x69, 0x76, 0x65, 0x64, 0xd2, 0x01, 0x08, 0x61, 0x72, 0x63, 0x68, 0x69, 0x76, 0x65, 0x64, 0x42, - 0x18, 0x0a, 0x16, 0x5f, 0x73, 0x65, 0x61, 0x72, 0x63, 0x68, 0x65, 0x72, 0x5f, 0x6d, 0x65, 0x74, - 0x72, 0x69, 0x63, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x42, 0x12, 0x0a, 0x10, 0x5f, 0x65, 0x78, - 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x5f, 0x72, 0x75, 0x6e, 0x5f, 0x69, 0x64, 0x42, 0x12, 0x0a, - 0x10, 0x5f, 0x68, 0x79, 0x70, 0x65, 0x72, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, - 0x73, 0x42, 0x12, 0x0a, 0x10, 0x5f, 0x73, 0x75, 0x6d, 0x6d, 0x61, 0x72, 0x79, 0x5f, 0x6d, 0x65, - 0x74, 0x72, 0x69, 0x63, 0x73, 0x42, 0x0a, 0x0a, 0x08, 0x5f, 0x75, 0x73, 0x65, 0x72, 0x5f, 0x69, - 0x64, 0x42, 0x0b, 0x0a, 0x09, 0x5f, 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x42, 0x0b, - 0x0a, 0x09, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x42, 0x0e, 0x0a, 0x0c, 0x5f, - 0x66, 0x6f, 0x72, 0x6b, 0x65, 0x64, 0x5f, 0x66, 0x72, 0x6f, 0x6d, 0x42, 0x15, 0x0a, 0x13, 0x5f, - 0x65, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x5f, 0x73, 0x65, 0x61, 0x72, 0x63, 0x68, 0x5f, - 0x69, 0x64, 0x42, 0x18, 0x0a, 0x16, 0x5f, 0x70, 0x61, 0x63, 0x68, 0x79, 0x64, 0x65, 0x72, 0x6d, - 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x67, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x42, 0x15, 0x0a, 0x13, - 0x5f, 0x6c, 0x6f, 0x67, 0x5f, 0x72, 0x65, 0x74, 0x65, 0x6e, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x64, - 0x61, 0x79, 0x73, 0x42, 0x35, 0x5a, 0x33, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, - 0x6d, 0x2f, 0x64, 0x65, 0x74, 0x65, 0x72, 0x6d, 0x69, 0x6e, 0x65, 0x64, 0x2d, 0x61, 0x69, 0x2f, - 0x64, 0x65, 0x74, 0x65, 0x72, 0x6d, 0x69, 0x6e, 0x65, 0x64, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x72, 0x75, 0x6e, 0x76, 0x31, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x33, + 0x07, 0x65, 0x6e, 0x64, 0x54, 0x69, 0x6d, 0x65, 0x12, 0x30, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, + 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x1a, 0x2e, 0x64, 0x65, 0x74, 0x65, 0x72, 0x6d, + 0x69, 0x6e, 0x65, 0x64, 0x2e, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x74, + 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x6c, 0x61, + 0x62, 0x65, 0x6c, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x09, 0x52, 0x06, 0x6c, 0x61, 0x62, 0x65, + 0x6c, 0x73, 0x12, 0x27, 0x0a, 0x0f, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x70, 0x6f, 0x69, 0x6e, 0x74, + 0x5f, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0e, 0x63, 0x68, 0x65, + 0x63, 0x6b, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x53, 0x69, 0x7a, 0x65, 0x12, 0x29, 0x0a, 0x10, 0x63, + 0x68, 0x65, 0x63, 0x6b, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, + 0x07, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0f, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x70, 0x6f, 0x69, 0x6e, + 0x74, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x12, 0x37, 0x0a, 0x15, 0x73, 0x65, 0x61, 0x72, 0x63, 0x68, + 0x65, 0x72, 0x5f, 0x6d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, + 0x08, 0x20, 0x01, 0x28, 0x01, 0x48, 0x00, 0x52, 0x13, 0x73, 0x65, 0x61, 0x72, 0x63, 0x68, 0x65, + 0x72, 0x4d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x88, 0x01, 0x01, 0x12, + 0x2b, 0x0a, 0x0f, 0x65, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x5f, 0x72, 0x75, 0x6e, 0x5f, + 0x69, 0x64, 0x18, 0x09, 0x20, 0x01, 0x28, 0x09, 0x48, 0x01, 0x52, 0x0d, 0x65, 0x78, 0x74, 0x65, + 0x72, 0x6e, 0x61, 0x6c, 0x52, 0x75, 0x6e, 0x49, 0x64, 0x88, 0x01, 0x01, 0x12, 0x46, 0x0a, 0x0f, + 0x68, 0x79, 0x70, 0x65, 0x72, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x73, 0x18, + 0x0a, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x48, 0x02, + 0x52, 0x0f, 0x68, 0x79, 0x70, 0x65, 0x72, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, + 0x73, 0x88, 0x01, 0x01, 0x12, 0x45, 0x0a, 0x0f, 0x73, 0x75, 0x6d, 0x6d, 0x61, 0x72, 0x79, 0x5f, + 0x6d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x17, 0x2e, + 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, + 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x48, 0x03, 0x52, 0x0e, 0x73, 0x75, 0x6d, 0x6d, 0x61, 0x72, + 0x79, 0x4d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0x88, 0x01, 0x01, 0x12, 0x1c, 0x0a, 0x07, 0x75, + 0x73, 0x65, 0x72, 0x5f, 0x69, 0x64, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x05, 0x48, 0x04, 0x52, 0x06, + 0x75, 0x73, 0x65, 0x72, 0x49, 0x64, 0x88, 0x01, 0x01, 0x12, 0x1f, 0x0a, 0x08, 0x64, 0x75, 0x72, + 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x0d, 0x20, 0x01, 0x28, 0x05, 0x48, 0x05, 0x52, 0x08, 0x64, + 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x88, 0x01, 0x01, 0x12, 0x1d, 0x0a, 0x0a, 0x70, 0x72, + 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x05, 0x52, 0x09, + 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x49, 0x64, 0x12, 0x21, 0x0a, 0x0c, 0x70, 0x72, 0x6f, + 0x6a, 0x65, 0x63, 0x74, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x0b, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x21, 0x0a, 0x0c, + 0x77, 0x6f, 0x72, 0x6b, 0x73, 0x70, 0x61, 0x63, 0x65, 0x5f, 0x69, 0x64, 0x18, 0x10, 0x20, 0x01, + 0x28, 0x05, 0x52, 0x0b, 0x77, 0x6f, 0x72, 0x6b, 0x73, 0x70, 0x61, 0x63, 0x65, 0x49, 0x64, 0x12, + 0x25, 0x0a, 0x0e, 0x77, 0x6f, 0x72, 0x6b, 0x73, 0x70, 0x61, 0x63, 0x65, 0x5f, 0x6e, 0x61, 0x6d, + 0x65, 0x18, 0x11, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x77, 0x6f, 0x72, 0x6b, 0x73, 0x70, 0x61, + 0x63, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x27, 0x0a, 0x0f, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, + 0x5f, 0x61, 0x72, 0x63, 0x68, 0x69, 0x76, 0x65, 0x64, 0x18, 0x12, 0x20, 0x01, 0x28, 0x08, 0x52, + 0x0e, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x41, 0x72, 0x63, 0x68, 0x69, 0x76, 0x65, 0x64, 0x12, + 0x38, 0x0a, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x13, 0x20, 0x01, 0x28, + 0x0b, 0x32, 0x17, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x62, 0x75, 0x66, 0x2e, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x48, 0x06, 0x52, 0x08, 0x6d, 0x65, + 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x88, 0x01, 0x01, 0x12, 0x1a, 0x0a, 0x08, 0x61, 0x72, 0x63, + 0x68, 0x69, 0x76, 0x65, 0x64, 0x18, 0x14, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x61, 0x72, 0x63, + 0x68, 0x69, 0x76, 0x65, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x65, 0x61, 0x72, 0x63, 0x68, 0x5f, + 0x69, 0x64, 0x18, 0x15, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x73, 0x65, 0x61, 0x72, 0x63, 0x68, + 0x49, 0x64, 0x12, 0x23, 0x0a, 0x0d, 0x73, 0x65, 0x61, 0x72, 0x63, 0x68, 0x65, 0x72, 0x5f, 0x74, + 0x79, 0x70, 0x65, 0x18, 0x16, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x73, 0x65, 0x61, 0x72, 0x63, + 0x68, 0x65, 0x72, 0x54, 0x79, 0x70, 0x65, 0x12, 0x27, 0x0a, 0x0f, 0x73, 0x65, 0x61, 0x72, 0x63, + 0x68, 0x65, 0x72, 0x5f, 0x6d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x18, 0x17, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x0e, 0x73, 0x65, 0x61, 0x72, 0x63, 0x68, 0x65, 0x72, 0x4d, 0x65, 0x74, 0x72, 0x69, 0x63, + 0x12, 0x24, 0x0a, 0x0b, 0x66, 0x6f, 0x72, 0x6b, 0x65, 0x64, 0x5f, 0x66, 0x72, 0x6f, 0x6d, 0x18, + 0x18, 0x20, 0x01, 0x28, 0x05, 0x48, 0x07, 0x52, 0x0a, 0x66, 0x6f, 0x72, 0x6b, 0x65, 0x64, 0x46, + 0x72, 0x6f, 0x6d, 0x88, 0x01, 0x01, 0x12, 0x31, 0x0a, 0x12, 0x65, 0x78, 0x74, 0x65, 0x72, 0x6e, + 0x61, 0x6c, 0x5f, 0x73, 0x65, 0x61, 0x72, 0x63, 0x68, 0x5f, 0x69, 0x64, 0x18, 0x19, 0x20, 0x01, + 0x28, 0x09, 0x48, 0x08, 0x52, 0x10, 0x65, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x53, 0x65, + 0x61, 0x72, 0x63, 0x68, 0x49, 0x64, 0x88, 0x01, 0x01, 0x12, 0x23, 0x0a, 0x0d, 0x72, 0x65, 0x73, + 0x6f, 0x75, 0x72, 0x63, 0x65, 0x5f, 0x70, 0x6f, 0x6f, 0x6c, 0x18, 0x1a, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x0c, 0x72, 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x50, 0x6f, 0x6f, 0x6c, 0x12, 0x1a, + 0x0a, 0x08, 0x70, 0x72, 0x6f, 0x67, 0x72, 0x65, 0x73, 0x73, 0x18, 0x1b, 0x20, 0x01, 0x28, 0x02, + 0x52, 0x08, 0x70, 0x72, 0x6f, 0x67, 0x72, 0x65, 0x73, 0x73, 0x12, 0x20, 0x0a, 0x0b, 0x64, 0x65, + 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x1c, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x0b, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1f, 0x0a, 0x0b, + 0x73, 0x65, 0x61, 0x72, 0x63, 0x68, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x1d, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x0a, 0x73, 0x65, 0x61, 0x72, 0x63, 0x68, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x1c, 0x0a, + 0x09, 0x75, 0x6e, 0x6d, 0x61, 0x6e, 0x61, 0x67, 0x65, 0x64, 0x18, 0x1e, 0x20, 0x01, 0x28, 0x08, + 0x52, 0x09, 0x75, 0x6e, 0x6d, 0x61, 0x6e, 0x61, 0x67, 0x65, 0x64, 0x12, 0x1f, 0x0a, 0x0b, 0x69, + 0x73, 0x5f, 0x6d, 0x75, 0x6c, 0x74, 0x69, 0x72, 0x75, 0x6e, 0x18, 0x1f, 0x20, 0x01, 0x28, 0x08, + 0x52, 0x0a, 0x69, 0x73, 0x4d, 0x75, 0x6c, 0x74, 0x69, 0x72, 0x75, 0x6e, 0x12, 0x51, 0x0a, 0x15, + 0x70, 0x61, 0x63, 0x68, 0x79, 0x64, 0x65, 0x72, 0x6d, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x67, 0x72, + 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x20, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x67, 0x6f, + 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x53, 0x74, + 0x72, 0x75, 0x63, 0x74, 0x48, 0x09, 0x52, 0x14, 0x70, 0x61, 0x63, 0x68, 0x79, 0x64, 0x65, 0x72, + 0x6d, 0x49, 0x6e, 0x74, 0x65, 0x67, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x88, 0x01, 0x01, 0x12, + 0x1a, 0x0a, 0x08, 0x72, 0x65, 0x73, 0x74, 0x61, 0x72, 0x74, 0x73, 0x18, 0x21, 0x20, 0x01, 0x28, + 0x05, 0x52, 0x08, 0x72, 0x65, 0x73, 0x74, 0x61, 0x72, 0x74, 0x73, 0x12, 0x36, 0x0a, 0x17, 0x74, + 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x62, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x5f, 0x70, 0x72, 0x6f, + 0x63, 0x65, 0x73, 0x73, 0x65, 0x64, 0x18, 0x22, 0x20, 0x01, 0x28, 0x05, 0x52, 0x15, 0x74, 0x6f, + 0x74, 0x61, 0x6c, 0x42, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x50, 0x72, 0x6f, 0x63, 0x65, 0x73, + 0x73, 0x65, 0x64, 0x12, 0x4d, 0x0a, 0x0f, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x76, 0x61, 0x6c, 0x69, + 0x64, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x23, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x24, 0x2e, 0x64, + 0x65, 0x74, 0x65, 0x72, 0x6d, 0x69, 0x6e, 0x65, 0x64, 0x2e, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x2e, + 0x76, 0x31, 0x2e, 0x4d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0x57, 0x6f, 0x72, 0x6b, 0x6c, 0x6f, + 0x61, 0x64, 0x52, 0x0e, 0x62, 0x65, 0x73, 0x74, 0x56, 0x61, 0x6c, 0x69, 0x64, 0x61, 0x74, 0x69, + 0x6f, 0x6e, 0x12, 0x51, 0x0a, 0x11, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x74, 0x5f, 0x76, 0x61, 0x6c, + 0x69, 0x64, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x24, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x24, 0x2e, + 0x64, 0x65, 0x74, 0x65, 0x72, 0x6d, 0x69, 0x6e, 0x65, 0x64, 0x2e, 0x74, 0x72, 0x69, 0x61, 0x6c, + 0x2e, 0x76, 0x31, 0x2e, 0x4d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0x57, 0x6f, 0x72, 0x6b, 0x6c, + 0x6f, 0x61, 0x64, 0x52, 0x10, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x74, 0x56, 0x61, 0x6c, 0x69, 0x64, + 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x50, 0x0a, 0x0f, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x63, 0x68, + 0x65, 0x63, 0x6b, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x18, 0x25, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x27, + 0x2e, 0x64, 0x65, 0x74, 0x65, 0x72, 0x6d, 0x69, 0x6e, 0x65, 0x64, 0x2e, 0x74, 0x72, 0x69, 0x61, + 0x6c, 0x2e, 0x76, 0x31, 0x2e, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x57, + 0x6f, 0x72, 0x6b, 0x6c, 0x6f, 0x61, 0x64, 0x52, 0x0e, 0x62, 0x65, 0x73, 0x74, 0x43, 0x68, 0x65, + 0x63, 0x6b, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x72, 0x75, 0x6e, 0x6e, 0x65, + 0x72, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x26, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x72, + 0x75, 0x6e, 0x6e, 0x65, 0x72, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x26, 0x0a, 0x0f, 0x77, 0x61, + 0x6c, 0x6c, 0x5f, 0x63, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x27, 0x20, + 0x01, 0x28, 0x01, 0x52, 0x0d, 0x77, 0x61, 0x6c, 0x6c, 0x43, 0x6c, 0x6f, 0x63, 0x6b, 0x54, 0x69, + 0x6d, 0x65, 0x12, 0x3b, 0x0a, 0x1a, 0x77, 0x61, 0x72, 0x6d, 0x5f, 0x73, 0x74, 0x61, 0x72, 0x74, + 0x5f, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x5f, 0x75, 0x75, 0x69, 0x64, + 0x18, 0x28, 0x20, 0x01, 0x28, 0x09, 0x52, 0x17, 0x77, 0x61, 0x72, 0x6d, 0x53, 0x74, 0x61, 0x72, + 0x74, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x55, 0x75, 0x69, 0x64, 0x12, + 0x19, 0x0a, 0x08, 0x74, 0x61, 0x73, 0x6b, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x29, 0x20, 0x03, 0x28, + 0x09, 0x52, 0x07, 0x74, 0x61, 0x73, 0x6b, 0x49, 0x64, 0x73, 0x12, 0x31, 0x0a, 0x12, 0x6c, 0x6f, + 0x67, 0x5f, 0x72, 0x65, 0x74, 0x65, 0x6e, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x64, 0x61, 0x79, 0x73, + 0x18, 0x2a, 0x20, 0x01, 0x28, 0x05, 0x48, 0x0a, 0x52, 0x10, 0x6c, 0x6f, 0x67, 0x52, 0x65, 0x74, + 0x65, 0x6e, 0x74, 0x69, 0x6f, 0x6e, 0x44, 0x61, 0x79, 0x73, 0x88, 0x01, 0x01, 0x3a, 0xa6, 0x01, + 0x92, 0x41, 0xa2, 0x01, 0x0a, 0x9f, 0x01, 0xd2, 0x01, 0x02, 0x69, 0x64, 0xd2, 0x01, 0x0a, 0x73, + 0x74, 0x61, 0x72, 0x74, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0xd2, 0x01, 0x05, 0x73, 0x74, 0x61, 0x74, + 0x65, 0xd2, 0x01, 0x04, 0x74, 0x61, 0x67, 0x73, 0xd2, 0x01, 0x0f, 0x63, 0x68, 0x65, 0x63, 0x6b, + 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x5f, 0x73, 0x69, 0x7a, 0x65, 0xd2, 0x01, 0x10, 0x63, 0x68, 0x65, + 0x63, 0x6b, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0xd2, 0x01, 0x0a, + 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x5f, 0x69, 0x64, 0xd2, 0x01, 0x0c, 0x70, 0x72, 0x6f, + 0x6a, 0x65, 0x63, 0x74, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0xd2, 0x01, 0x0c, 0x77, 0x6f, 0x72, 0x6b, + 0x73, 0x70, 0x61, 0x63, 0x65, 0x5f, 0x69, 0x64, 0xd2, 0x01, 0x0e, 0x77, 0x6f, 0x72, 0x6b, 0x73, + 0x70, 0x61, 0x63, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0xd2, 0x01, 0x0f, 0x70, 0x61, 0x72, 0x65, + 0x6e, 0x74, 0x5f, 0x61, 0x72, 0x63, 0x68, 0x69, 0x76, 0x65, 0x64, 0xd2, 0x01, 0x08, 0x61, 0x72, + 0x63, 0x68, 0x69, 0x76, 0x65, 0x64, 0x42, 0x18, 0x0a, 0x16, 0x5f, 0x73, 0x65, 0x61, 0x72, 0x63, + 0x68, 0x65, 0x72, 0x5f, 0x6d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, + 0x42, 0x12, 0x0a, 0x10, 0x5f, 0x65, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x5f, 0x72, 0x75, + 0x6e, 0x5f, 0x69, 0x64, 0x42, 0x12, 0x0a, 0x10, 0x5f, 0x68, 0x79, 0x70, 0x65, 0x72, 0x70, 0x61, + 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x73, 0x42, 0x12, 0x0a, 0x10, 0x5f, 0x73, 0x75, 0x6d, + 0x6d, 0x61, 0x72, 0x79, 0x5f, 0x6d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0x42, 0x0a, 0x0a, 0x08, + 0x5f, 0x75, 0x73, 0x65, 0x72, 0x5f, 0x69, 0x64, 0x42, 0x0b, 0x0a, 0x09, 0x5f, 0x64, 0x75, 0x72, + 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x42, 0x0b, 0x0a, 0x09, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, + 0x74, 0x61, 0x42, 0x0e, 0x0a, 0x0c, 0x5f, 0x66, 0x6f, 0x72, 0x6b, 0x65, 0x64, 0x5f, 0x66, 0x72, + 0x6f, 0x6d, 0x42, 0x15, 0x0a, 0x13, 0x5f, 0x65, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x5f, + 0x73, 0x65, 0x61, 0x72, 0x63, 0x68, 0x5f, 0x69, 0x64, 0x42, 0x18, 0x0a, 0x16, 0x5f, 0x70, 0x61, + 0x63, 0x68, 0x79, 0x64, 0x65, 0x72, 0x6d, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x67, 0x72, 0x61, 0x74, + 0x69, 0x6f, 0x6e, 0x42, 0x15, 0x0a, 0x13, 0x5f, 0x6c, 0x6f, 0x67, 0x5f, 0x72, 0x65, 0x74, 0x65, + 0x6e, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x64, 0x61, 0x79, 0x73, 0x42, 0x35, 0x5a, 0x33, 0x67, 0x69, + 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x65, 0x74, 0x65, 0x72, 0x6d, 0x69, + 0x6e, 0x65, 0x64, 0x2d, 0x61, 0x69, 0x2f, 0x64, 0x65, 0x74, 0x65, 0x72, 0x6d, 0x69, 0x6e, 0x65, + 0x64, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x72, 0x75, 0x6e, 0x76, + 0x31, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/proto/pkg/trialv1/trial.pb.go b/proto/pkg/trialv1/trial.pb.go index 91a3733a68e..9769cbc4bde 100644 --- a/proto/pkg/trialv1/trial.pb.go +++ b/proto/pkg/trialv1/trial.pb.go @@ -524,8 +524,8 @@ type Trial struct { // metadata associated with the trial (based off the metadata stored in the // run). Metadata *_struct.Struct `protobuf:"bytes,23,opt,name=metadata,proto3,oneof" json:"metadata,omitempty"` - // The log signals. - LogSignal *string `protobuf:"bytes,24,opt,name=log_signal,json=logSignal,proto3,oneof" json:"log_signal,omitempty"` + // Log Policy Matched. + LogPolicyMatched *string `protobuf:"bytes,24,opt,name=log_policy_matched,json=logPolicyMatched,proto3,oneof" json:"log_policy_matched,omitempty"` } func (x *Trial) Reset() { @@ -715,9 +715,9 @@ func (x *Trial) GetMetadata() *_struct.Struct { return nil } -func (x *Trial) GetLogSignal() string { - if x != nil && x.LogSignal != nil { - return *x.LogSignal +func (x *Trial) GetLogPolicyMatched() string { + if x != nil && x.LogPolicyMatched != nil { + return *x.LogPolicyMatched } return "" } @@ -1399,7 +1399,7 @@ var file_determined_trial_v1_trial_proto_rawDesc = []byte{ 0x68, 0x65, 0x73, 0x3a, 0x34, 0x92, 0x41, 0x31, 0x0a, 0x2f, 0xd2, 0x01, 0x07, 0x6d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0xd2, 0x01, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0xd2, 0x01, 0x0a, 0x6e, 0x75, 0x6d, 0x5f, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x73, 0xd2, 0x01, 0x0d, 0x74, 0x6f, 0x74, 0x61, - 0x6c, 0x5f, 0x62, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x22, 0x90, 0x0a, 0x0a, 0x05, 0x54, 0x72, + 0x6c, 0x5f, 0x62, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x22, 0xa7, 0x0a, 0x0a, 0x05, 0x54, 0x72, 0x69, 0x61, 0x6c, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x02, 0x69, 0x64, 0x12, 0x23, 0x0a, 0x0d, 0x65, 0x78, 0x70, 0x65, 0x72, 0x69, 0x6d, 0x65, 0x6e, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0c, 0x65, 0x78, 0x70, 0x65, @@ -1469,184 +1469,186 @@ var file_determined_trial_v1_trial_proto_rawDesc = []byte{ 0x38, 0x0a, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x17, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x48, 0x01, 0x52, 0x08, 0x6d, 0x65, - 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x88, 0x01, 0x01, 0x12, 0x22, 0x0a, 0x0a, 0x6c, 0x6f, 0x67, - 0x5f, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x6c, 0x18, 0x18, 0x20, 0x01, 0x28, 0x09, 0x48, 0x02, 0x52, - 0x09, 0x6c, 0x6f, 0x67, 0x53, 0x69, 0x67, 0x6e, 0x61, 0x6c, 0x88, 0x01, 0x01, 0x3a, 0x5a, 0x92, - 0x41, 0x57, 0x0a, 0x55, 0xd2, 0x01, 0x02, 0x69, 0x64, 0xd2, 0x01, 0x0c, 0x65, 0x78, 0x70, 0x65, - 0x72, 0x69, 0x6d, 0x65, 0x6e, 0x74, 0x49, 0x64, 0xd2, 0x01, 0x09, 0x73, 0x74, 0x61, 0x72, 0x74, - 0x54, 0x69, 0x6d, 0x65, 0xd2, 0x01, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0xd2, 0x01, 0x08, 0x72, - 0x65, 0x73, 0x74, 0x61, 0x72, 0x74, 0x73, 0xd2, 0x01, 0x07, 0x68, 0x70, 0x61, 0x72, 0x61, 0x6d, - 0x73, 0xd2, 0x01, 0x15, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x42, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, - 0x50, 0x72, 0x6f, 0x63, 0x65, 0x73, 0x73, 0x65, 0x64, 0x42, 0x15, 0x0a, 0x13, 0x5f, 0x6c, 0x6f, - 0x67, 0x5f, 0x72, 0x65, 0x74, 0x65, 0x6e, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x64, 0x61, 0x79, 0x73, - 0x42, 0x0b, 0x0a, 0x09, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x42, 0x0d, 0x0a, - 0x0b, 0x5f, 0x6c, 0x6f, 0x67, 0x5f, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x6c, 0x22, 0x9b, 0x03, 0x0a, - 0x19, 0x54, 0x72, 0x69, 0x61, 0x6c, 0x50, 0x72, 0x6f, 0x66, 0x69, 0x6c, 0x65, 0x72, 0x4d, 0x65, - 0x74, 0x72, 0x69, 0x63, 0x4c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x12, 0x19, 0x0a, 0x08, 0x74, 0x72, - 0x69, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x07, 0x74, 0x72, - 0x69, 0x61, 0x6c, 0x49, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x19, 0x0a, 0x08, 0x61, 0x67, 0x65, - 0x6e, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x61, 0x67, 0x65, - 0x6e, 0x74, 0x49, 0x64, 0x12, 0x19, 0x0a, 0x08, 0x67, 0x70, 0x75, 0x5f, 0x75, 0x75, 0x69, 0x64, - 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x67, 0x70, 0x75, 0x55, 0x75, 0x69, 0x64, 0x12, - 0x62, 0x0a, 0x0b, 0x6d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x05, - 0x20, 0x01, 0x28, 0x0e, 0x32, 0x41, 0x2e, 0x64, 0x65, 0x74, 0x65, 0x72, 0x6d, 0x69, 0x6e, 0x65, - 0x64, 0x2e, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x2e, 0x76, 0x31, 0x2e, 0x54, 0x72, 0x69, 0x61, 0x6c, - 0x50, 0x72, 0x6f, 0x66, 0x69, 0x6c, 0x65, 0x72, 0x4d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x4c, 0x61, - 0x62, 0x65, 0x6c, 0x73, 0x2e, 0x50, 0x72, 0x6f, 0x66, 0x69, 0x6c, 0x65, 0x72, 0x4d, 0x65, 0x74, - 0x72, 0x69, 0x63, 0x54, 0x79, 0x70, 0x65, 0x52, 0x0a, 0x6d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x54, - 0x79, 0x70, 0x65, 0x22, 0x9b, 0x01, 0x0a, 0x12, 0x50, 0x72, 0x6f, 0x66, 0x69, 0x6c, 0x65, 0x72, - 0x4d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x54, 0x79, 0x70, 0x65, 0x12, 0x24, 0x0a, 0x20, 0x50, 0x52, - 0x4f, 0x46, 0x49, 0x4c, 0x45, 0x52, 0x5f, 0x4d, 0x45, 0x54, 0x52, 0x49, 0x43, 0x5f, 0x54, 0x59, - 0x50, 0x45, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, - 0x12, 0x1f, 0x0a, 0x1b, 0x50, 0x52, 0x4f, 0x46, 0x49, 0x4c, 0x45, 0x52, 0x5f, 0x4d, 0x45, 0x54, - 0x52, 0x49, 0x43, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x53, 0x59, 0x53, 0x54, 0x45, 0x4d, 0x10, - 0x01, 0x12, 0x1f, 0x0a, 0x1b, 0x50, 0x52, 0x4f, 0x46, 0x49, 0x4c, 0x45, 0x52, 0x5f, 0x4d, 0x45, - 0x54, 0x52, 0x49, 0x43, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x54, 0x49, 0x4d, 0x49, 0x4e, 0x47, - 0x10, 0x02, 0x12, 0x1d, 0x0a, 0x19, 0x50, 0x52, 0x4f, 0x46, 0x49, 0x4c, 0x45, 0x52, 0x5f, 0x4d, - 0x45, 0x54, 0x52, 0x49, 0x43, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x4d, 0x49, 0x53, 0x43, 0x10, - 0x03, 0x3a, 0x17, 0x92, 0x41, 0x14, 0x0a, 0x12, 0xd2, 0x01, 0x08, 0x74, 0x72, 0x69, 0x61, 0x6c, - 0x5f, 0x69, 0x64, 0xd2, 0x01, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x22, 0x81, 0x02, 0x0a, 0x19, 0x54, - 0x72, 0x69, 0x61, 0x6c, 0x50, 0x72, 0x6f, 0x66, 0x69, 0x6c, 0x65, 0x72, 0x4d, 0x65, 0x74, 0x72, - 0x69, 0x63, 0x73, 0x42, 0x61, 0x74, 0x63, 0x68, 0x12, 0x16, 0x0a, 0x06, 0x76, 0x61, 0x6c, 0x75, - 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x02, 0x52, 0x06, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, - 0x12, 0x18, 0x0a, 0x07, 0x62, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, - 0x05, 0x52, 0x07, 0x62, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x12, 0x3a, 0x0a, 0x0a, 0x74, 0x69, - 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1a, - 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, - 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x52, 0x0a, 0x74, 0x69, 0x6d, 0x65, - 0x73, 0x74, 0x61, 0x6d, 0x70, 0x73, 0x12, 0x46, 0x0a, 0x06, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x73, - 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x2e, 0x2e, 0x64, 0x65, 0x74, 0x65, 0x72, 0x6d, 0x69, - 0x6e, 0x65, 0x64, 0x2e, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x2e, 0x76, 0x31, 0x2e, 0x54, 0x72, 0x69, - 0x61, 0x6c, 0x50, 0x72, 0x6f, 0x66, 0x69, 0x6c, 0x65, 0x72, 0x4d, 0x65, 0x74, 0x72, 0x69, 0x63, - 0x4c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x52, 0x06, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x3a, 0x2e, - 0x92, 0x41, 0x2b, 0x0a, 0x29, 0xd2, 0x01, 0x06, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, 0xd2, 0x01, - 0x07, 0x62, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0xd2, 0x01, 0x0a, 0x74, 0x69, 0x6d, 0x65, 0x73, - 0x74, 0x61, 0x6d, 0x70, 0x73, 0xd2, 0x01, 0x06, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x22, 0xda, - 0x01, 0x0a, 0x0e, 0x54, 0x72, 0x69, 0x61, 0x6c, 0x45, 0x61, 0x72, 0x6c, 0x79, 0x45, 0x78, 0x69, - 0x74, 0x12, 0x48, 0x0a, 0x06, 0x72, 0x65, 0x61, 0x73, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x0e, 0x32, 0x30, 0x2e, 0x64, 0x65, 0x74, 0x65, 0x72, 0x6d, 0x69, 0x6e, 0x65, 0x64, 0x2e, 0x74, - 0x72, 0x69, 0x61, 0x6c, 0x2e, 0x76, 0x31, 0x2e, 0x54, 0x72, 0x69, 0x61, 0x6c, 0x45, 0x61, 0x72, - 0x6c, 0x79, 0x45, 0x78, 0x69, 0x74, 0x2e, 0x45, 0x78, 0x69, 0x74, 0x65, 0x64, 0x52, 0x65, 0x61, - 0x73, 0x6f, 0x6e, 0x52, 0x06, 0x72, 0x65, 0x61, 0x73, 0x6f, 0x6e, 0x22, 0x6e, 0x0a, 0x0c, 0x45, - 0x78, 0x69, 0x74, 0x65, 0x64, 0x52, 0x65, 0x61, 0x73, 0x6f, 0x6e, 0x12, 0x1d, 0x0a, 0x19, 0x45, - 0x58, 0x49, 0x54, 0x45, 0x44, 0x5f, 0x52, 0x45, 0x41, 0x53, 0x4f, 0x4e, 0x5f, 0x55, 0x4e, 0x53, - 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x1c, 0x0a, 0x18, 0x45, 0x58, - 0x49, 0x54, 0x45, 0x44, 0x5f, 0x52, 0x45, 0x41, 0x53, 0x4f, 0x4e, 0x5f, 0x49, 0x4e, 0x56, 0x41, - 0x4c, 0x49, 0x44, 0x5f, 0x48, 0x50, 0x10, 0x01, 0x12, 0x21, 0x0a, 0x1d, 0x45, 0x58, 0x49, 0x54, - 0x45, 0x44, 0x5f, 0x52, 0x45, 0x41, 0x53, 0x4f, 0x4e, 0x5f, 0x49, 0x4e, 0x49, 0x54, 0x5f, 0x49, - 0x4e, 0x56, 0x41, 0x4c, 0x49, 0x44, 0x5f, 0x48, 0x50, 0x10, 0x03, 0x3a, 0x0e, 0x92, 0x41, 0x0b, - 0x0a, 0x09, 0xd2, 0x01, 0x06, 0x72, 0x65, 0x61, 0x73, 0x6f, 0x6e, 0x22, 0x7a, 0x0a, 0x0e, 0x52, - 0x65, 0x6e, 0x64, 0x65, 0x7a, 0x76, 0x6f, 0x75, 0x73, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x1c, 0x0a, - 0x09, 0x61, 0x64, 0x64, 0x72, 0x65, 0x73, 0x73, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x09, - 0x52, 0x09, 0x61, 0x64, 0x64, 0x72, 0x65, 0x73, 0x73, 0x65, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x72, - 0x61, 0x6e, 0x6b, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, - 0x14, 0x0a, 0x05, 0x73, 0x6c, 0x6f, 0x74, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x05, 0x52, 0x05, - 0x73, 0x6c, 0x6f, 0x74, 0x73, 0x3a, 0x20, 0x92, 0x41, 0x1d, 0x0a, 0x1b, 0xd2, 0x01, 0x09, 0x61, - 0x64, 0x64, 0x72, 0x65, 0x73, 0x73, 0x65, 0x73, 0xd2, 0x01, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0xd2, - 0x01, 0x05, 0x73, 0x6c, 0x6f, 0x74, 0x73, 0x22, 0x3a, 0x0a, 0x13, 0x54, 0x72, 0x69, 0x61, 0x6c, - 0x52, 0x75, 0x6e, 0x6e, 0x65, 0x72, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x12, 0x14, - 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x73, - 0x74, 0x61, 0x74, 0x65, 0x3a, 0x0d, 0x92, 0x41, 0x0a, 0x0a, 0x08, 0xd2, 0x01, 0x05, 0x73, 0x74, - 0x61, 0x74, 0x65, 0x22, 0xc3, 0x02, 0x0a, 0x0c, 0x54, 0x72, 0x69, 0x61, 0x6c, 0x4d, 0x65, 0x74, - 0x72, 0x69, 0x63, 0x73, 0x12, 0x19, 0x0a, 0x08, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x69, 0x64, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x07, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x49, 0x64, 0x12, - 0x20, 0x0a, 0x0c, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x72, 0x75, 0x6e, 0x5f, 0x69, 0x64, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x52, 0x75, 0x6e, 0x49, - 0x64, 0x12, 0x2c, 0x0a, 0x0f, 0x73, 0x74, 0x65, 0x70, 0x73, 0x5f, 0x63, 0x6f, 0x6d, 0x70, 0x6c, - 0x65, 0x74, 0x65, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x48, 0x00, 0x52, 0x0e, 0x73, 0x74, - 0x65, 0x70, 0x73, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x65, 0x64, 0x88, 0x01, 0x01, 0x12, - 0x40, 0x0a, 0x0b, 0x72, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x04, - 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, - 0x48, 0x01, 0x52, 0x0a, 0x72, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x88, 0x01, - 0x01, 0x12, 0x37, 0x0a, 0x07, 0x6d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0x18, 0x09, 0x20, 0x01, - 0x28, 0x0b, 0x32, 0x1d, 0x2e, 0x64, 0x65, 0x74, 0x65, 0x72, 0x6d, 0x69, 0x6e, 0x65, 0x64, 0x2e, - 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2e, 0x76, 0x31, 0x2e, 0x4d, 0x65, 0x74, 0x72, 0x69, 0x63, - 0x73, 0x52, 0x07, 0x6d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0x3a, 0x29, 0x92, 0x41, 0x26, 0x0a, - 0x24, 0xd2, 0x01, 0x08, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0xd2, 0x01, 0x0c, 0x74, - 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x72, 0x75, 0x6e, 0x5f, 0x69, 0x64, 0xd2, 0x01, 0x07, 0x6d, 0x65, - 0x74, 0x72, 0x69, 0x63, 0x73, 0x42, 0x12, 0x0a, 0x10, 0x5f, 0x73, 0x74, 0x65, 0x70, 0x73, 0x5f, - 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x65, 0x64, 0x42, 0x0e, 0x0a, 0x0c, 0x5f, 0x72, 0x65, - 0x70, 0x6f, 0x72, 0x74, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x22, 0xfb, 0x02, 0x0a, 0x0d, 0x4d, 0x65, - 0x74, 0x72, 0x69, 0x63, 0x73, 0x52, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x12, 0x19, 0x0a, 0x08, 0x74, - 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x07, 0x74, - 0x72, 0x69, 0x61, 0x6c, 0x49, 0x64, 0x12, 0x35, 0x0a, 0x08, 0x65, 0x6e, 0x64, 0x5f, 0x74, 0x69, - 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, - 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, - 0x74, 0x61, 0x6d, 0x70, 0x52, 0x07, 0x65, 0x6e, 0x64, 0x54, 0x69, 0x6d, 0x65, 0x12, 0x31, 0x0a, - 0x07, 0x6d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x17, - 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, - 0x2e, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x52, 0x07, 0x6d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, - 0x12, 0x23, 0x0a, 0x0d, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x62, 0x61, 0x74, 0x63, 0x68, 0x65, - 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0c, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x42, 0x61, - 0x74, 0x63, 0x68, 0x65, 0x73, 0x12, 0x1a, 0x0a, 0x08, 0x61, 0x72, 0x63, 0x68, 0x69, 0x76, 0x65, - 0x64, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x61, 0x72, 0x63, 0x68, 0x69, 0x76, 0x65, - 0x64, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x06, 0x20, 0x01, 0x28, 0x05, 0x52, 0x02, 0x69, - 0x64, 0x12, 0x20, 0x0a, 0x0c, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x72, 0x75, 0x6e, 0x5f, 0x69, - 0x64, 0x18, 0x07, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x52, 0x75, - 0x6e, 0x49, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x18, 0x08, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x05, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x3a, 0x5c, 0x92, 0x41, 0x59, 0x0a, 0x57, - 0xd2, 0x01, 0x08, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0xd2, 0x01, 0x08, 0x65, 0x6e, - 0x64, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0xd2, 0x01, 0x07, 0x6d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, - 0xd2, 0x01, 0x0d, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x62, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, - 0xd2, 0x01, 0x08, 0x61, 0x72, 0x63, 0x68, 0x69, 0x76, 0x65, 0x64, 0xd2, 0x01, 0x02, 0x69, 0x64, - 0xd2, 0x01, 0x0c, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x72, 0x75, 0x6e, 0x5f, 0x69, 0x64, 0xd2, - 0x01, 0x05, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x22, 0xda, 0x02, 0x0a, 0x0f, 0x54, 0x72, 0x69, 0x61, - 0x6c, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x19, 0x0a, 0x08, 0x74, + 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x88, 0x01, 0x01, 0x12, 0x31, 0x0a, 0x12, 0x6c, 0x6f, 0x67, + 0x5f, 0x70, 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x5f, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x64, 0x18, + 0x18, 0x20, 0x01, 0x28, 0x09, 0x48, 0x02, 0x52, 0x10, 0x6c, 0x6f, 0x67, 0x50, 0x6f, 0x6c, 0x69, + 0x63, 0x79, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x64, 0x88, 0x01, 0x01, 0x3a, 0x5a, 0x92, 0x41, + 0x57, 0x0a, 0x55, 0xd2, 0x01, 0x02, 0x69, 0x64, 0xd2, 0x01, 0x0c, 0x65, 0x78, 0x70, 0x65, 0x72, + 0x69, 0x6d, 0x65, 0x6e, 0x74, 0x49, 0x64, 0xd2, 0x01, 0x09, 0x73, 0x74, 0x61, 0x72, 0x74, 0x54, + 0x69, 0x6d, 0x65, 0xd2, 0x01, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0xd2, 0x01, 0x08, 0x72, 0x65, + 0x73, 0x74, 0x61, 0x72, 0x74, 0x73, 0xd2, 0x01, 0x07, 0x68, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, + 0xd2, 0x01, 0x15, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x42, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x50, + 0x72, 0x6f, 0x63, 0x65, 0x73, 0x73, 0x65, 0x64, 0x42, 0x15, 0x0a, 0x13, 0x5f, 0x6c, 0x6f, 0x67, + 0x5f, 0x72, 0x65, 0x74, 0x65, 0x6e, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x64, 0x61, 0x79, 0x73, 0x42, + 0x0b, 0x0a, 0x09, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x42, 0x15, 0x0a, 0x13, + 0x5f, 0x6c, 0x6f, 0x67, 0x5f, 0x70, 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x5f, 0x6d, 0x61, 0x74, 0x63, + 0x68, 0x65, 0x64, 0x22, 0x9b, 0x03, 0x0a, 0x19, 0x54, 0x72, 0x69, 0x61, 0x6c, 0x50, 0x72, 0x6f, + 0x66, 0x69, 0x6c, 0x65, 0x72, 0x4d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x4c, 0x61, 0x62, 0x65, 0x6c, + 0x73, 0x12, 0x19, 0x0a, 0x08, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x05, 0x52, 0x07, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x49, 0x64, 0x12, 0x12, 0x0a, 0x04, + 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, + 0x12, 0x19, 0x0a, 0x08, 0x61, 0x67, 0x65, 0x6e, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x07, 0x61, 0x67, 0x65, 0x6e, 0x74, 0x49, 0x64, 0x12, 0x19, 0x0a, 0x08, 0x67, + 0x70, 0x75, 0x5f, 0x75, 0x75, 0x69, 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x67, + 0x70, 0x75, 0x55, 0x75, 0x69, 0x64, 0x12, 0x62, 0x0a, 0x0b, 0x6d, 0x65, 0x74, 0x72, 0x69, 0x63, + 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x41, 0x2e, 0x64, 0x65, + 0x74, 0x65, 0x72, 0x6d, 0x69, 0x6e, 0x65, 0x64, 0x2e, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x2e, 0x76, + 0x31, 0x2e, 0x54, 0x72, 0x69, 0x61, 0x6c, 0x50, 0x72, 0x6f, 0x66, 0x69, 0x6c, 0x65, 0x72, 0x4d, + 0x65, 0x74, 0x72, 0x69, 0x63, 0x4c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x2e, 0x50, 0x72, 0x6f, 0x66, + 0x69, 0x6c, 0x65, 0x72, 0x4d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x54, 0x79, 0x70, 0x65, 0x52, 0x0a, + 0x6d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x54, 0x79, 0x70, 0x65, 0x22, 0x9b, 0x01, 0x0a, 0x12, 0x50, + 0x72, 0x6f, 0x66, 0x69, 0x6c, 0x65, 0x72, 0x4d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x54, 0x79, 0x70, + 0x65, 0x12, 0x24, 0x0a, 0x20, 0x50, 0x52, 0x4f, 0x46, 0x49, 0x4c, 0x45, 0x52, 0x5f, 0x4d, 0x45, + 0x54, 0x52, 0x49, 0x43, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, + 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x1f, 0x0a, 0x1b, 0x50, 0x52, 0x4f, 0x46, 0x49, + 0x4c, 0x45, 0x52, 0x5f, 0x4d, 0x45, 0x54, 0x52, 0x49, 0x43, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, + 0x53, 0x59, 0x53, 0x54, 0x45, 0x4d, 0x10, 0x01, 0x12, 0x1f, 0x0a, 0x1b, 0x50, 0x52, 0x4f, 0x46, + 0x49, 0x4c, 0x45, 0x52, 0x5f, 0x4d, 0x45, 0x54, 0x52, 0x49, 0x43, 0x5f, 0x54, 0x59, 0x50, 0x45, + 0x5f, 0x54, 0x49, 0x4d, 0x49, 0x4e, 0x47, 0x10, 0x02, 0x12, 0x1d, 0x0a, 0x19, 0x50, 0x52, 0x4f, + 0x46, 0x49, 0x4c, 0x45, 0x52, 0x5f, 0x4d, 0x45, 0x54, 0x52, 0x49, 0x43, 0x5f, 0x54, 0x59, 0x50, + 0x45, 0x5f, 0x4d, 0x49, 0x53, 0x43, 0x10, 0x03, 0x3a, 0x17, 0x92, 0x41, 0x14, 0x0a, 0x12, 0xd2, + 0x01, 0x08, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0xd2, 0x01, 0x04, 0x6e, 0x61, 0x6d, + 0x65, 0x22, 0x81, 0x02, 0x0a, 0x19, 0x54, 0x72, 0x69, 0x61, 0x6c, 0x50, 0x72, 0x6f, 0x66, 0x69, + 0x6c, 0x65, 0x72, 0x4d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0x42, 0x61, 0x74, 0x63, 0x68, 0x12, + 0x16, 0x0a, 0x06, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x02, 0x52, + 0x06, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x62, 0x61, 0x74, 0x63, 0x68, + 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x05, 0x52, 0x07, 0x62, 0x61, 0x74, 0x63, 0x68, 0x65, + 0x73, 0x12, 0x3a, 0x0a, 0x0a, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x73, 0x18, + 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, + 0x70, 0x52, 0x0a, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x73, 0x12, 0x46, 0x0a, + 0x06, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x2e, 0x2e, + 0x64, 0x65, 0x74, 0x65, 0x72, 0x6d, 0x69, 0x6e, 0x65, 0x64, 0x2e, 0x74, 0x72, 0x69, 0x61, 0x6c, + 0x2e, 0x76, 0x31, 0x2e, 0x54, 0x72, 0x69, 0x61, 0x6c, 0x50, 0x72, 0x6f, 0x66, 0x69, 0x6c, 0x65, + 0x72, 0x4d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x4c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x52, 0x06, 0x6c, + 0x61, 0x62, 0x65, 0x6c, 0x73, 0x3a, 0x2e, 0x92, 0x41, 0x2b, 0x0a, 0x29, 0xd2, 0x01, 0x06, 0x76, + 0x61, 0x6c, 0x75, 0x65, 0x73, 0xd2, 0x01, 0x07, 0x62, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0xd2, + 0x01, 0x0a, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x73, 0xd2, 0x01, 0x06, 0x6c, + 0x61, 0x62, 0x65, 0x6c, 0x73, 0x22, 0xda, 0x01, 0x0a, 0x0e, 0x54, 0x72, 0x69, 0x61, 0x6c, 0x45, + 0x61, 0x72, 0x6c, 0x79, 0x45, 0x78, 0x69, 0x74, 0x12, 0x48, 0x0a, 0x06, 0x72, 0x65, 0x61, 0x73, + 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x30, 0x2e, 0x64, 0x65, 0x74, 0x65, 0x72, + 0x6d, 0x69, 0x6e, 0x65, 0x64, 0x2e, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x2e, 0x76, 0x31, 0x2e, 0x54, + 0x72, 0x69, 0x61, 0x6c, 0x45, 0x61, 0x72, 0x6c, 0x79, 0x45, 0x78, 0x69, 0x74, 0x2e, 0x45, 0x78, + 0x69, 0x74, 0x65, 0x64, 0x52, 0x65, 0x61, 0x73, 0x6f, 0x6e, 0x52, 0x06, 0x72, 0x65, 0x61, 0x73, + 0x6f, 0x6e, 0x22, 0x6e, 0x0a, 0x0c, 0x45, 0x78, 0x69, 0x74, 0x65, 0x64, 0x52, 0x65, 0x61, 0x73, + 0x6f, 0x6e, 0x12, 0x1d, 0x0a, 0x19, 0x45, 0x58, 0x49, 0x54, 0x45, 0x44, 0x5f, 0x52, 0x45, 0x41, + 0x53, 0x4f, 0x4e, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, + 0x00, 0x12, 0x1c, 0x0a, 0x18, 0x45, 0x58, 0x49, 0x54, 0x45, 0x44, 0x5f, 0x52, 0x45, 0x41, 0x53, + 0x4f, 0x4e, 0x5f, 0x49, 0x4e, 0x56, 0x41, 0x4c, 0x49, 0x44, 0x5f, 0x48, 0x50, 0x10, 0x01, 0x12, + 0x21, 0x0a, 0x1d, 0x45, 0x58, 0x49, 0x54, 0x45, 0x44, 0x5f, 0x52, 0x45, 0x41, 0x53, 0x4f, 0x4e, + 0x5f, 0x49, 0x4e, 0x49, 0x54, 0x5f, 0x49, 0x4e, 0x56, 0x41, 0x4c, 0x49, 0x44, 0x5f, 0x48, 0x50, + 0x10, 0x03, 0x3a, 0x0e, 0x92, 0x41, 0x0b, 0x0a, 0x09, 0xd2, 0x01, 0x06, 0x72, 0x65, 0x61, 0x73, + 0x6f, 0x6e, 0x22, 0x7a, 0x0a, 0x0e, 0x52, 0x65, 0x6e, 0x64, 0x65, 0x7a, 0x76, 0x6f, 0x75, 0x73, + 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x1c, 0x0a, 0x09, 0x61, 0x64, 0x64, 0x72, 0x65, 0x73, 0x73, 0x65, + 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x09, 0x52, 0x09, 0x61, 0x64, 0x64, 0x72, 0x65, 0x73, 0x73, + 0x65, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, + 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x6c, 0x6f, 0x74, 0x73, 0x18, + 0x03, 0x20, 0x03, 0x28, 0x05, 0x52, 0x05, 0x73, 0x6c, 0x6f, 0x74, 0x73, 0x3a, 0x20, 0x92, 0x41, + 0x1d, 0x0a, 0x1b, 0xd2, 0x01, 0x09, 0x61, 0x64, 0x64, 0x72, 0x65, 0x73, 0x73, 0x65, 0x73, 0xd2, + 0x01, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0xd2, 0x01, 0x05, 0x73, 0x6c, 0x6f, 0x74, 0x73, 0x22, 0x3a, + 0x0a, 0x13, 0x54, 0x72, 0x69, 0x61, 0x6c, 0x52, 0x75, 0x6e, 0x6e, 0x65, 0x72, 0x4d, 0x65, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x3a, 0x0d, 0x92, 0x41, 0x0a, + 0x0a, 0x08, 0xd2, 0x01, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x22, 0xc3, 0x02, 0x0a, 0x0c, 0x54, + 0x72, 0x69, 0x61, 0x6c, 0x4d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0x12, 0x19, 0x0a, 0x08, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x07, 0x74, - 0x72, 0x69, 0x61, 0x6c, 0x49, 0x64, 0x12, 0x27, 0x0a, 0x0f, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x70, - 0x6f, 0x69, 0x6e, 0x74, 0x5f, 0x75, 0x75, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x0e, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x55, 0x75, 0x69, 0x64, 0x12, - 0x1e, 0x0a, 0x08, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x5f, 0x69, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, - 0x05, 0x48, 0x00, 0x52, 0x07, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x49, 0x64, 0x88, 0x01, 0x01, 0x12, - 0x28, 0x0a, 0x0d, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, - 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x48, 0x01, 0x52, 0x0c, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x56, - 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x88, 0x01, 0x01, 0x12, 0x5d, 0x0a, 0x16, 0x74, 0x72, 0x69, - 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x5f, 0x69, 0x6e, 0x66, 0x6f, 0x5f, 0x74, - 0x79, 0x70, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x28, 0x2e, 0x64, 0x65, 0x74, 0x65, - 0x72, 0x6d, 0x69, 0x6e, 0x65, 0x64, 0x2e, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x2e, 0x76, 0x31, 0x2e, - 0x54, 0x72, 0x69, 0x61, 0x6c, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x49, 0x6e, 0x66, 0x6f, 0x54, - 0x79, 0x70, 0x65, 0x52, 0x13, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, - 0x49, 0x6e, 0x66, 0x6f, 0x54, 0x79, 0x70, 0x65, 0x3a, 0x3b, 0x92, 0x41, 0x38, 0x0a, 0x36, 0xd2, - 0x01, 0x08, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0xd2, 0x01, 0x0f, 0x63, 0x68, 0x65, - 0x63, 0x6b, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x5f, 0x75, 0x75, 0x69, 0x64, 0xd2, 0x01, 0x16, 0x74, - 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x5f, 0x69, 0x6e, 0x66, 0x6f, - 0x5f, 0x74, 0x79, 0x70, 0x65, 0x42, 0x0b, 0x0a, 0x09, 0x5f, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x5f, - 0x69, 0x64, 0x42, 0x10, 0x0a, 0x0e, 0x5f, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x5f, 0x76, 0x65, 0x72, - 0x73, 0x69, 0x6f, 0x6e, 0x2a, 0xb8, 0x02, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x15, - 0x0a, 0x11, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, - 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x10, 0x0a, 0x0c, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x41, - 0x43, 0x54, 0x49, 0x56, 0x45, 0x10, 0x01, 0x12, 0x10, 0x0a, 0x0c, 0x53, 0x54, 0x41, 0x54, 0x45, - 0x5f, 0x50, 0x41, 0x55, 0x53, 0x45, 0x44, 0x10, 0x02, 0x12, 0x1b, 0x0a, 0x17, 0x53, 0x54, 0x41, - 0x54, 0x45, 0x5f, 0x53, 0x54, 0x4f, 0x50, 0x50, 0x49, 0x4e, 0x47, 0x5f, 0x43, 0x41, 0x4e, 0x43, - 0x45, 0x4c, 0x45, 0x44, 0x10, 0x03, 0x12, 0x19, 0x0a, 0x15, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, - 0x53, 0x54, 0x4f, 0x50, 0x50, 0x49, 0x4e, 0x47, 0x5f, 0x4b, 0x49, 0x4c, 0x4c, 0x45, 0x44, 0x10, - 0x04, 0x12, 0x1c, 0x0a, 0x18, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x53, 0x54, 0x4f, 0x50, 0x50, - 0x49, 0x4e, 0x47, 0x5f, 0x43, 0x4f, 0x4d, 0x50, 0x4c, 0x45, 0x54, 0x45, 0x44, 0x10, 0x05, 0x12, - 0x18, 0x0a, 0x14, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x53, 0x54, 0x4f, 0x50, 0x50, 0x49, 0x4e, - 0x47, 0x5f, 0x45, 0x52, 0x52, 0x4f, 0x52, 0x10, 0x06, 0x12, 0x12, 0x0a, 0x0e, 0x53, 0x54, 0x41, - 0x54, 0x45, 0x5f, 0x43, 0x41, 0x4e, 0x43, 0x45, 0x4c, 0x45, 0x44, 0x10, 0x07, 0x12, 0x13, 0x0a, - 0x0f, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x43, 0x4f, 0x4d, 0x50, 0x4c, 0x45, 0x54, 0x45, 0x44, - 0x10, 0x08, 0x12, 0x0f, 0x0a, 0x0b, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x45, 0x52, 0x52, 0x4f, - 0x52, 0x10, 0x09, 0x12, 0x10, 0x0a, 0x0c, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x51, 0x55, 0x45, - 0x55, 0x45, 0x44, 0x10, 0x0a, 0x12, 0x11, 0x0a, 0x0d, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x50, - 0x55, 0x4c, 0x4c, 0x49, 0x4e, 0x47, 0x10, 0x0b, 0x12, 0x12, 0x0a, 0x0e, 0x53, 0x54, 0x41, 0x54, - 0x45, 0x5f, 0x53, 0x54, 0x41, 0x52, 0x54, 0x49, 0x4e, 0x47, 0x10, 0x0c, 0x12, 0x11, 0x0a, 0x0d, - 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x52, 0x55, 0x4e, 0x4e, 0x49, 0x4e, 0x47, 0x10, 0x0d, 0x2a, - 0x8b, 0x01, 0x0a, 0x13, 0x54, 0x72, 0x69, 0x61, 0x6c, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x49, - 0x6e, 0x66, 0x6f, 0x54, 0x79, 0x70, 0x65, 0x12, 0x26, 0x0a, 0x22, 0x54, 0x52, 0x49, 0x41, 0x4c, - 0x5f, 0x53, 0x4f, 0x55, 0x52, 0x43, 0x45, 0x5f, 0x49, 0x4e, 0x46, 0x4f, 0x5f, 0x54, 0x59, 0x50, - 0x45, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, - 0x24, 0x0a, 0x20, 0x54, 0x52, 0x49, 0x41, 0x4c, 0x5f, 0x53, 0x4f, 0x55, 0x52, 0x43, 0x45, 0x5f, - 0x49, 0x4e, 0x46, 0x4f, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x49, 0x4e, 0x46, 0x45, 0x52, 0x45, - 0x4e, 0x43, 0x45, 0x10, 0x01, 0x12, 0x26, 0x0a, 0x22, 0x54, 0x52, 0x49, 0x41, 0x4c, 0x5f, 0x53, - 0x4f, 0x55, 0x52, 0x43, 0x45, 0x5f, 0x49, 0x4e, 0x46, 0x4f, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, - 0x46, 0x49, 0x4e, 0x45, 0x5f, 0x54, 0x55, 0x4e, 0x49, 0x4e, 0x47, 0x10, 0x02, 0x42, 0x37, 0x5a, - 0x35, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x65, 0x74, 0x65, - 0x72, 0x6d, 0x69, 0x6e, 0x65, 0x64, 0x2d, 0x61, 0x69, 0x2f, 0x64, 0x65, 0x74, 0x65, 0x72, 0x6d, - 0x69, 0x6e, 0x65, 0x64, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x74, - 0x72, 0x69, 0x61, 0x6c, 0x76, 0x31, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x72, 0x69, 0x61, 0x6c, 0x49, 0x64, 0x12, 0x20, 0x0a, 0x0c, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, + 0x72, 0x75, 0x6e, 0x5f, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x74, 0x72, + 0x69, 0x61, 0x6c, 0x52, 0x75, 0x6e, 0x49, 0x64, 0x12, 0x2c, 0x0a, 0x0f, 0x73, 0x74, 0x65, 0x70, + 0x73, 0x5f, 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x65, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, + 0x05, 0x48, 0x00, 0x52, 0x0e, 0x73, 0x74, 0x65, 0x70, 0x73, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, + 0x74, 0x65, 0x64, 0x88, 0x01, 0x01, 0x12, 0x40, 0x0a, 0x0b, 0x72, 0x65, 0x70, 0x6f, 0x72, 0x74, + 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, + 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, + 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x48, 0x01, 0x52, 0x0a, 0x72, 0x65, 0x70, 0x6f, 0x72, + 0x74, 0x54, 0x69, 0x6d, 0x65, 0x88, 0x01, 0x01, 0x12, 0x37, 0x0a, 0x07, 0x6d, 0x65, 0x74, 0x72, + 0x69, 0x63, 0x73, 0x18, 0x09, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1d, 0x2e, 0x64, 0x65, 0x74, 0x65, + 0x72, 0x6d, 0x69, 0x6e, 0x65, 0x64, 0x2e, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2e, 0x76, 0x31, + 0x2e, 0x4d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0x52, 0x07, 0x6d, 0x65, 0x74, 0x72, 0x69, 0x63, + 0x73, 0x3a, 0x29, 0x92, 0x41, 0x26, 0x0a, 0x24, 0xd2, 0x01, 0x08, 0x74, 0x72, 0x69, 0x61, 0x6c, + 0x5f, 0x69, 0x64, 0xd2, 0x01, 0x0c, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x72, 0x75, 0x6e, 0x5f, + 0x69, 0x64, 0xd2, 0x01, 0x07, 0x6d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0x42, 0x12, 0x0a, 0x10, + 0x5f, 0x73, 0x74, 0x65, 0x70, 0x73, 0x5f, 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x65, 0x64, + 0x42, 0x0e, 0x0a, 0x0c, 0x5f, 0x72, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x5f, 0x74, 0x69, 0x6d, 0x65, + 0x22, 0xfb, 0x02, 0x0a, 0x0d, 0x4d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0x52, 0x65, 0x70, 0x6f, + 0x72, 0x74, 0x12, 0x19, 0x0a, 0x08, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x05, 0x52, 0x07, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x49, 0x64, 0x12, 0x35, 0x0a, + 0x08, 0x65, 0x6e, 0x64, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, + 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, + 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x52, 0x07, 0x65, 0x6e, 0x64, + 0x54, 0x69, 0x6d, 0x65, 0x12, 0x31, 0x0a, 0x07, 0x6d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0x18, + 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x52, 0x07, + 0x6d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0x12, 0x23, 0x0a, 0x0d, 0x74, 0x6f, 0x74, 0x61, 0x6c, + 0x5f, 0x62, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0c, + 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x42, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x12, 0x1a, 0x0a, 0x08, + 0x61, 0x72, 0x63, 0x68, 0x69, 0x76, 0x65, 0x64, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, + 0x61, 0x72, 0x63, 0x68, 0x69, 0x76, 0x65, 0x64, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x06, + 0x20, 0x01, 0x28, 0x05, 0x52, 0x02, 0x69, 0x64, 0x12, 0x20, 0x0a, 0x0c, 0x74, 0x72, 0x69, 0x61, + 0x6c, 0x5f, 0x72, 0x75, 0x6e, 0x5f, 0x69, 0x64, 0x18, 0x07, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, + 0x74, 0x72, 0x69, 0x61, 0x6c, 0x52, 0x75, 0x6e, 0x49, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x67, 0x72, + 0x6f, 0x75, 0x70, 0x18, 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x67, 0x72, 0x6f, 0x75, 0x70, + 0x3a, 0x5c, 0x92, 0x41, 0x59, 0x0a, 0x57, 0xd2, 0x01, 0x08, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, + 0x69, 0x64, 0xd2, 0x01, 0x08, 0x65, 0x6e, 0x64, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0xd2, 0x01, 0x07, + 0x6d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0xd2, 0x01, 0x0d, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, + 0x62, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0xd2, 0x01, 0x08, 0x61, 0x72, 0x63, 0x68, 0x69, 0x76, + 0x65, 0x64, 0xd2, 0x01, 0x02, 0x69, 0x64, 0xd2, 0x01, 0x0c, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, + 0x72, 0x75, 0x6e, 0x5f, 0x69, 0x64, 0xd2, 0x01, 0x05, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x22, 0xda, + 0x02, 0x0a, 0x0f, 0x54, 0x72, 0x69, 0x61, 0x6c, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x49, 0x6e, + 0x66, 0x6f, 0x12, 0x19, 0x0a, 0x08, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x05, 0x52, 0x07, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x49, 0x64, 0x12, 0x27, 0x0a, + 0x0f, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x5f, 0x75, 0x75, 0x69, 0x64, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x70, 0x6f, 0x69, + 0x6e, 0x74, 0x55, 0x75, 0x69, 0x64, 0x12, 0x1e, 0x0a, 0x08, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x5f, + 0x69, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x48, 0x00, 0x52, 0x07, 0x6d, 0x6f, 0x64, 0x65, + 0x6c, 0x49, 0x64, 0x88, 0x01, 0x01, 0x12, 0x28, 0x0a, 0x0d, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x5f, + 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x48, 0x01, 0x52, + 0x0c, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x88, 0x01, 0x01, + 0x12, 0x5d, 0x0a, 0x16, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, + 0x5f, 0x69, 0x6e, 0x66, 0x6f, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0e, + 0x32, 0x28, 0x2e, 0x64, 0x65, 0x74, 0x65, 0x72, 0x6d, 0x69, 0x6e, 0x65, 0x64, 0x2e, 0x74, 0x72, + 0x69, 0x61, 0x6c, 0x2e, 0x76, 0x31, 0x2e, 0x54, 0x72, 0x69, 0x61, 0x6c, 0x53, 0x6f, 0x75, 0x72, + 0x63, 0x65, 0x49, 0x6e, 0x66, 0x6f, 0x54, 0x79, 0x70, 0x65, 0x52, 0x13, 0x74, 0x72, 0x69, 0x61, + 0x6c, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x49, 0x6e, 0x66, 0x6f, 0x54, 0x79, 0x70, 0x65, 0x3a, + 0x3b, 0x92, 0x41, 0x38, 0x0a, 0x36, 0xd2, 0x01, 0x08, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x69, + 0x64, 0xd2, 0x01, 0x0f, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x5f, 0x75, + 0x75, 0x69, 0x64, 0xd2, 0x01, 0x16, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x75, 0x72, + 0x63, 0x65, 0x5f, 0x69, 0x6e, 0x66, 0x6f, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x42, 0x0b, 0x0a, 0x09, + 0x5f, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x5f, 0x69, 0x64, 0x42, 0x10, 0x0a, 0x0e, 0x5f, 0x6d, 0x6f, + 0x64, 0x65, 0x6c, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x2a, 0xb8, 0x02, 0x0a, 0x05, + 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x15, 0x0a, 0x11, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x55, + 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x10, 0x0a, 0x0c, + 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x41, 0x43, 0x54, 0x49, 0x56, 0x45, 0x10, 0x01, 0x12, 0x10, + 0x0a, 0x0c, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x50, 0x41, 0x55, 0x53, 0x45, 0x44, 0x10, 0x02, + 0x12, 0x1b, 0x0a, 0x17, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x53, 0x54, 0x4f, 0x50, 0x50, 0x49, + 0x4e, 0x47, 0x5f, 0x43, 0x41, 0x4e, 0x43, 0x45, 0x4c, 0x45, 0x44, 0x10, 0x03, 0x12, 0x19, 0x0a, + 0x15, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x53, 0x54, 0x4f, 0x50, 0x50, 0x49, 0x4e, 0x47, 0x5f, + 0x4b, 0x49, 0x4c, 0x4c, 0x45, 0x44, 0x10, 0x04, 0x12, 0x1c, 0x0a, 0x18, 0x53, 0x54, 0x41, 0x54, + 0x45, 0x5f, 0x53, 0x54, 0x4f, 0x50, 0x50, 0x49, 0x4e, 0x47, 0x5f, 0x43, 0x4f, 0x4d, 0x50, 0x4c, + 0x45, 0x54, 0x45, 0x44, 0x10, 0x05, 0x12, 0x18, 0x0a, 0x14, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, + 0x53, 0x54, 0x4f, 0x50, 0x50, 0x49, 0x4e, 0x47, 0x5f, 0x45, 0x52, 0x52, 0x4f, 0x52, 0x10, 0x06, + 0x12, 0x12, 0x0a, 0x0e, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x43, 0x41, 0x4e, 0x43, 0x45, 0x4c, + 0x45, 0x44, 0x10, 0x07, 0x12, 0x13, 0x0a, 0x0f, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x43, 0x4f, + 0x4d, 0x50, 0x4c, 0x45, 0x54, 0x45, 0x44, 0x10, 0x08, 0x12, 0x0f, 0x0a, 0x0b, 0x53, 0x54, 0x41, + 0x54, 0x45, 0x5f, 0x45, 0x52, 0x52, 0x4f, 0x52, 0x10, 0x09, 0x12, 0x10, 0x0a, 0x0c, 0x53, 0x54, + 0x41, 0x54, 0x45, 0x5f, 0x51, 0x55, 0x45, 0x55, 0x45, 0x44, 0x10, 0x0a, 0x12, 0x11, 0x0a, 0x0d, + 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x50, 0x55, 0x4c, 0x4c, 0x49, 0x4e, 0x47, 0x10, 0x0b, 0x12, + 0x12, 0x0a, 0x0e, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x53, 0x54, 0x41, 0x52, 0x54, 0x49, 0x4e, + 0x47, 0x10, 0x0c, 0x12, 0x11, 0x0a, 0x0d, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x52, 0x55, 0x4e, + 0x4e, 0x49, 0x4e, 0x47, 0x10, 0x0d, 0x2a, 0x8b, 0x01, 0x0a, 0x13, 0x54, 0x72, 0x69, 0x61, 0x6c, + 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x49, 0x6e, 0x66, 0x6f, 0x54, 0x79, 0x70, 0x65, 0x12, 0x26, + 0x0a, 0x22, 0x54, 0x52, 0x49, 0x41, 0x4c, 0x5f, 0x53, 0x4f, 0x55, 0x52, 0x43, 0x45, 0x5f, 0x49, + 0x4e, 0x46, 0x4f, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, + 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x24, 0x0a, 0x20, 0x54, 0x52, 0x49, 0x41, 0x4c, 0x5f, + 0x53, 0x4f, 0x55, 0x52, 0x43, 0x45, 0x5f, 0x49, 0x4e, 0x46, 0x4f, 0x5f, 0x54, 0x59, 0x50, 0x45, + 0x5f, 0x49, 0x4e, 0x46, 0x45, 0x52, 0x45, 0x4e, 0x43, 0x45, 0x10, 0x01, 0x12, 0x26, 0x0a, 0x22, + 0x54, 0x52, 0x49, 0x41, 0x4c, 0x5f, 0x53, 0x4f, 0x55, 0x52, 0x43, 0x45, 0x5f, 0x49, 0x4e, 0x46, + 0x4f, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x46, 0x49, 0x4e, 0x45, 0x5f, 0x54, 0x55, 0x4e, 0x49, + 0x4e, 0x47, 0x10, 0x02, 0x42, 0x37, 0x5a, 0x35, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, + 0x6f, 0x6d, 0x2f, 0x64, 0x65, 0x74, 0x65, 0x72, 0x6d, 0x69, 0x6e, 0x65, 0x64, 0x2d, 0x61, 0x69, + 0x2f, 0x64, 0x65, 0x74, 0x65, 0x72, 0x6d, 0x69, 0x6e, 0x65, 0x64, 0x2f, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x76, 0x31, 0x62, 0x06, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/proto/src/determined/run/v1/run.proto b/proto/src/determined/run/v1/run.proto index a4198754e87..9ef60d1fa38 100644 --- a/proto/src/determined/run/v1/run.proto +++ b/proto/src/determined/run/v1/run.proto @@ -119,8 +119,8 @@ message FlatRun { bool archived = 21; // Project level local id of run. string local_id = 22; - // Log signal. - optional string log_signal = 23; + // Log policy matched. + optional string log_policy_matched = 23; } // Flat run respresentation. diff --git a/proto/src/determined/trial/v1/trial.proto b/proto/src/determined/trial/v1/trial.proto index d4b4d7928a9..a75b7c458e4 100644 --- a/proto/src/determined/trial/v1/trial.proto +++ b/proto/src/determined/trial/v1/trial.proto @@ -149,8 +149,8 @@ message Trial { // metadata associated with the trial (based off the metadata stored in the // run). optional google.protobuf.Struct metadata = 23; - // The log signals. - optional string log_signal = 24; + // Log Policy Matched. + optional string log_policy_matched = 24; } // TrialProfilerMetricLabels are the labels for a single series, where a series diff --git a/schemas/expconf/v0/experiment.json b/schemas/expconf/v0/experiment.json index e863ff7b1b3..972713548cd 100644 --- a/schemas/expconf/v0/experiment.json +++ b/schemas/expconf/v0/experiment.json @@ -121,24 +121,8 @@ "array", "null" ], - "default": [ - { - "pattern": ".*CUDA out of memory.*", - "actions": [ - { - "signal": "CUDA OOM" - } - ] - }, - { - "pattern": ".*uncorrectable ECC error encountered.*", - "actions": [ - { - "signal": "ECC Error" - } - ] - } - ], + "$comment": "setting default to [] lets the actual default always comes from WithDefaults()", + "default": [], "optionalRef": "http://determined.ai/schemas/expconf/v0/log-policies.json" }, "retention_policy": { diff --git a/schemas/expconf/v0/log-action.json b/schemas/expconf/v0/log-action.json index bb2d06fa3ef..050d4d9ec42 100644 --- a/schemas/expconf/v0/log-action.json +++ b/schemas/expconf/v0/log-action.json @@ -3,18 +3,8 @@ "$id": "http://determined.ai/schemas/expconf/v0/log-action.json", "title": "LogAction", "union": { - "defaultMessage": "expect one of the followings: cancel_retries, exclude_node, or an object with a single \"signal\" field", + "defaultMessage": "expect one of the followings: cancel_retries, exclude_node, or an legacy object where object[\"type\"] is one of 'cancel_retries' or 'exclude_node'", "items": [ - { - "unionKey": "singleproperty:signal", - "properties": { - "signal": { - "type": "string" - } - }, - "type": "object", - "additionalProperties": false - }, { "unionKey": "never", "const": "cancel_retries" @@ -22,6 +12,14 @@ { "unionKey": "never", "const": "exclude_node" + }, + { + "unionKey": "const:type=cancel_retries", + "$ref": "http://determined.ai/schemas/expconf/v0/log-legacy-action-cancel-retries.json" + }, + { + "unionKey": "const:type=exclude_node", + "$ref": "http://determined.ai/schemas/expconf/v0/log-legacy-action-exclude-node.json" } ] } diff --git a/schemas/expconf/v0/log-legacy-action.json b/schemas/expconf/v0/log-legacy-action.json deleted file mode 100644 index c4683a9d69d..00000000000 --- a/schemas/expconf/v0/log-legacy-action.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://determined.ai/schemas/expconf/v0/log-legacy-action.json", - "title": "LogLegacyAction", - "if": { - "required": [ - "type" - ] - }, - "then": { - "union": { - "defaultMessage": "is not an object where object[\"type\"] is one of 'cancel_retries' or 'exclude_node'", - "items": [ - { - "unionKey": "const:type=cancel_retries", - "$ref": "http://determined.ai/schemas/expconf/v0/log-legacy-action-cancel-retries.json" - }, - { - "unionKey": "const:type=exclude_node", - "$ref": "http://determined.ai/schemas/expconf/v0/log-legacy-action-exclude-node.json" - } - ] - } - }, - "additionalProperties": false, - "eventuallyRequired": [ - "type" - ], - "properties": { - "type": true - } -} diff --git a/schemas/expconf/v0/log-policy.json b/schemas/expconf/v0/log-policy.json index 2ffcab97de9..9de97684fc5 100644 --- a/schemas/expconf/v0/log-policy.json +++ b/schemas/expconf/v0/log-policy.json @@ -5,49 +5,30 @@ "additionalProperties": false, "type": "object", "properties": { - "pattern": { - "$comment": "default doesn't apply here because pattern is a required field", + "name": { "type": [ "string", "null" ], + "$comment": "Comment can be found in type LogPolicyV0", "default": null }, - "actions": { + "pattern": { "type": [ - "array", + "string", "null" ], - "items": { - "$ref": "http://determined.ai/schemas/expconf/v0/log-action.json" - }, + "$comment": "Comment can be found in type LogPolicyV0", "default": null }, "action": { "type": [ + "string", "object", "null" ], - "optionalRef": "http://determined.ai/schemas/expconf/v0/log-legacy-action.json", + "optionalRef": "http://determined.ai/schemas/expconf/v0/log-action.json", "default": null } - }, - "checks": { - "require either \"pattern\" with the depreated \"action\" field or \"pattern\" with the newer \"actions\" field": { - "oneOf": [ - { - "required": [ - "pattern", - "actions" - ] - }, - { - "required": [ - "pattern", - "action" - ] - } - ] - } } } diff --git a/schemas/test_cases/v0/experiment.yaml b/schemas/test_cases/v0/experiment.yaml index 1bb61889c37..48542e28d07 100644 --- a/schemas/test_cases/v0/experiment.yaml +++ b/schemas/test_cases/v0/experiment.yaml @@ -93,13 +93,7 @@ # pre-0.15.6 non-Native-API experiments emitted `internal: null` configs internal: null labels: [] - log_policies: - - pattern: .*uncorrectable ECC error encountered.* - actions: - - signal: ECC Error - - pattern: .*CUDA out of memory.* - actions: - - signal: CUDA OOM + log_policies: [] max_restarts: 5 min_validation_period: batches: 0 @@ -203,12 +197,10 @@ drop_capabilities: [] hyperparameters: {} log_policies: - - pattern: "*" - actions: - - signal: "*" - - pattern: "*" - actions: - - signal: "*" + - name: "*" + pattern: "*" + - name: "*" + pattern: "*" labels: [] max_restarts: 5 min_checkpoint_period: diff --git a/schemas/test_cases/v0/log_policies.yaml b/schemas/test_cases/v0/log_policies.yaml index 9c4169d88a9..57431045ae0 100644 --- a/schemas/test_cases/v0/log_policies.yaml +++ b/schemas/test_cases/v0/log_policies.yaml @@ -1,147 +1,180 @@ +# Log Action Tests +- name: valid log action + sane_as: + - http://determined.ai/schemas/expconf/v0/log-action.json + case: + cancel_retries + +- name: invalid log action + sanity_errors: + http://determined.ai/schemas/expconf/v0/log-action.json: + - "expect one of the followings: cancel_retries, exclude_node, or an legacy object where object.*" + case: + invalid_action + +- name: legacy log action cancel_retries + sane_as: + - http://determined.ai/schemas/expconf/v0/log-legacy-action-cancel-retries.json + case: + type: cancel_retries + +- name: legacy log action exclude_node + sane_as: + - http://determined.ai/schemas/expconf/v0/log-legacy-action-exclude-node.json + case: + type: exclude_node + +- name: invalid legacy log action + sanity_errors: + http://determined.ai/schemas/expconf/v0/log-action.json: + - "expect one of the followings: cancel_retries, exclude_node, or an legacy object where object.*" + case: + type: invalid_action + +# Log Policy Tests - name: valid log policy sane_as: - http://determined.ai/schemas/expconf/v0/log-policy.json case: + name: policy name pattern: a - actions: - - signal: sig - - cancel_retries - - exclude_node + action: cancel_retries -- name: log policy is invalid with both action and actions - sanity_errors: - http://determined.ai/schemas/expconf/v0/log-policy.json: - - "require either \"pattern\" with the depreated \"action\" field or \"pattern\" with the newer \"actions\" field" +- name: valid legacy log policy + sane_as: + - http://determined.ai/schemas/expconf/v0/log-policy.json case: pattern: a - actions: - - signal: sig action: type: cancel_retries -- name: invalid log policy 1 - sanity_errors: - http://determined.ai/schemas/expconf/v0/log-policy.json: - - "expect one of the followings: cancel_retries, exclude_node, or an object with a single \"signal\" field" +- name: valid log policy (after shim applied to a legacy policy) + sane_as: + - http://determined.ai/schemas/expconf/v0/log-policy.json case: pattern: a - actions: - - notallowedstring + action: cancel_retries -- name: invalid log policy 2 - sanity_errors: - http://determined.ai/schemas/expconf/v0/log-policy.json: - - "expect one of the followings: cancel_retries, exclude_node, or an object with a single \"signal\" field" +- name: valid log policy (user wants to override default log policy) + sane_as: + - http://determined.ai/schemas/expconf/v0/log-policy.json case: + name: policy name pattern: a - actions: - - notallowedstring -- name: invalid log policy 3 - sanity_errors: - http://determined.ai/schemas/expconf/v0/log-policy.json: - - "expect one of the followings: cancel_retries, exclude_node, or an object with a single \"signal\" field" +# Log Policies Tests +- name: valid log policies + sane_as: + - http://determined.ai/schemas/expconf/v0/log-policies.json case: - pattern: a - actions: - - 3 + - name: policy name + pattern: a + action: exclude_node -- name: unique actions when merged +- name: policies with different names but same patterns are valid merge_as: http://determined.ai/schemas/expconf/v0/log-policies.json - case: - - pattern: a - actions: - - cancel_retries + case: [] + # Use CheckMerged() to test LogPolicies unmarshal merge_src: - - pattern: a - actions: - - exclude_node + - name: policy name + pattern: a + action: cancel_retries + - pattern: different policy name + pattern: a merged: - - pattern: a - actions: - - cancel_retries - - exclude_node + - pattern: different policy name + pattern: a + - name: policy name + pattern: a + action: cancel_retries -- name: no duplicated actions when merged +- name: merge policies merge_as: http://determined.ai/schemas/expconf/v0/log-policies.json case: - - pattern: a - actions: - - cancel_retries - - exclude_node + - name: policy 1 + pattern: a + action: exclude_node + - name: policy 2 + pattern: b + action: cancel_retries + - name: policy 3 + pattern: c + action: exclude_node merge_src: - - pattern: a - actions: - - exclude_node + - name: policy 1 + pattern: aa + action: exclude_node + - name: policy 4 + pattern: b + action: cancel_retries + - name: policy 3 + pattern: c + action: cancel_retries merged: - - pattern: a - actions: - - cancel_retries - - exclude_node + - name: policy 1 + pattern: a + action: exclude_node + - name: policy 2 + pattern: b + action: cancel_retries + - name: policy 3 + pattern: c + action: exclude_node + - name: policy 4 + pattern: b + action: cancel_retries + -- name: no duplicated actions when merged 2 +- name: merge policies with different names but same patterns, and policies without names merge_as: http://determined.ai/schemas/expconf/v0/log-policies.json case: - - pattern: a - actions: - - exclude_node - - signal: sig1 + - name: policy 1 + pattern: a + action: exclude_node + - pattern: b + action: exclude_node merge_src: - - pattern: a - actions: - - cancel_retries - - exclude_node - - signal: sig2 + - name: policy 2 + pattern: a + action: cancel_retries + - pattern: b + action: cancel_retries merged: - - pattern: a - actions: - - cancel_retries - - exclude_node - - signal: sig1 + - pattern: b + action: cancel_retries + - pattern: b + action: exclude_node + - name: policy 1 + pattern: a + action: exclude_node + - name: policy 2 + pattern: a + action: cancel_retries -- name: user can override default values +- name: default values sane_as: - http://determined.ai/schemas/expconf/v0/log-policies.json default_as: http://determined.ai/schemas/expconf/v0/log-policies.json case: [] - defaulted: [] + defaulted: + - name: CUDA OOM + pattern: ".*CUDA out of memory.*" + - name: ECC Error + pattern: ".*uncorrectable ECC error encountered.*" - -- name: log action cancel_retries - sane_as: - - http://determined.ai/schemas/expconf/v0/log-legacy-action-cancel-retries.json - case: - type: cancel_retries - -- name: log action exclude_node +- name: user can override default values sane_as: - - http://determined.ai/schemas/expconf/v0/log-legacy-action-exclude-node.json + - http://determined.ai/schemas/expconf/v0/log-policies.json + default_as: + http://determined.ai/schemas/expconf/v0/log-policies.json case: - type: exclude_node + - name: CUDA OOM + - name: ECC Error -- name: recognize action field for backward compatibility - merge_as: http://determined.ai/schemas/expconf/v0/log-policies.json - case: [] - # merge_src is legacy log policies, merged is modern log policies - merge_src: - - pattern: a - action: - type: cancel_retries - - pattern: a - action: - type: exclude_node - - pattern: b - action: - type: exclude_node - merged: - - pattern: a - actions: - - cancel_retries - - exclude_node - - pattern: b - actions: - - exclude_node + defaulted: + - name: CUDA OOM + - name: ECC Error - name: legacy log policies merging merge_as: http://determined.ai/schemas/expconf/v0/log-policies.json @@ -164,12 +197,11 @@ type: exclude_node merged: - pattern: a - actions: - - cancel_retries + action: cancel_retries - pattern: b - actions: - - cancel_retries - - exclude_node + action: cancel_retries + - pattern: b + action: exclude_node - pattern: c - actions: - - exclude_node + action: exclude_node + diff --git a/webui/react/src/services/api-ts-sdk/api.ts b/webui/react/src/services/api-ts-sdk/api.ts index 16d99d09b3f..ff48702c2d8 100644 --- a/webui/react/src/services/api-ts-sdk/api.ts +++ b/webui/react/src/services/api-ts-sdk/api.ts @@ -934,11 +934,11 @@ export interface Trialv1Trial { */ metadata?: any; /** - * The log signals. + * Log Policy Matched. * @type {string} * @memberof Trialv1Trial */ - logSignal?: string; + logPolicyMatched?: string; } /** * @@ -3744,11 +3744,11 @@ export interface V1FlatRun { */ localId?: string; /** - * Log signal. + * Log policy matched. * @type {string} * @memberof V1FlatRun */ - logSignal?: string; + logPolicyMatched?: string; } /** * From 2ac7fdc2d4de0577d9823c6c842af231795a2779 Mon Sep 17 00:00:00 2001 From: Jerry Gong Date: Wed, 23 Oct 2024 15:42:21 -0400 Subject: [PATCH 22/27] Address comments --- e2e_tests/tests/cluster/test_log_policies.py | 1 - master/internal/api_experiment.go | 2 +- .../task_config_policy_intg_test.go | 10 +- .../pkg/model/task_container_defaults_test.go | 2 +- .../pkg/schemas/expconf/log_pattern_config.go | 136 +++++------------- master/pkg/schemas/zgen_schemas.go | 2 +- ...023105802_add-log-policy-matched.tx.up.sql | 2 +- schemas/expconf/v0/log-action.json | 2 +- schemas/test_cases/v0/log_policies.yaml | 20 +-- webui/react/src/pages/FlatRuns/columns.ts | 2 +- .../TrialDetails/Header/TrialHeaderLeft.tsx | 10 +- webui/react/src/services/decoder.ts | 2 +- webui/react/src/types.ts | 4 +- 13 files changed, 61 insertions(+), 134 deletions(-) diff --git a/e2e_tests/tests/cluster/test_log_policies.py b/e2e_tests/tests/cluster/test_log_policies.py index a9921bad969..aeeaa84f40e 100644 --- a/e2e_tests/tests/cluster/test_log_policies.py +++ b/e2e_tests/tests/cluster/test_log_policies.py @@ -166,7 +166,6 @@ def test_log_policy_matched(should_match: bool) -> None: expected_policy = "Test" config = { "log_policies": [{"name": expected_policy, "pattern": regex}], - "max_restarts": 1, } exp_ref = noop.create_experiment(sess, [noop.Exit(7)], config=config) diff --git a/master/internal/api_experiment.go b/master/internal/api_experiment.go index 9fe3daa6c81..ffd375c48ef 100644 --- a/master/internal/api_experiment.go +++ b/master/internal/api_experiment.go @@ -2741,7 +2741,7 @@ func (a *apiServer) SearchExperiments( Column("searcher_metric_value"). Column("trials.external_trial_id"). ColumnExpr("nullif(trials.metadata, 'null') as metadata"). - ColumnExpr("NULL as log_signal"). + ColumnExpr("NULL as log_policy_matched"). Join("LEFT JOIN validations bv ON trials.best_validation_id = bv.id"). Join("LEFT JOIN validations lv ON trials.latest_validation_id = lv.id"). Join("LEFT JOIN checkpoints_v2 new_ckpt ON new_ckpt.id = trials.warm_start_checkpoint_id"). diff --git a/master/internal/configpolicy/task_config_policy_intg_test.go b/master/internal/configpolicy/task_config_policy_intg_test.go index faf063cf0f0..5ec1e291317 100644 --- a/master/internal/configpolicy/task_config_policy_intg_test.go +++ b/master/internal/configpolicy/task_config_policy_intg_test.go @@ -471,11 +471,11 @@ func TestMergeWithInvariantExperimentConfigs(t *testing.T) { "propagation": "cluster-wide" } ], - "log_policies": [ - { - "pattern": "nonrepeat" - } - ] + "log_policies": [ + { + "pattern": "nonrepeat" + } + ] }` var defaultInvariantConfig expconf.ExperimentConfigV0 diff --git a/master/pkg/model/task_container_defaults_test.go b/master/pkg/model/task_container_defaults_test.go index 28d0e8ec915..2edd9d0a4d4 100644 --- a/master/pkg/model/task_container_defaults_test.go +++ b/master/pkg/model/task_container_defaults_test.go @@ -471,7 +471,7 @@ func TestLogPatternUnmarshal(t *testing.T) { {"name": "policy name", "pattern": "b", "action": "exclude_node"} ] }`)), &tcd) - require.ErrorContains(t, err, "log_policies have duplicated names \"policy name\" but different patterns") + require.ErrorContains(t, err, "log_policies have duplicated names \"policy name\"") } func TestPodSpecsDefaultMerging(t *testing.T) { diff --git a/master/pkg/schemas/expconf/log_pattern_config.go b/master/pkg/schemas/expconf/log_pattern_config.go index 3d05e9282bd..66f137a1ca5 100644 --- a/master/pkg/schemas/expconf/log_pattern_config.go +++ b/master/pkg/schemas/expconf/log_pattern_config.go @@ -3,7 +3,6 @@ package expconf import ( "encoding/json" "fmt" - "sort" log "github.com/sirupsen/logrus" @@ -47,66 +46,52 @@ func (b LogPoliciesConfigV0) WithDefaults() LogPoliciesConfigV0 { func (b LogPoliciesConfigV0) Merge( src LogPoliciesConfigV0, ) LogPoliciesConfigV0 { - // src is nil and b is an empty slice. - if src == nil && b != nil && len(b) == 0 { - return make(LogPoliciesConfigV0, 0) + if src == nil && b == nil { + return nil } - var out LogPoliciesConfigV0 - var policiesWoName LogPoliciesConfigV0 - seenPolicies := set.New[string]() - nameToLp := make(map[string]LogPolicyV0) - for _, lp := range src { + // Keep everything in b unconditionally. + out := append(LogPoliciesConfigV0{}, b...) + + names := set.New[string]() + unnamedPolicies := set.New[string]() + for _, lp := range b { if lp.RawName == nil { - policiesWoName.appendLegacyPolicies(lp, seenPolicies) - continue + json, err := json.Marshal(lp) + if err != nil { + log.Errorf("marshaling error %+v %v", lp, err) + } + unnamedPolicies.Insert(string(json)) + } else { + names.Insert(*lp.RawName) } - nameToLp[*lp.RawName] = lp } - for _, otherLp := range b { - if otherLp.RawName == nil { - policiesWoName.appendLegacyPolicies(otherLp, seenPolicies) - continue - } - name := *otherLp.RawName - if srcLp, ok := nameToLp[name]; ok { - // Merge two LogPolicies if they have the same name. - nameToLp[name] = schemas.Merge(otherLp, srcLp) - } else { - nameToLp[name] = otherLp + // Add policies in src that don't exist in b. + for _, lp := range src { + if lp.RawName == nil { + json, err := json.Marshal(lp) + if err != nil { + log.Errorf("marshaling error %+v %v", lp, err) + } + if !unnamedPolicies.Contains(string(json)) { + out = append(out, lp) + unnamedPolicies.Insert(string(json)) + } + } else if !names.Contains(*lp.RawName) { + out = append(out, lp) + names.Insert(*lp.RawName) } } - for _, lp := range nameToLp { - out = append(out, lp) - } - out = append(out, policiesWoName...) - out.sort() return out } -// Only keep unique legacy policies. -func (b *LogPoliciesConfigV0) appendLegacyPolicies(policy LogPolicyV0, seenPolicies set.Set[string]) { - // policy without name is legacy policy. - if policy.RawName == nil { - json, err := json.Marshal(policy) - if err != nil { - log.Errorf("marshaling error %+v %v", policy, err) - } - if seenPolicies.Contains(string(json)) { - return - } - seenPolicies.Insert(string(json)) - *b = append(*b, policy) - } -} - // UnmarshalJSON implements the json.Unmarshaler interface. // Special cases: // 1. User may submit policies that have different names but same patterns. We keep both of them, but which name // will be shown in the UI is undefined. -// 2. User can't submit policies that have the same names but different patterns. +// 2. User can't submit policies that have the same names. func (b *LogPoliciesConfigV0) UnmarshalJSON(data []byte) error { type DefaultParser LogPoliciesConfigV0 var jsonItems DefaultParser @@ -123,61 +108,15 @@ func (b *LogPoliciesConfigV0) UnmarshalJSON(data []byte) error { } if names.Contains(*n) { - return fmt.Errorf("log_policies have duplicated names %q but different patterns", *n) + return fmt.Errorf("log_policies have duplicated names %q", *n) } names.Insert(*n) } - out := LogPoliciesConfig(jsonItems) - out.sort() - *b = out - + *b = LogPoliciesConfig(jsonItems) return nil } -// Sort LogPoliciesConfigV0 so the output is in deterministic state. Testing will be easier. -func (b LogPoliciesConfigV0) sort() { - sort.Slice(b, func(i, j int) bool { - // Sort policies by name first. - var iName string - var jName string - if b[i].RawName != nil { - iName = *b[i].RawName - } - if b[j].RawName != nil { - jName = *b[j].RawName - } - - // Sort policies by pattern if their names are the same. - if iName == jName { - var iPattern string - var jPattern string - if b[i].RawPattern != nil { - iPattern = *b[i].RawPattern - } - if b[j].RawPattern != nil { - jPattern = *b[j].RawPattern - } - // Sort by action if patterns and names are the same. - if iPattern == jPattern { - var iAction string - var jAction string - if b[i].RawAction != nil { - iAction = string(b[i].RawAction.Type) - } - if b[j].RawAction != nil { - jAction = string(b[j].RawAction.Type) - } - return iAction < jAction - } - - return iPattern < jPattern - } - - return iName < jName - }) -} - // LogPolicyV0 is an action to take if we match against trial logs. // //go:generate ../gen.sh @@ -189,17 +128,6 @@ type LogPolicyV0 struct { RawAction *LogActionV0 `json:"action,omitempty"` } -// Merge LogPolicyV0. -func (l LogPolicyV0) Merge(src LogPolicyV0) LogPolicyV0 { - // Merging only applies to the LogActionV0 with the same name. - if src.RawName == nil || l.RawName == nil || *src.RawName != *l.RawName { - log.Errorf("the names of %+v and %+v are not the same", l, src) - return src - } - - return l -} - // LogActionType is the type of an action. type LogActionType string diff --git a/master/pkg/schemas/zgen_schemas.go b/master/pkg/schemas/zgen_schemas.go index a467a7d320f..e57a1d770c2 100644 --- a/master/pkg/schemas/zgen_schemas.go +++ b/master/pkg/schemas/zgen_schemas.go @@ -1500,7 +1500,7 @@ var ( "$id": "http://determined.ai/schemas/expconf/v0/log-action.json", "title": "LogAction", "union": { - "defaultMessage": "expect one of the followings: cancel_retries, exclude_node, or an legacy object where object[\"type\"] is one of 'cancel_retries' or 'exclude_node'", + "defaultMessage": "must be one of \"cancel_retries\" or \"exclude_node\"", "items": [ { "unionKey": "never", diff --git a/master/static/migrations/20241023105802_add-log-policy-matched.tx.up.sql b/master/static/migrations/20241023105802_add-log-policy-matched.tx.up.sql index 3899b4291fb..c265be3293c 100644 --- a/master/static/migrations/20241023105802_add-log-policy-matched.tx.up.sql +++ b/master/static/migrations/20241023105802_add-log-policy-matched.tx.up.sql @@ -2,4 +2,4 @@ ALTER table public.tasks ADD COLUMN log_policy_matched text; ALTER TABLE public.runs - RENAME COLUMN log_signal TO log_policy_matched; \ No newline at end of file + RENAME COLUMN log_signal TO log_policy_matched; diff --git a/schemas/expconf/v0/log-action.json b/schemas/expconf/v0/log-action.json index 050d4d9ec42..193ac032a02 100644 --- a/schemas/expconf/v0/log-action.json +++ b/schemas/expconf/v0/log-action.json @@ -3,7 +3,7 @@ "$id": "http://determined.ai/schemas/expconf/v0/log-action.json", "title": "LogAction", "union": { - "defaultMessage": "expect one of the followings: cancel_retries, exclude_node, or an legacy object where object[\"type\"] is one of 'cancel_retries' or 'exclude_node'", + "defaultMessage": "must be one of \"cancel_retries\" or \"exclude_node\"", "items": [ { "unionKey": "never", diff --git a/schemas/test_cases/v0/log_policies.yaml b/schemas/test_cases/v0/log_policies.yaml index 57431045ae0..6edee4f0604 100644 --- a/schemas/test_cases/v0/log_policies.yaml +++ b/schemas/test_cases/v0/log_policies.yaml @@ -8,7 +8,7 @@ - name: invalid log action sanity_errors: http://determined.ai/schemas/expconf/v0/log-action.json: - - "expect one of the followings: cancel_retries, exclude_node, or an legacy object where object.*" + - "must be one of \"cancel_retries\" or \"exclude_node\"" case: invalid_action @@ -27,7 +27,7 @@ - name: invalid legacy log action sanity_errors: http://determined.ai/schemas/expconf/v0/log-action.json: - - "expect one of the followings: cancel_retries, exclude_node, or an legacy object where object.*" + - "must be one of \"cancel_retries\" or \"exclude_node\"" case: type: invalid_action @@ -82,11 +82,11 @@ - pattern: different policy name pattern: a merged: - - pattern: different policy name - pattern: a - name: policy name pattern: a action: cancel_retries + - pattern: different policy name + pattern: a - name: merge policies merge_as: http://determined.ai/schemas/expconf/v0/log-policies.json @@ -140,16 +140,16 @@ - pattern: b action: cancel_retries merged: - - pattern: b - action: cancel_retries - - pattern: b - action: exclude_node - name: policy 1 pattern: a action: exclude_node + - pattern: b + action: exclude_node - name: policy 2 pattern: a action: cancel_retries + - pattern: b + action: cancel_retries - name: default values sane_as: @@ -196,12 +196,12 @@ action: type: exclude_node merged: - - pattern: a - action: cancel_retries - pattern: b action: cancel_retries - pattern: b action: exclude_node - pattern: c action: exclude_node + - pattern: a + action: cancel_retries diff --git a/webui/react/src/pages/FlatRuns/columns.ts b/webui/react/src/pages/FlatRuns/columns.ts index 99fe1247515..f1a84d48dc6 100644 --- a/webui/react/src/pages/FlatRuns/columns.ts +++ b/webui/react/src/pages/FlatRuns/columns.ts @@ -441,7 +441,7 @@ export const getColumnDefs = ({ data: { appTheme, kind: STATE_CELL, - label: record.logSignal, + label: record.logPolicyMatched, state: getCellStateFromExperimentState(record.state), }, kind: GridCellKind.Custom, diff --git a/webui/react/src/pages/TrialDetails/Header/TrialHeaderLeft.tsx b/webui/react/src/pages/TrialDetails/Header/TrialHeaderLeft.tsx index 5871c92a460..4c4b9116f1b 100644 --- a/webui/react/src/pages/TrialDetails/Header/TrialHeaderLeft.tsx +++ b/webui/react/src/pages/TrialDetails/Header/TrialHeaderLeft.tsx @@ -34,14 +34,14 @@ const TrialHeaderLeft: React.FC = ({ experiment, trial }: Props) => {
{f_flat_runs ? 'Run' : 'Trial'} {trial.id}
- {trial.logSignal && - (trial.logSignal.length < labelMaxLength ? ( - + {trial.logPolicyMatched && + (trial.logPolicyMatched.length < labelMaxLength ? ( + ) : ( - + ))} diff --git a/webui/react/src/services/decoder.ts b/webui/react/src/services/decoder.ts index 7c8ec5554a4..524eba0f6d9 100644 --- a/webui/react/src/services/decoder.ts +++ b/webui/react/src/services/decoder.ts @@ -681,7 +681,7 @@ export const decodeV1TrialToTrialItem = (data: Sdk.Trialv1Trial): types.TrialIte id: data.id, latestValidationMetric: data.latestValidation && decodeMetricsWorkload(data.latestValidation), logRetentionDays: data.logRetentionDays, - logSignal: data.logSignal, + logPolicyMatched: data.logPolicyMatched, metadata: data.metadata, searcherMetricsVal: data.searcherMetricValue, startTime: data.startTime as unknown as string, diff --git a/webui/react/src/types.ts b/webui/react/src/types.ts index bd8a64e84a9..5ef5b089d0b 100644 --- a/webui/react/src/types.ts +++ b/webui/react/src/types.ts @@ -710,7 +710,7 @@ export interface TrialItem extends StartEndTimes { logRetentionDays?: number; taskId?: string; metadata?: JsonObject; - logSignal?: string; + logPolicyMatched?: string; } export interface TrialDetails extends TrialItem { @@ -1298,7 +1298,7 @@ export interface FlatRun { archived: boolean; parentArchived: boolean; experiment?: FlatRunExperiment; - logSignal?: string; + logPolicyMatched?: string; } export interface FlatRunExperiment { From 65fc50faef04d6028b3d96b76a58b0e55ea1ce83 Mon Sep 17 00:00:00 2001 From: Jerry Gong Date: Wed, 23 Oct 2024 18:40:15 -0400 Subject: [PATCH 23/27] Address more comments --- e2e_tests/tests/cluster/test_log_policies.py | 2 +- .../postgres_task_config_policy.go | 7 +----- .../task_config_policy_intg_test.go | 5 ---- .../pkg/schemas/expconf/log_pattern_config.go | 20 ++++++---------- master/pkg/schemas/zgen_schemas.go | 23 +++++++++++++++++-- schemas/expconf/v0/log-policy.json | 23 +++++++++++++++++-- schemas/test_cases/v0/log_policies.yaml | 10 ++++++-- webui/react/src/services/decoder.ts | 2 +- 8 files changed, 60 insertions(+), 32 deletions(-) diff --git a/e2e_tests/tests/cluster/test_log_policies.py b/e2e_tests/tests/cluster/test_log_policies.py index aeeaa84f40e..a3abe1087cc 100644 --- a/e2e_tests/tests/cluster/test_log_policies.py +++ b/e2e_tests/tests/cluster/test_log_policies.py @@ -2,7 +2,7 @@ from determined.common.api import bindings from determined.experimental import client -from tests import api_utils, detproc +from tests import api_utils from tests import experiment as exp from tests.cluster import utils from tests.experiment import noop diff --git a/master/internal/configpolicy/postgres_task_config_policy.go b/master/internal/configpolicy/postgres_task_config_policy.go index dea01bf7e42..fcc78c75864 100644 --- a/master/internal/configpolicy/postgres_task_config_policy.go +++ b/master/internal/configpolicy/postgres_task_config_policy.go @@ -19,12 +19,7 @@ const ( // DefaultInvariantConfigStr is the default invariant config val used for tests. DefaultInvariantConfigStr = `{ "description": "random description", - "resources": {"slots": 4, "max_slots": 8}, - "log_policies": [ - { - "pattern": "nonrepeat" - } - ] + "resources": {"slots": 4, "max_slots": 8} }` // DefaultConstraintsStr is the default constraints val used for tests. DefaultConstraintsStr = `{"priority_limit": 10, "resources": {"max_slots": 8}}` diff --git a/master/internal/configpolicy/task_config_policy_intg_test.go b/master/internal/configpolicy/task_config_policy_intg_test.go index 5ec1e291317..f64de69719f 100644 --- a/master/internal/configpolicy/task_config_policy_intg_test.go +++ b/master/internal/configpolicy/task_config_policy_intg_test.go @@ -470,11 +470,6 @@ func TestMergeWithInvariantExperimentConfigs(t *testing.T) { "read_only": true, "propagation": "cluster-wide" } - ], - "log_policies": [ - { - "pattern": "nonrepeat" - } ] }` diff --git a/master/pkg/schemas/expconf/log_pattern_config.go b/master/pkg/schemas/expconf/log_pattern_config.go index 66f137a1ca5..387b325b05d 100644 --- a/master/pkg/schemas/expconf/log_pattern_config.go +++ b/master/pkg/schemas/expconf/log_pattern_config.go @@ -4,8 +4,6 @@ import ( "encoding/json" "fmt" - log "github.com/sirupsen/logrus" - "github.com/pkg/errors" "github.com/determined-ai/determined/master/pkg/ptrs" @@ -57,11 +55,10 @@ func (b LogPoliciesConfigV0) Merge( unnamedPolicies := set.New[string]() for _, lp := range b { if lp.RawName == nil { - json, err := json.Marshal(lp) - if err != nil { - log.Errorf("marshaling error %+v %v", lp, err) - } - unnamedPolicies.Insert(string(json)) + // Not checking nil because we've enforced action and pattern must be set for the legacy policy + // in the json schema. + s := fmt.Sprintf("%v:%v", *lp.RawAction, *lp.RawPattern) + unnamedPolicies.Insert(s) } else { names.Insert(*lp.RawName) } @@ -70,13 +67,10 @@ func (b LogPoliciesConfigV0) Merge( // Add policies in src that don't exist in b. for _, lp := range src { if lp.RawName == nil { - json, err := json.Marshal(lp) - if err != nil { - log.Errorf("marshaling error %+v %v", lp, err) - } - if !unnamedPolicies.Contains(string(json)) { + s := fmt.Sprintf("%v:%v", *lp.RawAction, *lp.RawPattern) + if !unnamedPolicies.Contains(s) { out = append(out, lp) - unnamedPolicies.Insert(string(json)) + unnamedPolicies.Insert(s) } } else if !names.Contains(*lp.RawName) { out = append(out, lp) diff --git a/master/pkg/schemas/zgen_schemas.go b/master/pkg/schemas/zgen_schemas.go index e57a1d770c2..b887031a82a 100644 --- a/master/pkg/schemas/zgen_schemas.go +++ b/master/pkg/schemas/zgen_schemas.go @@ -1578,7 +1578,7 @@ var ( "string", "null" ], - "$comment": "Comment can be found in type LogPolicyV0", + "$comment": "Legacy log policy doesn't have a name. Legacy log policy will be deprecated.", "default": null }, "pattern": { @@ -1586,7 +1586,7 @@ var ( "string", "null" ], - "$comment": "Comment can be found in type LogPolicyV0", + "$comment": "Pattern can be null. So user can override it to disable the default log polices.", "default": null }, "action": { @@ -1598,6 +1598,25 @@ var ( "optionalRef": "http://determined.ai/schemas/expconf/v0/log-action.json", "default": null } + }, + "checks": { + "\"name\" must be set, and \"pattern\" is also required unless you intend to disable an existing policy": { + "anyOf": [ + { + "required": [ + "name" + ], + "$comment": "modern policy requirement" + }, + { + "required": [ + "pattern", + "action" + ], + "$comment": "legacy policy requirements" + } + ] + } } } `) diff --git a/schemas/expconf/v0/log-policy.json b/schemas/expconf/v0/log-policy.json index 9de97684fc5..687b8d6e67c 100644 --- a/schemas/expconf/v0/log-policy.json +++ b/schemas/expconf/v0/log-policy.json @@ -10,7 +10,7 @@ "string", "null" ], - "$comment": "Comment can be found in type LogPolicyV0", + "$comment": "Legacy log policy doesn't have a name. Legacy log policy will be deprecated.", "default": null }, "pattern": { @@ -18,7 +18,7 @@ "string", "null" ], - "$comment": "Comment can be found in type LogPolicyV0", + "$comment": "Pattern can be null. So user can override it to disable the default log polices.", "default": null }, "action": { @@ -30,5 +30,24 @@ "optionalRef": "http://determined.ai/schemas/expconf/v0/log-action.json", "default": null } + }, + "checks": { + "\"name\" must be set, and \"pattern\" is also required unless you intend to disable an existing policy": { + "anyOf": [ + { + "required": [ + "name" + ], + "$comment": "modern policy requirement" + }, + { + "required": [ + "pattern", + "action" + ], + "$comment": "legacy policy requirements" + } + ] + } } } diff --git a/schemas/test_cases/v0/log_policies.yaml b/schemas/test_cases/v0/log_policies.yaml index 6edee4f0604..d12be1a88c4 100644 --- a/schemas/test_cases/v0/log_policies.yaml +++ b/schemas/test_cases/v0/log_policies.yaml @@ -62,6 +62,12 @@ name: policy name pattern: a +- name: invalid log policy + sanity_errors: + http://determined.ai/schemas/expconf/v0/log-policy.json: + - "\"name\" must be set" + case: {} + # Log Policies Tests - name: valid log policies sane_as: @@ -79,13 +85,13 @@ - name: policy name pattern: a action: cancel_retries - - pattern: different policy name + - name: different policy name pattern: a merged: - name: policy name pattern: a action: cancel_retries - - pattern: different policy name + - name: different policy name pattern: a - name: merge policies diff --git a/webui/react/src/services/decoder.ts b/webui/react/src/services/decoder.ts index 524eba0f6d9..70d7417c5de 100644 --- a/webui/react/src/services/decoder.ts +++ b/webui/react/src/services/decoder.ts @@ -680,8 +680,8 @@ export const decodeV1TrialToTrialItem = (data: Sdk.Trialv1Trial): types.TrialIte hyperparameters: flattenObject(data.hparams || {}), id: data.id, latestValidationMetric: data.latestValidation && decodeMetricsWorkload(data.latestValidation), - logRetentionDays: data.logRetentionDays, logPolicyMatched: data.logPolicyMatched, + logRetentionDays: data.logRetentionDays, metadata: data.metadata, searcherMetricsVal: data.searcherMetricValue, startTime: data.startTime as unknown as string, From f08317d4233005b9724e12327a4e5f412678ba5e Mon Sep 17 00:00:00 2001 From: Jerry Gong Date: Wed, 23 Oct 2024 19:36:43 -0400 Subject: [PATCH 24/27] fix failed tests --- .../internal/configpolicy/task_config_policy_intg_test.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/master/internal/configpolicy/task_config_policy_intg_test.go b/master/internal/configpolicy/task_config_policy_intg_test.go index f64de69719f..af68caca923 100644 --- a/master/internal/configpolicy/task_config_policy_intg_test.go +++ b/master/internal/configpolicy/task_config_policy_intg_test.go @@ -667,6 +667,7 @@ func testMergeSlicesAndMaps(t *testing.T) { }, "log_policies": [ { + "name": "nonrepeat policy", "pattern": "nonrepeat" } ] @@ -706,6 +707,7 @@ func testMergeSlicesAndMaps(t *testing.T) { }, "log_policies": [ { + "name": "repeat policy", "pattern": "repeat" } ] @@ -773,9 +775,11 @@ func testMergeSlicesAndMaps(t *testing.T) { }, "log_policies": [ { + "name": "nonrepeat policy", "pattern": "nonrepeat" }, { + "name": "repeat policy", "pattern": "repeat" } ] @@ -815,6 +819,7 @@ func testMergeSlicesAndMaps(t *testing.T) { }, "log_policies": [ { + "name": "global repeat policy", "pattern": "gloablrepeat" } ] @@ -898,12 +903,15 @@ func testMergeSlicesAndMaps(t *testing.T) { }, "log_policies": [ { + "name": "nonrepeat policy", "pattern": "nonrepeat" }, { + "name": "repeat policy", "pattern": "repeat" }, { + "name": "global repeat policy", "pattern": "gloablrepeat" } ] From 77de8c94f4f438879f1c1171faf635bff1dd53ce Mon Sep 17 00:00:00 2001 From: Jerry Gong Date: Thu, 24 Oct 2024 16:47:27 -0400 Subject: [PATCH 25/27] Address more comments --- master/internal/logpattern/logpattern.go | 56 +++++++++---------- master/pkg/model/experiment.go | 1 - .../pkg/schemas/expconf/log_pattern_config.go | 2 + 3 files changed, 30 insertions(+), 29 deletions(-) diff --git a/master/internal/logpattern/logpattern.go b/master/internal/logpattern/logpattern.go index 45f06a2eb92..3eaf7a5b006 100644 --- a/master/internal/logpattern/logpattern.go +++ b/master/internal/logpattern/logpattern.go @@ -80,28 +80,28 @@ func (l *LogPatternPolicies) monitor(ctx context.Context, } if compiledRegex.MatchString(log.Log) { - if policy.Action() != nil { - switch policy.Action().Type { - case expconf.LogActionTypeCancelRetries: - if err := addDontRetry( - ctx, model.TaskID(log.TaskID), *log.AgentID, *pattern, log.Log, - ); err != nil { - return fmt.Errorf("adding don't retry: %w", err) + err = db.Bun().RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error { + if policy.Action() != nil { + switch policy.Action().Type { + case expconf.LogActionTypeCancelRetries: + if err := addDontRetry( + ctx, model.TaskID(log.TaskID), *log.AgentID, *pattern, log.Log, tx, + ); err != nil { + return fmt.Errorf("adding don't retry: %w", err) + } + + case expconf.LogActionTypeExcludeNode: + if err := addRetryOnDifferentNode( + ctx, model.TaskID(log.TaskID), *log.AgentID, *pattern, log.Log, tx, + ); err != nil { + return fmt.Errorf("adding retry on different node: %w", err) + } + default: + return fmt.Errorf("unrecognized log pattern policy type") } - - case expconf.LogActionTypeExcludeNode: - if err := addRetryOnDifferentNode( - ctx, model.TaskID(log.TaskID), *log.AgentID, *pattern, log.Log, - ); err != nil { - return fmt.Errorf("adding retry on different node: %w", err) - } - default: - return fmt.Errorf("unrecognized log pattern policy type") } - } - if policy.Name() != nil { - err = db.Bun().RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error { + if policy.Name() != nil { if _, err := tx.NewUpdate().Model(&model.Task{}). Set("log_policy_matched = ?", *policy.Name()). Where("task_id = ?", log.TaskID). @@ -116,12 +116,12 @@ func (l *LogPatternPolicies) monitor(ctx context.Context, Exec(ctx); err != nil { return fmt.Errorf("updating log signal of task %s: %w", log.TaskID, err) } - - return nil - }) - if err != nil { - return fmt.Errorf("updating log_policy_matched: %w", err) } + + return nil + }) + if err != nil { + return fmt.Errorf("\"%s\" matches pattern \"%s\" but failed to update db: %w", log.Log, *pattern, err) } } } @@ -159,7 +159,7 @@ func GetBlockedNodes(ctx context.Context, taskID model.TaskID) ([]string, error) } func addRetryOnDifferentNode( - ctx context.Context, taskID model.TaskID, nodeName, regex, triggeringLog string, + ctx context.Context, taskID model.TaskID, nodeName, regex, triggeringLog string, tx bun.Tx, ) error { m := &retryOnDifferentNode{ TaskID: taskID, @@ -167,7 +167,7 @@ func addRetryOnDifferentNode( Regex: regex, TriggeringLog: triggeringLog, } - res, err := db.Bun().NewInsert().Model(m). + res, err := tx.NewInsert().Model(m). On("CONFLICT (task_id, node_name, regex) DO NOTHING"). // Only care about the first log. Exec(ctx) if err != nil { @@ -202,7 +202,7 @@ type dontRetry struct { } func addDontRetry( - ctx context.Context, taskID model.TaskID, nodeName, regex, triggeringLog string, + ctx context.Context, taskID model.TaskID, nodeName, regex, triggeringLog string, tx bun.Tx, ) error { m := &dontRetry{ TaskID: taskID, @@ -210,7 +210,7 @@ func addDontRetry( Regex: regex, TriggeringLog: triggeringLog, } - if _, err := db.Bun().NewInsert().Model(m). + if _, err := tx.NewInsert().Model(m). On("CONFLICT (task_id, regex) DO NOTHING"). // Only care about the first log. Exec(ctx); err != nil { return fmt.Errorf("adding don't retry policy %+v: %w", m, err) diff --git a/master/pkg/model/experiment.go b/master/pkg/model/experiment.go index 85b9d9ae89d..dc88fa1d8ce 100644 --- a/master/pkg/model/experiment.go +++ b/master/pkg/model/experiment.go @@ -522,7 +522,6 @@ type Run struct { LogRetentionDays *int16 `db:"log_retention_days"` Metadata map[string]any `db:"metadata" bun:"metadata,scanonly"` LocalID int `db:"local_id"` - LogPolicyMatched *string `db:"log_policy_matched"` } // RunTaskID represents a row from the `run_id_task_id` table. diff --git a/master/pkg/schemas/expconf/log_pattern_config.go b/master/pkg/schemas/expconf/log_pattern_config.go index 387b325b05d..e6cd8f495e2 100644 --- a/master/pkg/schemas/expconf/log_pattern_config.go +++ b/master/pkg/schemas/expconf/log_pattern_config.go @@ -171,6 +171,8 @@ func (s *LogActionV0) UnmarshalJSON(data []byte) error { case LogActionTypeCancelRetries, LogActionTypeExcludeNode: *s = LogActionV0{Type: lat} return nil + default: + return fmt.Errorf("unrecognized action type: %s, data: %q", lat, string(data)) } } From 193faa076504ff85b39646badb24fc472dcf210e Mon Sep 17 00:00:00 2001 From: Jerry Gong Date: Thu, 24 Oct 2024 16:50:16 -0400 Subject: [PATCH 26/27] move db migration file to top --- ....tx.up.sql => 20241024164947_add-log-policy-matched.tx.up.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename master/static/migrations/{20241023105802_add-log-policy-matched.tx.up.sql => 20241024164947_add-log-policy-matched.tx.up.sql} (100%) diff --git a/master/static/migrations/20241023105802_add-log-policy-matched.tx.up.sql b/master/static/migrations/20241024164947_add-log-policy-matched.tx.up.sql similarity index 100% rename from master/static/migrations/20241023105802_add-log-policy-matched.tx.up.sql rename to master/static/migrations/20241024164947_add-log-policy-matched.tx.up.sql From 57031d08afe3d4ef497523ceb09e12935e66eb8f Mon Sep 17 00:00:00 2001 From: Jerry Gong Date: Thu, 24 Oct 2024 17:08:02 -0400 Subject: [PATCH 27/27] fix failed CI tests --- .../logpattern/logpattern_intg_test.go | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/master/internal/logpattern/logpattern_intg_test.go b/master/internal/logpattern/logpattern_intg_test.go index 48dd5c6d678..f1181f97a49 100644 --- a/master/internal/logpattern/logpattern_intg_test.go +++ b/master/internal/logpattern/logpattern_intg_test.go @@ -10,6 +10,7 @@ import ( "testing" "github.com/stretchr/testify/require" + "github.com/uptrace/bun" "github.com/determined-ai/determined/master/internal/db" "github.com/determined-ai/determined/master/pkg/etc" @@ -53,12 +54,16 @@ func TestRetryOnDifferentNode(t *testing.T) { require.NoError(t, err) require.Empty(t, blocked) - require.NoError(t, addRetryOnDifferentNode(ctx, task.TaskID, "n0", "regexa", "loga")) - require.NoError(t, addRetryOnDifferentNode(ctx, task.TaskID, "n1", "regexa", "logb")) - require.NoError(t, addRetryOnDifferentNode(ctx, task.TaskID, "n0", "regexb", "logc")) + err = db.Bun().RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error { + require.NoError(t, addRetryOnDifferentNode(ctx, task.TaskID, "n0", "regexa", "loga", tx)) + require.NoError(t, addRetryOnDifferentNode(ctx, task.TaskID, "n1", "regexa", "logb", tx)) + require.NoError(t, addRetryOnDifferentNode(ctx, task.TaskID, "n0", "regexb", "logc", tx)) - require.NoError(t, addRetryOnDifferentNode(ctx, task.TaskID, "n0", "regexa", "dontappear")) - require.NoError(t, addRetryOnDifferentNode(ctx, task.TaskID, "n0", "regexb", "dontappear")) + require.NoError(t, addRetryOnDifferentNode(ctx, task.TaskID, "n0", "regexa", "dontappear", tx)) + require.NoError(t, addRetryOnDifferentNode(ctx, task.TaskID, "n0", "regexb", "dontappear", tx)) + return nil + }) + require.NoError(t, err) // Check DB state is as expected. var actual []*retryOnDifferentNode @@ -105,10 +110,14 @@ func TestShouldRetry(t *testing.T) { require.NoError(t, err) require.Empty(t, resp) - require.NoError(t, addDontRetry(ctx, task.TaskID, "n0", "regexa", "loga")) - require.NoError(t, addDontRetry(ctx, task.TaskID, "n0", "regexb", "logb")) - require.NoError(t, addDontRetry(ctx, task.TaskID, "n0", "regexa", "dontappear")) - require.NoError(t, addDontRetry(ctx, task.TaskID, "n1", "regexb", "dontappear")) + err = db.Bun().RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error { + require.NoError(t, addDontRetry(ctx, task.TaskID, "n0", "regexa", "loga", tx)) + require.NoError(t, addDontRetry(ctx, task.TaskID, "n0", "regexb", "logb", tx)) + require.NoError(t, addDontRetry(ctx, task.TaskID, "n0", "regexa", "dontappear", tx)) + require.NoError(t, addDontRetry(ctx, task.TaskID, "n1", "regexb", "dontappear", tx)) + return nil + }) + require.NoError(t, err) resp, err = ShouldRetry(ctx, task.TaskID) require.NoError(t, err)