Skip to content

Commit 5f6ad8d

Browse files
vertex-sdk-bot authored and copybara-github committed
feat: Allow reuse of deleted experiment run id.
PiperOrigin-RevId: 591334624
1 parent e50e49e commit 5f6ad8d

File tree

3 files changed

+155
-14
lines changed

3 files changed

+155
-14
lines changed

google/cloud/aiplatform/metadata/experiment_resources.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -395,7 +395,12 @@ def delete(self, *, delete_backing_tensorboard_runs: bool = False):
395395
experiment_run.delete(
396396
delete_backing_tensorboard_run=delete_backing_tensorboard_runs
397397
)
398-
self._metadata_context.delete()
398+
try:
399+
self._metadata_context.delete()
400+
except exceptions.NotFound:
401+
_LOGGER.warning(
402+
f"Experiment {self.name} metadata node not found. Skipping deletion."
403+
)
399404

400405
def get_data_frame(self) -> "pd.DataFrame": # noqa: F821
401406
"""Get parameters, metrics, and time series metrics of all runs in this experiment as Dataframe.

google/cloud/aiplatform/metadata/experiment_run_resource.py

+41-11
Original file line numberDiff line numberDiff line change
@@ -757,12 +757,16 @@ def _create_context():
757757
experiment_run._backing_tensorboard_run = None
758758
experiment_run._largest_step = None
759759

760-
if tensorboard:
761-
cls._assign_backing_tensorboard(
762-
self=experiment_run, tensorboard=tensorboard
763-
)
764-
else:
765-
cls._assign_to_experiment_backing_tensorboard(self=experiment_run)
760+
try:
761+
if tensorboard:
762+
cls._assign_backing_tensorboard(
763+
self=experiment_run, tensorboard=tensorboard
764+
)
765+
else:
766+
cls._assign_to_experiment_backing_tensorboard(self=experiment_run)
767+
except Exception as e:
768+
metadata_context.delete()
769+
raise e
766770

767771
experiment_run._associate_to_experiment(experiment)
768772
return experiment_run
@@ -899,7 +903,12 @@ def assign_backing_tensorboard(
899903
backing_tensorboard = self._lookup_tensorboard_run_artifact()
900904
if backing_tensorboard:
901905
raise ValueError(
902-
f"Experiment run {self._run_name} already associated to tensorboard resource {backing_tensorboard.resource.resource_name}"
906+
f"Experiment run {self._run_name} already associated to tensorboard resource {backing_tensorboard.resource.resource_name}.\n"
907+
f"To delete backing tensorboard run, execute the following:\n"
908+
f'tensorboard_run_artifact = aiplatform.metadata.artifact.Artifact(artifact_name=f"{self._tensorboard_run_id(self._metadata_node.name)}")\n'
909+
f'tensorboard_run_resource = aiplatform.TensorboardRun(tensorboard_run_artifact.metadata["resourceName"])\n'
910+
f"tensorboard_run_resource.delete()\n"
911+
f"tensorboard_run_artifact.delete()"
903912
)
904913

905914
self._assign_backing_tensorboard(tensorboard=tensorboard)
@@ -1370,20 +1379,41 @@ def delete(self, *, delete_backing_tensorboard_run: bool = False):
13701379
self._backing_tensorboard_run.resource.delete()
13711380
self._backing_tensorboard_run.metadata.delete()
13721381
else:
1373-
_LOGGER.warn(
1382+
_LOGGER.warning(
13741383
f"Experiment run {self.name} does not have a backing tensorboard run."
13751384
" Skipping deletion."
13761385
)
13771386
else:
1378-
_LOGGER.warn(
1387+
_LOGGER.warning(
13791388
f"Experiment run {self.name} does not have a backing tensorboard run."
13801389
" Skipping deletion."
13811390
)
1391+
else:
1392+
_LOGGER.warning(
1393+
f"Experiment run {self.name} skipped backing tensorboard run deletion.\n"
1394+
f"To delete backing tensorboard run, execute the following:\n"
1395+
f'tensorboard_run_artifact = aiplatform.metadata.artifact.Artifact(artifact_name=f"{self._tensorboard_run_id(self._metadata_node.name)}")\n'
1396+
f'tensorboard_run_resource = aiplatform.TensorboardRun(tensorboard_run_artifact.metadata["resourceName"])\n'
1397+
f"tensorboard_run_resource.delete()\n"
1398+
f"tensorboard_run_artifact.delete()"
1399+
)
13821400

1383-
self._metadata_node.delete()
1401+
try:
1402+
self._metadata_node.delete()
1403+
except exceptions.NotFound:
1404+
_LOGGER.warning(
1405+
f"Experiment run {self.name} metadata node not found."
1406+
" Skipping deletion."
1407+
)
13841408

13851409
if self._is_legacy_experiment_run():
1386-
self._metadata_metric_artifact.delete()
1410+
try:
1411+
self._metadata_metric_artifact.delete()
1412+
except exceptions.NotFound:
1413+
_LOGGER.warning(
1414+
f"Experiment run {self.name} metadata node not found."
1415+
" Skipping deletion."
1416+
)
13871417

13881418
@_v1_not_supported
13891419
def get_artifacts(self) -> List[artifact.Artifact]:

tests/system/aiplatform/test_experiments.py

+108-2
Original file line numberDiff line numberDiff line change
@@ -444,7 +444,7 @@ def test_get_experiments_df(self):
444444
key=lambda d: d["run_name"],
445445
) == sorted(df.fillna(0.0).to_dict("records"), key=lambda d: d["run_name"])
446446

447-
def test_delete_run(self):
447+
def test_delete_run_does_not_exist_raises_exception(self):
448448
run = aiplatform.ExperimentRun(
449449
run_name=_RUN,
450450
experiment=self._experiment_name,
@@ -456,7 +456,113 @@ def test_delete_run(self):
456456
with pytest.raises(exceptions.NotFound):
457457
aiplatform.ExperimentRun(run_name=_RUN, experiment=self._experiment_name)
458458

459-
def test_delete_experiment(self):
459+
def test_delete_run_success(self):
460+
aiplatform.init(
461+
project=e2e_base._PROJECT,
462+
location=e2e_base._LOCATION,
463+
experiment=self._experiment_name,
464+
)
465+
aiplatform.start_run(_RUN)
466+
run = aiplatform.ExperimentRun(
467+
run_name=_RUN,
468+
experiment=self._experiment_name,
469+
project=e2e_base._PROJECT,
470+
location=e2e_base._LOCATION,
471+
)
472+
aiplatform.end_run()
473+
474+
run.delete(delete_backing_tensorboard_run=True)
475+
476+
with pytest.raises(exceptions.NotFound):
477+
aiplatform.ExperimentRun(
478+
run_name=_RUN,
479+
experiment=self._experiment_name,
480+
project=e2e_base._PROJECT,
481+
location=e2e_base._LOCATION,
482+
)
483+
484+
def test_reuse_run_success(self):
485+
aiplatform.init(
486+
project=e2e_base._PROJECT,
487+
location=e2e_base._LOCATION,
488+
experiment=self._experiment_name,
489+
)
490+
aiplatform.start_run(_RUN)
491+
run = aiplatform.ExperimentRun(
492+
run_name=_RUN,
493+
experiment=self._experiment_name,
494+
project=e2e_base._PROJECT,
495+
location=e2e_base._LOCATION,
496+
)
497+
aiplatform.end_run()
498+
run.delete(delete_backing_tensorboard_run=True)
499+
500+
aiplatform.start_run(_RUN)
501+
aiplatform.end_run()
502+
503+
run = aiplatform.ExperimentRun(
504+
run_name=_RUN,
505+
experiment=self._experiment_name,
506+
project=e2e_base._PROJECT,
507+
location=e2e_base._LOCATION,
508+
)
509+
assert run.name == _RUN
510+
511+
def test_delete_run_then_tensorboard_success(self):
512+
aiplatform.init(
513+
project=e2e_base._PROJECT,
514+
location=e2e_base._LOCATION,
515+
experiment=self._experiment_name,
516+
)
517+
aiplatform.start_run(_RUN, resume=True)
518+
run = aiplatform.ExperimentRun(
519+
run_name=_RUN,
520+
experiment=self._experiment_name,
521+
project=e2e_base._PROJECT,
522+
location=e2e_base._LOCATION,
523+
)
524+
aiplatform.end_run()
525+
run.delete()
526+
tensorboard_run_artifact = aiplatform.metadata.artifact.Artifact(
527+
artifact_name=f"{self._experiment_name}-{_RUN}-tb-run"
528+
)
529+
tensorboard_run_resource = aiplatform.TensorboardRun(
530+
tensorboard_run_artifact.metadata["resourceName"]
531+
)
532+
tensorboard_run_resource.delete()
533+
tensorboard_run_artifact.delete()
534+
535+
aiplatform.start_run(_RUN)
536+
aiplatform.end_run()
537+
538+
run = aiplatform.ExperimentRun(
539+
run_name=_RUN,
540+
experiment=self._experiment_name,
541+
project=e2e_base._PROJECT,
542+
location=e2e_base._LOCATION,
543+
)
544+
assert run.name == _RUN
545+
546+
def test_delete_wout_backing_tensorboard_reuse_run_raises_exception(self):
547+
aiplatform.init(
548+
project=e2e_base._PROJECT,
549+
location=e2e_base._LOCATION,
550+
experiment=self._experiment_name,
551+
)
552+
aiplatform.start_run(_RUN, resume=True)
553+
run = aiplatform.ExperimentRun(
554+
run_name=_RUN,
555+
experiment=self._experiment_name,
556+
project=e2e_base._PROJECT,
557+
location=e2e_base._LOCATION,
558+
)
559+
aiplatform.end_run()
560+
run.delete()
561+
562+
with pytest.raises(ValueError):
563+
aiplatform.start_run(_RUN)
564+
565+
def test_delete_experiment_does_not_exist_raises_exception(self):
460566
experiment = aiplatform.Experiment(
461567
experiment_name=self._experiment_name,
462568
project=e2e_base._PROJECT,

0 commit comments

Comments
 (0)