Skip to content

Commit 37627de

Browse files
vertex-sdk-bot authored and
copybara-github committed
fix: Tensorboard - Fixed bug in tensorboard uploader where profile logs are not uploaded from nested run directories.
PiperOrigin-RevId: 670652532
1 parent 36a56b9 commit 37627de

File tree

5 files changed

+20
-9
lines changed

5 files changed

+20
-9
lines changed

google/cloud/aiplatform/tensorboard/logdir_loader.py

-1
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,6 @@ def synchronize_runs(self):
7070
runs_seen = set()
7171
for subdir in io_wrapper.GetLogdirSubdirectories(self._logdir):
7272
run = os.path.relpath(subdir, self._logdir)
73-
run = run.replace("/", "-").replace("_", "-")
7473
runs_seen.add(run)
7574
if run not in self._directory_loaders:
7675
logger.info("- Adding run for relative directory %s", run)

google/cloud/aiplatform/tensorboard/plugins/tf_profiler/profile_uploader.py

+4 -1
Original file line number | Diff line number | Diff line change
@@ -176,7 +176,10 @@ def send_request(self, run_name: str):
176176
self._profile_dir(run_name)
177177
)
178178

179-
tb_run = self._one_platform_resource_manager.get_run_resource_name(run_name)
179+
experiment_run_name = uploader_utils.reformat_run_name(run_name)
180+
tb_run = self._one_platform_resource_manager.get_run_resource_name(
181+
experiment_run_name
182+
)
180183

181184
if run_name not in self._run_to_file_request_sender:
182185
self._run_to_file_request_sender[

google/cloud/aiplatform/tensorboard/uploader.py

+6 -2
Original file line number | Diff line number | Diff line change
@@ -417,7 +417,8 @@ def _pre_create_runs_and_time_series(self):
417417
plugin_data=metadata.plugin_data.content,
418418
)
419419

420-
self._one_platform_resource_manager.batch_create_runs(run_names)
420+
experiment_runs = [uploader_utils.reformat_run_name(run) for run in run_names]
421+
self._one_platform_resource_manager.batch_create_runs(experiment_runs)
421422
self._one_platform_resource_manager.batch_create_time_series(
422423
run_tag_name_to_time_series_proto
423424
)
@@ -451,7 +452,9 @@ def _upload_once(self):
451452
)
452453
run_to_events[profile_run_name] = None
453454

454-
self._experiment_runs = run_to_events.keys()
455+
self._experiment_runs = [
456+
uploader_utils.reformat_run_name(run) for run in run_to_events.keys()
457+
]
455458

456459
with self._tracker.send_tracker():
457460
self._dispatcher.dispatch_requests(run_to_events)
@@ -807,6 +810,7 @@ def _add_event_internal(
807810
metadata: tf.compat.v1.SummaryMetadata,
808811
):
809812
self._num_values += 1
813+
run_name = uploader_utils.reformat_run_name(run_name)
810814
time_series_data_proto = self._run_to_tag_to_time_series_data[run_name].get(
811815
value.tag
812816
)

google/cloud/aiplatform/tensorboard/uploader_utils.py

+5
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,11 @@
4747
logger.setLevel(logging.WARNING)
4848

4949

50+
def reformat_run_name(run_name: str) -> str:
51+
"""Reformats the run name to be compatible with One Platform."""
52+
return run_name.replace("/", "-").replace("_", "-")
53+
54+
5055
class RateLimiter:
5156
"""Helper class for rate-limiting using a fixed minimum interval."""
5257

tests/unit/aiplatform/test_logdir_loader.py

+5 -5
Original file line number | Diff line number | Diff line change
@@ -201,15 +201,15 @@ def test_multiple_writes_to_logdir(self):
201201
{
202202
"a": ["tag_a"],
203203
"b": ["tag_b"],
204-
"b-x": ["tag_b_x"],
205-
"b-z": ["tag_b_z"],
204+
"b/x": ["tag_b_x"],
205+
"b_z": ["tag_b_z"],
206206
"c": ["tag_c"],
207207
},
208208
)
209209
# A second load should indicate no new data.
210210
self.assertEqual(
211211
self._extract_run_to_tags(loader.get_run_events()),
212-
{"a": [], "b": [], "b-x": [], "b-z": [], "c": []},
212+
{"a": [], "b": [], "b/x": [], "b_z": [], "c": []},
213213
)
214214
# Write some new data to both new and pre-existing event files.
215215
with FileWriter(os.path.join(logdir, "a"), filename_suffix=".other") as writer:
@@ -228,8 +228,8 @@ def test_multiple_writes_to_logdir(self):
228228
{
229229
"a": ["tag_a_2", "tag_a_3", "tag_a_4"],
230230
"b": [],
231-
"b-x": ["tag_b_x_2"],
232-
"b-z": [],
231+
"b/x": ["tag_b_x_2"],
232+
"b_z": [],
233233
"c": ["tag_c_2"],
234234
},
235235
)

0 commit comments

Comments
 (0)