@@ -1515,7 +1515,6 @@ def from_local_script(
1515
1515
staging_bucket = staging_bucket ,
1516
1516
)
1517
1517
1518
- @base .optional_sync ()
1519
1518
def run (
1520
1519
self ,
1521
1520
service_account : Optional [str ] = None ,
@@ -1537,7 +1536,8 @@ def run(
1537
1536
Optional. The full name of the Compute Engine network to which the job
1538
1537
should be peered. For example, projects/12345/global/networks/myVPC.
1539
1538
Private services access must already be configured for the network.
1540
- If left unspecified, the job is not peered with any network.
1539
+ If left unspecified, the network set in aiplatform.init will be used.
1540
+ If no network is set there either, the job is not peered with any network.
1541
1541
timeout (int):
1542
1542
The maximum job running time in seconds. The default is 7 days.
1543
1543
restart_job_on_worker_restart (bool):
@@ -1570,7 +1570,73 @@ def run(
1570
1570
create_request_timeout (float):
1571
1571
Optional. The timeout for the create request in seconds.
1572
1572
"""
1573
+ network = network or initializer .global_config .network
1574
+
1575
+ self ._run (
1576
+ service_account = service_account ,
1577
+ network = network ,
1578
+ timeout = timeout ,
1579
+ restart_job_on_worker_restart = restart_job_on_worker_restart ,
1580
+ enable_web_access = enable_web_access ,
1581
+ tensorboard = tensorboard ,
1582
+ sync = sync ,
1583
+ create_request_timeout = create_request_timeout ,
1584
+ )
1585
+
1586
+ @base .optional_sync ()
1587
+ def _run (
1588
+ self ,
1589
+ service_account : Optional [str ] = None ,
1590
+ network : Optional [str ] = None ,
1591
+ timeout : Optional [int ] = None ,
1592
+ restart_job_on_worker_restart : bool = False ,
1593
+ enable_web_access : bool = False ,
1594
+ tensorboard : Optional [str ] = None ,
1595
+ sync : bool = True ,
1596
+ create_request_timeout : Optional [float ] = None ,
1597
+ ) -> None :
1598
+ """Helper method to ensure network synchronization and to run the configured CustomJob.
1599
+
1600
+ Args:
1601
+ service_account (str):
1602
+ Optional. Specifies the service account for workload run-as account.
1603
+ Users submitting jobs must have act-as permission on this run-as account.
1604
+ network (str):
1605
+ Optional. The full name of the Compute Engine network to which the job
1606
+ should be peered. For example, projects/12345/global/networks/myVPC.
1607
+ Private services access must already be configured for the network.
1608
+ timeout (int):
1609
+ The maximum job running time in seconds. The default is 7 days.
1610
+ restart_job_on_worker_restart (bool):
1611
+ Restarts the entire CustomJob if a worker
1612
+ gets restarted. This feature can be used by
1613
+ distributed training jobs that are not resilient
1614
+ to workers leaving and joining a job.
1615
+ enable_web_access (bool):
1616
+ Whether you want Vertex AI to enable interactive shell access
1617
+ to training containers.
1618
+ https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell
1619
+ tensorboard (str):
1620
+ Optional. The name of a Vertex AI
1621
+ [Tensorboard][google.cloud.aiplatform.v1beta1.Tensorboard]
1622
+ resource to which this CustomJob will upload Tensorboard
1623
+ logs. Format:
1624
+ ``projects/{project}/locations/{location}/tensorboards/{tensorboard}``
1573
1625
1626
+ The training script should write Tensorboard logs to the directory named by the following Vertex AI environment
1627
+ variable:
1628
+
1629
+ AIP_TENSORBOARD_LOG_DIR
1630
+
1631
+ `service_account` is required with provided `tensorboard`.
1632
+ For more information on configuring your service account please visit:
1633
+ https://cloud.google.com/vertex-ai/docs/experiments/tensorboard-training
1634
+ sync (bool):
1635
+ Whether to execute this method synchronously. If False, this method
1636
+ will unblock and it will be executed in a concurrent Future.
1637
+ create_request_timeout (float):
1638
+ Optional. The timeout for the create request in seconds.
1639
+ """
1574
1640
if service_account :
1575
1641
self ._gca_resource .job_spec .service_account = service_account
1576
1642
@@ -1907,7 +1973,6 @@ def _log_web_access_uris(self):
1907
1973
)
1908
1974
self ._logged_web_access_uris .add (uri )
1909
1975
1910
- @base .optional_sync ()
1911
1976
def run (
1912
1977
self ,
1913
1978
service_account : Optional [str ] = None ,
@@ -1929,7 +1994,8 @@ def run(
1929
1994
Optional. The full name of the Compute Engine network to which the job
1930
1995
should be peered. For example, projects/12345/global/networks/myVPC.
1931
1996
Private services access must already be configured for the network.
1932
- If left unspecified, the job is not peered with any network.
1997
+ If left unspecified, the network set in aiplatform.init will be used.
1998
+ If no network is set there either, the job is not peered with any network.
1933
1999
timeout (int):
1934
2000
Optional. The maximum job running time in seconds. The default is 7 days.
1935
2001
restart_job_on_worker_restart (bool):
@@ -1962,7 +2028,73 @@ def run(
1962
2028
create_request_timeout (float):
1963
2029
Optional. The timeout for the create request in seconds.
1964
2030
"""
2031
+ network = network or initializer .global_config .network
2032
+
2033
+ self ._run (
2034
+ service_account = service_account ,
2035
+ network = network ,
2036
+ timeout = timeout ,
2037
+ restart_job_on_worker_restart = restart_job_on_worker_restart ,
2038
+ enable_web_access = enable_web_access ,
2039
+ tensorboard = tensorboard ,
2040
+ sync = sync ,
2041
+ create_request_timeout = create_request_timeout ,
2042
+ )
2043
+
2044
+ @base .optional_sync ()
2045
+ def _run (
2046
+ self ,
2047
+ service_account : Optional [str ] = None ,
2048
+ network : Optional [str ] = None ,
2049
+ timeout : Optional [int ] = None , # seconds
2050
+ restart_job_on_worker_restart : bool = False ,
2051
+ enable_web_access : bool = False ,
2052
+ tensorboard : Optional [str ] = None ,
2053
+ sync : bool = True ,
2054
+ create_request_timeout : Optional [float ] = None ,
2055
+ ) -> None :
2056
+ """Helper method to ensure network synchronization and to run the configured HyperparameterTuningJob.
2057
+
2058
+ Args:
2059
+ service_account (str):
2060
+ Optional. Specifies the service account for workload run-as account.
2061
+ Users submitting jobs must have act-as permission on this run-as account.
2062
+ network (str):
2063
+ Optional. The full name of the Compute Engine network to which the job
2064
+ should be peered. For example, projects/12345/global/networks/myVPC.
2065
+ Private services access must already be configured for the network.
2066
+ timeout (int):
2067
+ Optional. The maximum job running time in seconds. The default is 7 days.
2068
+ restart_job_on_worker_restart (bool):
2069
+ Restarts the entire CustomJob if a worker
2070
+ gets restarted. This feature can be used by
2071
+ distributed training jobs that are not resilient
2072
+ to workers leaving and joining a job.
2073
+ enable_web_access (bool):
2074
+ Whether you want Vertex AI to enable interactive shell access
2075
+ to training containers.
2076
+ https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell
2077
+ tensorboard (str):
2078
+ Optional. The name of a Vertex AI
2079
+ [Tensorboard][google.cloud.aiplatform.v1beta1.Tensorboard]
2080
+ resource to which this CustomJob will upload Tensorboard
2081
+ logs. Format:
2082
+ ``projects/{project}/locations/{location}/tensorboards/{tensorboard}``
1965
2083
2084
+ The training script should write Tensorboard logs to the directory named by the following Vertex AI environment
2085
+ variable:
2086
+
2087
+ AIP_TENSORBOARD_LOG_DIR
2088
+
2089
+ `service_account` is required with provided `tensorboard`.
2090
+ For more information on configuring your service account please visit:
2091
+ https://cloud.google.com/vertex-ai/docs/experiments/tensorboard-training
2092
+ sync (bool):
2093
+ Whether to execute this method synchronously. If False, this method
2094
+ will unblock and it will be executed in a concurrent Future.
2095
+ create_request_timeout (float):
2096
+ Optional. The timeout for the create request in seconds.
2097
+ """
1966
2098
if service_account :
1967
2099
self ._gca_resource .trial_job_spec .service_account = service_account
1968
2100
0 commit comments