feat: Add incremental training to AutoMLImageTrainingJob.

vertex-sdk-bot · copybara-github · commit bb92380f1281 · 2023-03-16T17:30:02.000-07:00
PiperOrigin-RevId: 517272484
diff --git a/google/cloud/aiplatform/training_jobs.py b/google/cloud/aiplatform/training_jobs.py
@@ -5270,6 +5270,7 @@ def __init__(
         multi_label: bool = False,
         model_type: str = "CLOUD",
         base_model: Optional[models.Model] = None,
+        incremental_train_base_model: Optional[models.Model] = None,
         project: Optional[str] = None,
         location: Optional[str] = None,
         credentials: Optional[auth_credentials.Credentials] = None,
@@ -5335,6 +5336,12 @@ def __init__(
                 Otherwise, the new model will be trained from scratch. The `base` model
                 must be in the same Project and Location as the new Model to train,
                 and have the same model_type.
+            incremental_train_base_model (models.Model):
+                Optional. For both Image Classification and Object Detection models, an
+                existing model to use as the starting point for incrementally training a
+                new model with reduced training time. If not specified, the new model is
+                trained from scratch. This model must be in the same Project and Location
+                as the new Model to train, and have the same prediction_type and model_type.
             project (str):
                 Optional. Project to run training in. Overrides project set in aiplatform.init.
             location (str):
@@ -5423,6 +5430,7 @@ def __init__(
         self._prediction_type = prediction_type
         self._multi_label = multi_label
         self._base_model = base_model
+        self._incremental_train_base_model = incremental_train_base_model
 
     def run(
         self,
@@ -5603,6 +5611,7 @@ def run(
         return self._run(
             dataset=dataset,
             base_model=self._base_model,
+            incremental_train_base_model=self._incremental_train_base_model,
             training_fraction_split=training_fraction_split,
             validation_fraction_split=validation_fraction_split,
             test_fraction_split=test_fraction_split,
@@ -5627,6 +5636,7 @@ def _run(
         self,
         dataset: datasets.ImageDataset,
         base_model: Optional[models.Model] = None,
+        incremental_train_base_model: Optional[models.Model] = None,
         training_fraction_split: Optional[float] = None,
         validation_fraction_split: Optional[float] = None,
         test_fraction_split: Optional[float] = None,
@@ -5681,6 +5691,12 @@ def _run(
                 Otherwise, the new model will be trained from scratch. The `base` model
                 must be in the same Project and Location as the new Model to train,
                 and have the same model_type.
+            incremental_train_base_model (models.Model):
+                Optional. For both Image Classification and Object Detection models, an
+                existing model to use as the starting point for incrementally training a
+                new model with reduced training time. If not specified, the new model is
+                trained from scratch. This model must be in the same Project and Location
+                as the new Model to train, and have the same prediction_type and model_type.
             model_id (str):
                 Optional. The ID to use for the Model produced by this job,
                 which will become the final component of the model resource name.
@@ -5818,6 +5834,11 @@ def _run(
             # Set ID of Vertex AI Model to base this training job off of
             training_task_inputs_dict["baseModelId"] = base_model.name
 
+        if incremental_train_base_model:
+            training_task_inputs_dict[
+                "uptrainBaseModelId"
+            ] = incremental_train_base_model.name
+
         return self._run_job(
             training_task_definition=training_task_definition,
             training_task_inputs=training_task_inputs_dict,
diff --git a/tests/unit/aiplatform/test_automl_image_training_jobs.py b/tests/unit/aiplatform/test_automl_image_training_jobs.py
@@ -85,6 +85,17 @@
     struct_pb2.Value(),
 )
 
+_TEST_TRAINING_TASK_INPUTS_WITH_UPTRAIN_BASE_MODEL = json_format.ParseDict(
+    {
+        "modelType": "CLOUD",
+        "budgetMilliNodeHours": _TEST_TRAINING_BUDGET_MILLI_NODE_HOURS,
+        "multiLabel": False,
+        "disableEarlyStopping": _TEST_TRAINING_DISABLE_EARLY_STOPPING,
+        "uptrainBaseModelId": _TEST_MODEL_ID,
+    },
+    struct_pb2.Value(),
+)
+
 _TEST_FRACTION_SPLIT_TRAINING = 0.6
 _TEST_FRACTION_SPLIT_VALIDATION = 0.2
 _TEST_FRACTION_SPLIT_TEST = 0.2
@@ -213,6 +224,20 @@ def mock_model():
     yield model
 
 
+@pytest.fixture
+def mock_uptrain_base_model():
+    model = mock.MagicMock(models.Model)
+    model.name = _TEST_MODEL_ID
+    model._latest_future = None
+    model._exception = None
+    model._gca_resource = gca_model.Model(
+        display_name=_TEST_MODEL_DISPLAY_NAME,
+        description="This is the mock uptrain base Model's description",
+        name=_TEST_MODEL_NAME,
+    )
+    yield model
+
+
 @pytest.mark.usefixtures("google_auth_mock")
 class TestAutoMLImageTrainingJob:
     def setup_method(self):
@@ -223,7 +248,7 @@ def teardown_method(self):
         initializer.global_pool.shutdown(wait=True)
 
     def test_init_all_parameters(self, mock_model):
-        """Ensure all private members are set correctly at initialization"""
+        """Ensure all private members are set correctly at initialization."""
 
         aiplatform.init(project=_TEST_PROJECT)
 
@@ -275,7 +300,7 @@ def test_run_call_pipeline_service_create(
         mock_pipeline_service_get,
         mock_dataset_image,
         mock_model_service_get,
-        mock_model,
+        mock_uptrain_base_model,
         sync,
     ):
         """Create and run an AutoML ICN training job, verify calls and return value"""
@@ -287,7 +312,7 @@ def test_run_call_pipeline_service_create(
 
         job = training_jobs.AutoMLImageTrainingJob(
             display_name=_TEST_DISPLAY_NAME,
-            base_model=mock_model,
+            incremental_train_base_model=mock_uptrain_base_model,
             labels=_TEST_LABELS,
         )
 
@@ -315,8 +340,7 @@ def test_run_call_pipeline_service_create(
 
         true_managed_model = gca_model.Model(
             display_name=_TEST_MODEL_DISPLAY_NAME,
-            labels=mock_model._gca_resource.labels,
-            description=mock_model._gca_resource.description,
+            labels=_TEST_MODEL_LABELS,
             encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,
             version_aliases=["default"],
         )
@@ -330,7 +354,7 @@ def test_run_call_pipeline_service_create(
             display_name=_TEST_DISPLAY_NAME,
             labels=_TEST_LABELS,
             training_task_definition=schema.training_job.definition.automl_image_classification,
-            training_task_inputs=_TEST_TRAINING_TASK_INPUTS_WITH_BASE_MODEL,
+            training_task_inputs=_TEST_TRAINING_TASK_INPUTS_WITH_UPTRAIN_BASE_MODEL,
             model_to_upload=true_managed_model,
             input_data_config=true_input_data_config,
             encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,
@@ -754,7 +778,7 @@ def test_splits_default(
         mock_pipeline_service_get,
         mock_dataset_image,
         mock_model_service_get,
-        mock_model,
+        mock_uptrain_base_model,
         sync,
     ):
         """
@@ -768,7 +792,8 @@ def test_splits_default(
         )
 
         job = training_jobs.AutoMLImageTrainingJob(
-            display_name=_TEST_DISPLAY_NAME, base_model=mock_model
+            display_name=_TEST_DISPLAY_NAME,
+            incremental_train_base_model=mock_uptrain_base_model,
         )
 
         model_from_job = job.run(
@@ -785,7 +810,6 @@ def test_splits_default(
 
         true_managed_model = gca_model.Model(
             display_name=_TEST_MODEL_DISPLAY_NAME,
-            description=mock_model._gca_resource.description,
             encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,
             version_aliases=["default"],
         )
@@ -797,7 +821,7 @@ def test_splits_default(
         true_training_pipeline = gca_training_pipeline.TrainingPipeline(
             display_name=_TEST_DISPLAY_NAME,
             training_task_definition=schema.training_job.definition.automl_image_classification,
-            training_task_inputs=_TEST_TRAINING_TASK_INPUTS_WITH_BASE_MODEL,
+            training_task_inputs=_TEST_TRAINING_TASK_INPUTS_WITH_UPTRAIN_BASE_MODEL,
             model_to_upload=true_managed_model,
             input_data_config=true_input_data_config,
             encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,