
Commit 98ab2f9

Ark-kun authored and copybara-github committed

feat: LLM - Support accelerator_type in tuning
PiperOrigin-RevId: 574768322
1 parent cbe3a0d commit 98ab2f9
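
For orientation, here is a minimal sketch of how the parameter added by this commit is meant to be used from the SDK. This is a hedged example, not code from the commit: the project, bucket, and dataset values are placeholders.

    import vertexai
    from vertexai.language_models import TextGenerationModel

    # Placeholder project/location values for illustration only.
    vertexai.init(project="my-project", location="us-central1")

    model = TextGenerationModel.from_pretrained("text-bison@001")
    tuning_job = model.tune_model(
        training_data="gs://my-bucket/tuning_data.jsonl",  # placeholder dataset URI
        tuning_job_location="europe-west4",
        tuned_model_location="us-central1",
        accelerator_type="TPU",  # new in this commit; accepts "TPU" or "GPU"
    )

The locations mirror those exercised by the updated tests below.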


2 files changed: +41 -1 lines changed


tests/unit/aiplatform/test_language_models.py (+15)
@@ -365,6 +365,11 @@ def reverse_string_2(s):""",
     "dag": {"tasks": {}},
     "inputDefinitions": {
         "parameters": {
+            "accelerator_type": {
+                "defaultValue": "",
+                "isOptional": True,
+                "parameterType": "STRING",
+            },
             "api_endpoint": {
                 "defaultValue": "aiplatform.googleapis.com/ui",
                 "isOptional": True,
@@ -1568,6 +1573,7 @@ def test_tune_text_generation_model(
         enable_early_stopping=enable_early_stopping,
         tensorboard=tensorboard_name,
     ),
+    accelerator_type="TPU",
 )
 call_kwargs = mock_pipeline_service_create.call_args[1]
 pipeline_arguments = call_kwargs[
@@ -1581,6 +1587,7 @@ def test_tune_text_generation_model(
 assert pipeline_arguments["enable_early_stopping"] == enable_early_stopping
 assert pipeline_arguments["tensorboard_resource_id"] == tensorboard_name
 assert pipeline_arguments["large_model_reference"] == "text-bison@001"
+assert pipeline_arguments["accelerator_type"] == "TPU"
 assert (
     call_kwargs["pipeline_job"].encryption_spec.kms_key_name
     == _TEST_ENCRYPTION_KEY_NAME
@@ -1649,6 +1656,7 @@ def test_tune_text_generation_model_ga(
         enable_early_stopping=enable_early_stopping,
         tensorboard=tensorboard_name,
     ),
+    accelerator_type="TPU",
 )
 call_kwargs = mock_pipeline_service_create.call_args[1]
 pipeline_arguments = call_kwargs[
@@ -1661,6 +1669,7 @@ def test_tune_text_generation_model_ga(
 assert pipeline_arguments["enable_early_stopping"] == enable_early_stopping
 assert pipeline_arguments["tensorboard_resource_id"] == tensorboard_name
 assert pipeline_arguments["large_model_reference"] == "text-bison@001"
+assert pipeline_arguments["accelerator_type"] == "TPU"
 assert (
     call_kwargs["pipeline_job"].encryption_spec.kms_key_name
     == _TEST_ENCRYPTION_KEY_NAME
@@ -1808,13 +1817,15 @@ def test_tune_chat_model(
     tuning_job_location="europe-west4",
     tuned_model_location="us-central1",
     default_context=default_context,
+    accelerator_type="TPU",
 )
 call_kwargs = mock_pipeline_service_create.call_args[1]
 pipeline_arguments = call_kwargs[
     "pipeline_job"
 ].runtime_config.parameter_values
 assert pipeline_arguments["large_model_reference"] == "chat-bison@001"
 assert pipeline_arguments["default_context"] == default_context
+assert pipeline_arguments["accelerator_type"] == "TPU"
 
 # Testing the tuned model
 tuned_model = tuning_job.get_tuned_model()
@@ -1862,12 +1873,14 @@ def test_tune_code_generation_model(
     training_data=_TEST_TEXT_BISON_TRAINING_DF,
     tuning_job_location="europe-west4",
     tuned_model_location="us-central1",
+    accelerator_type="TPU",
 )
 call_kwargs = mock_pipeline_service_create.call_args[1]
 pipeline_arguments = call_kwargs[
     "pipeline_job"
 ].runtime_config.parameter_values
 assert pipeline_arguments["large_model_reference"] == "code-bison@001"
+assert pipeline_arguments["accelerator_type"] == "TPU"
 
 @pytest.mark.parametrize(
     "job_spec",
@@ -1909,12 +1922,14 @@ def test_tune_code_chat_model(
     training_data=_TEST_TEXT_BISON_TRAINING_DF,
     tuning_job_location="europe-west4",
     tuned_model_location="us-central1",
+    accelerator_type="TPU",
 )
 call_kwargs = mock_pipeline_service_create.call_args[1]
 pipeline_arguments = call_kwargs[
     "pipeline_job"
 ].runtime_config.parameter_values
 assert pipeline_arguments["large_model_reference"] == "codechat-bison@001"
+assert pipeline_arguments["accelerator_type"] == "TPU"
 
 @pytest.mark.usefixtures(
     "get_model_with_tuned_version_label_mock",

vertexai/language_models/_language_models.py (+26 -1)
@@ -15,7 +15,7 @@
 """Classes for working with language models."""
 
 import dataclasses
-from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, Sequence, Union
+from typing import Any, AsyncIterator, Dict, Iterator, List, Literal, Optional, Sequence, Union
 import warnings
 
 from google.cloud import aiplatform
@@ -42,6 +42,9 @@
 # Endpoint label/metadata key to preserve the base model ID information
 _TUNING_BASE_MODEL_ID_LABEL_KEY = "google-vertex-llm-tuning-base-model-id"
 
+_ACCELERATOR_TYPES = ["TPU", "GPU"]
+_ACCELERATOR_TYPE_TYPE = Literal["TPU", "GPU"]
+
 
 def _get_model_id_from_tuning_model_id(tuning_model_id: str) -> str:
     """Gets the base model ID for the model ID labels used the tuned models.
@@ -166,6 +169,7 @@ def tune_model(
     model_display_name: Optional[str] = None,
     tuning_evaluation_spec: Optional["TuningEvaluationSpec"] = None,
     default_context: Optional[str] = None,
+    accelerator_type: Optional[_ACCELERATOR_TYPE_TYPE] = None,
 ) -> "_LanguageModelTuningJob":
     """Tunes a model based on training data.
 
@@ -191,6 +195,7 @@ def tune_model(
     model_display_name: Custom display name for the tuned model.
     tuning_evaluation_spec: Specification for the model evaluation during tuning.
     default_context: The context to use for all training samples by default.
+    accelerator_type: Type of accelerator to use. Can be "TPU" or "GPU".
 
 Returns:
     A `LanguageModelTuningJob` object that represents the tuning job.
@@ -252,6 +257,14 @@ def tune_model(
 if default_context:
     tuning_parameters["default_context"] = default_context
 
+if accelerator_type:
+    if accelerator_type not in _ACCELERATOR_TYPES:
+        raise ValueError(
+            f"Unsupported accelerator type: {accelerator_type}."
+            f" Supported types: {_ACCELERATOR_TYPES}"
+        )
+    tuning_parameters["accelerator_type"] = accelerator_type
+
 return self._tune_model(
     training_data=training_data,
     tuning_parameters=tuning_parameters,
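
The guard above fails fast, before any PipelineJob is created. A hedged sketch of the resulting behavior (assumes vertexai.init(...) as in the first example; the data URI is a placeholder):

    from vertexai.language_models import TextGenerationModel

    model = TextGenerationModel.from_pretrained("text-bison@001")
    try:
        model.tune_model(
            training_data="gs://my-bucket/tuning_data.jsonl",  # placeholder
            accelerator_type="CPU",  # deliberately invalid: not in _ACCELERATOR_TYPES
        )
    except ValueError as e:
        print(e)  # Unsupported accelerator type: CPU. Supported types: ['TPU', 'GPU']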
@@ -336,6 +349,7 @@ def tune_model(
     tuned_model_location: Optional[str] = None,
     model_display_name: Optional[str] = None,
     tuning_evaluation_spec: Optional["TuningEvaluationSpec"] = None,
+    accelerator_type: Optional[_ACCELERATOR_TYPE_TYPE] = None,
 ) -> "_LanguageModelTuningJob":
     """Tunes a model based on training data.
 
@@ -357,6 +371,7 @@ def tune_model(
     tuned_model_location: GCP location where the tuned model should be deployed. Only "us-central1" is supported for now.
     model_display_name: Custom display name for the tuned model.
     tuning_evaluation_spec: Specification for the model evaluation during tuning.
+    accelerator_type: Type of accelerator to use. Can be "TPU" or "GPU".
 
 Returns:
     A `LanguageModelTuningJob` object that represents the tuning job.
@@ -376,6 +391,7 @@ def tune_model(
     tuned_model_location=tuned_model_location,
     model_display_name=model_display_name,
     tuning_evaluation_spec=tuning_evaluation_spec,
+    accelerator_type=accelerator_type,
 )
 
 
@@ -393,6 +409,7 @@ def tune_model(
     tuned_model_location: Optional[str] = None,
     model_display_name: Optional[str] = None,
     tuning_evaluation_spec: Optional["TuningEvaluationSpec"] = None,
+    accelerator_type: Optional[_ACCELERATOR_TYPE_TYPE] = None,
 ) -> "_LanguageModelTuningJob":
     """Tunes a model based on training data.
 
@@ -421,6 +438,7 @@ def tune_model(
     tuned_model_location: GCP location where the tuned model should be deployed. Only "us-central1" is supported for now.
     model_display_name: Custom display name for the tuned model.
     tuning_evaluation_spec: Specification for the model evaluation during tuning.
+    accelerator_type: Type of accelerator to use. Can be "TPU" or "GPU".
 
 Returns:
     A `LanguageModelTuningJob` object that represents the tuning job.
@@ -441,6 +459,7 @@ def tune_model(
     tuned_model_location=tuned_model_location,
     model_display_name=model_display_name,
     tuning_evaluation_spec=tuning_evaluation_spec,
+    accelerator_type=accelerator_type,
 )
 tuned_model = job.get_tuned_model()
 self._endpoint = tuned_model._endpoint
@@ -461,6 +480,7 @@ def tune_model(
     tuned_model_location: Optional[str] = None,
     model_display_name: Optional[str] = None,
     default_context: Optional[str] = None,
+    accelerator_type: Optional[_ACCELERATOR_TYPE_TYPE] = None,
 ) -> "_LanguageModelTuningJob":
     """Tunes a model based on training data.
 
@@ -485,6 +505,7 @@ def tune_model(
     tuned_model_location: GCP location where the tuned model should be deployed. Only "us-central1" is supported for now.
     model_display_name: Custom display name for the tuned model.
     default_context: The context to use for all training samples by default.
+    accelerator_type: Type of accelerator to use. Can be "TPU" or "GPU".
 
 Returns:
     A `LanguageModelTuningJob` object that represents the tuning job.
@@ -504,6 +525,7 @@ def tune_model(
     tuned_model_location=tuned_model_location,
     model_display_name=model_display_name,
     default_context=default_context,
+    accelerator_type=accelerator_type,
 )
 
 
@@ -521,6 +543,7 @@ def tune_model(
     tuned_model_location: Optional[str] = None,
     model_display_name: Optional[str] = None,
     default_context: Optional[str] = None,
+    accelerator_type: Optional[_ACCELERATOR_TYPE_TYPE] = None,
 ) -> "_LanguageModelTuningJob":
     """Tunes a model based on training data.
 
@@ -549,6 +572,7 @@ def tune_model(
     tuned_model_location: GCP location where the tuned model should be deployed. Only "us-central1" is supported for now.
     model_display_name: Custom display name for the tuned model.
     default_context: The context to use for all training samples by default.
+    accelerator_type: Type of accelerator to use. Can be "TPU" or "GPU".
 
 Returns:
     A `LanguageModelTuningJob` object that represents the tuning job.
@@ -569,6 +593,7 @@ def tune_model(
     tuned_model_location=tuned_model_location,
     model_display_name=model_display_name,
     default_context=default_context,
+    accelerator_type=accelerator_type,
 )
 tuned_model = job.get_tuned_model()
 self._endpoint = tuned_model._endpoint
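
Because the chat and code model classes forward the new argument through their own tune_model overrides (the hunks above), the parameter behaves identically there. A minimal sketch for the chat case (placeholder data URI and context; assumes vertexai.init(...) as in the first example):

    from vertexai.language_models import ChatModel

    chat_model = ChatModel.from_pretrained("chat-bison@001")
    tuning_job = chat_model.tune_model(
        training_data="gs://my-bucket/chat_tuning_data.jsonl",  # placeholder
        default_context="You are a helpful assistant.",  # placeholder
        accelerator_type="GPU",  # the other supported value
    )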
