Skip to content

Commit 6166152

Browse files
jaycee-li authored and copybara-github committed
feat: GenAI - Added Llama3 support in GenAI batch prediction
PiperOrigin-RevId: 669193397
1 parent 72fcc06 commit 6166152

File tree

2 files changed

+90
-2
lines changed

2 files changed

+90
-2
lines changed

tests/unit/vertexai/test_batch_prediction.py

+77-1
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@
5050
_TEST_TUNED_GEMINI_MODEL_RESOURCE_NAME = "projects/123/locations/us-central1/models/456"
5151
_TEST_PALM_MODEL_NAME = "text-bison"
5252
_TEST_PALM_MODEL_RESOURCE_NAME = f"publishers/google/models/{_TEST_PALM_MODEL_NAME}"
53+
_TEST_LLAMA_MODEL_NAME = "llama3-405b-instruct-maas"
54+
_TEST_LLAMA_MODEL_RESOURCE_NAME = f"publishers/meta/models/{_TEST_LLAMA_MODEL_NAME}"
5355

5456
_TEST_GCS_INPUT_URI = "gs://test-bucket/test-input.jsonl"
5557
_TEST_GCS_INPUT_URI_2 = "gs://test-bucket/test-input-2.jsonl"
@@ -127,6 +129,23 @@ def get_batch_prediction_job_with_gcs_output_mock():
127129
yield get_job_mock
128130

129131

132+
@pytest.fixture
133+
def get_batch_prediction_job_with_llama_model_mock():
134+
with mock.patch.object(
135+
job_service_client.JobServiceClient, "get_batch_prediction_job"
136+
) as get_job_mock:
137+
get_job_mock.return_value = gca_batch_prediction_job_compat.BatchPredictionJob(
138+
name=_TEST_BATCH_PREDICTION_JOB_NAME,
139+
display_name=_TEST_DISPLAY_NAME,
140+
model=_TEST_LLAMA_MODEL_RESOURCE_NAME,
141+
state=_TEST_JOB_STATE_SUCCESS,
142+
output_info=gca_batch_prediction_job_compat.BatchPredictionJob.OutputInfo(
143+
gcs_output_directory=_TEST_GCS_OUTPUT_PREFIX
144+
),
145+
)
146+
yield get_job_mock
147+
148+
130149
@pytest.fixture
131150
def get_batch_prediction_job_with_tuned_gemini_model_mock():
132151
with mock.patch.object(
@@ -252,6 +271,16 @@ def test_init_batch_prediction_job(
252271
name=_TEST_BATCH_PREDICTION_JOB_NAME, retry=aiplatform_base._DEFAULT_RETRY
253272
)
254273

274+
def test_init_batch_prediction_job_with_llama_model(
275+
self,
276+
get_batch_prediction_job_with_llama_model_mock,
277+
):
278+
batch_prediction.BatchPredictionJob(_TEST_BATCH_PREDICTION_JOB_ID)
279+
280+
get_batch_prediction_job_with_llama_model_mock.assert_called_once_with(
281+
name=_TEST_BATCH_PREDICTION_JOB_NAME, retry=aiplatform_base._DEFAULT_RETRY
282+
)
283+
255284
def test_init_batch_prediction_job_with_tuned_gemini_model(
256285
self,
257286
get_batch_prediction_job_with_tuned_gemini_model_mock,
@@ -447,6 +476,39 @@ def test_submit_batch_prediction_job_with_bq_input_without_output_uri_prefix(
447476
timeout=None,
448477
)
449478

479+
def test_submit_batch_prediction_job_with_llama_model(
480+
self,
481+
create_batch_prediction_job_mock,
482+
):
483+
job = batch_prediction.BatchPredictionJob.submit(
484+
source_model=_TEST_LLAMA_MODEL_RESOURCE_NAME,
485+
input_dataset=_TEST_BQ_INPUT_URI,
486+
)
487+
488+
assert job.gca_resource == _TEST_GAPIC_BATCH_PREDICTION_JOB
489+
490+
expected_gapic_batch_prediction_job = gca_batch_prediction_job_compat.BatchPredictionJob(
491+
display_name=_TEST_DISPLAY_NAME,
492+
model=_TEST_LLAMA_MODEL_RESOURCE_NAME,
493+
input_config=gca_batch_prediction_job_compat.BatchPredictionJob.InputConfig(
494+
instances_format="bigquery",
495+
bigquery_source=gca_io_compat.BigQuerySource(
496+
input_uri=_TEST_BQ_INPUT_URI
497+
),
498+
),
499+
output_config=gca_batch_prediction_job_compat.BatchPredictionJob.OutputConfig(
500+
bigquery_destination=gca_io_compat.BigQueryDestination(
501+
output_uri=_TEST_BQ_OUTPUT_PREFIX
502+
),
503+
predictions_format="bigquery",
504+
),
505+
)
506+
create_batch_prediction_job_mock.assert_called_once_with(
507+
parent=_TEST_PARENT,
508+
batch_prediction_job=expected_gapic_batch_prediction_job,
509+
timeout=None,
510+
)
511+
450512
@pytest.mark.usefixtures("create_batch_prediction_job_mock")
451513
def test_submit_batch_prediction_job_with_tuned_model(
452514
self,
@@ -467,14 +529,28 @@ def test_submit_batch_prediction_job_with_invalid_source_model(self):
467529
with pytest.raises(
468530
ValueError,
469531
match=(
470-
f"Model '{_TEST_PALM_MODEL_RESOURCE_NAME}' is not a Generative AI model."
532+
"Abbreviated model names are only supported for Gemini models. "
533+
"Please provide the full publisher model name."
471534
),
472535
):
473536
batch_prediction.BatchPredictionJob.submit(
474537
source_model=_TEST_PALM_MODEL_NAME,
475538
input_dataset=_TEST_GCS_INPUT_URI,
476539
)
477540

541+
def test_submit_batch_prediction_job_with_invalid_abbreviated_model_name(self):
542+
with pytest.raises(
543+
ValueError,
544+
match=(
545+
"Abbreviated model names are only supported for Gemini models. "
546+
"Please provide the full publisher model name."
547+
),
548+
):
549+
batch_prediction.BatchPredictionJob.submit(
550+
source_model=_TEST_LLAMA_MODEL_NAME,
551+
input_dataset=_TEST_GCS_INPUT_URI,
552+
)
553+
478554
@pytest.mark.usefixtures("get_non_gemini_model_mock")
479555
def test_submit_batch_prediction_job_with_non_gemini_tuned_model(self):
480556
with pytest.raises(

vertexai/batch_prediction/_batch_prediction.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
_LOGGER = aiplatform_base.Logger(__name__)
3434

3535
_GEMINI_MODEL_PATTERN = r"publishers/google/models/gemini"
36+
_LLAMA_MODEL_PATTERN = r"publishers/meta/models/llama"
3637
_GEMINI_TUNED_MODEL_PATTERN = r"^projects/[0-9]+?/locations/[0-9a-z-]+?/models/[0-9]+?$"
3738

3839

@@ -272,13 +273,20 @@ def _reconcile_model_name(cls, model_name: str) -> str:
272273

273274
if "/" not in model_name:
274275
# model name (e.g., gemini-1.0-pro)
275-
model_name = "publishers/google/models/" + model_name
276+
if model_name.startswith("gemini"):
277+
model_name = "publishers/google/models/" + model_name
278+
else:
279+
raise ValueError(
280+
"Abbreviated model names are only supported for Gemini models. "
281+
"Please provide the full publisher model name."
282+
)
276283
elif model_name.startswith("models/"):
277284
# publisher model name (e.g., models/gemini-1.0-pro)
278285
model_name = "publishers/google/" + model_name
279286
elif (
280287
# publisher model full name
281288
not model_name.startswith("publishers/google/models/")
289+
and not model_name.startswith("publishers/meta/models/")
282290
# tuned model full resource name
283291
and not re.search(_GEMINI_TUNED_MODEL_PATTERN, model_name)
284292
):
@@ -302,6 +310,10 @@ def _is_genai_model(cls, model_name: str) -> bool:
302310
# Model is a tuned Gemini model.
303311
return True
304312

313+
if re.search(_LLAMA_MODEL_PATTERN, model_name):
314+
# Model is a Llama3 model.
315+
return True
316+
305317
return False
306318

307319
@classmethod

0 commit comments

Comments (0)