
Commit f1f2ea5

shawn-yang-google authored and copybara-github committed
feat: Add the initial version of the LlamaIndex agent prebuilt template.
PiperOrigin-RevId: 738627111
1 parent 5da362f commit f1f2ea5

File tree

7 files changed: +867 −0 lines

noxfile.py (+31)

@@ -54,6 +54,7 @@
 UNIT_TEST_PYTHON_VERSIONS = ["3.8", "3.9", "3.10", "3.11", "3.12"]
 UNIT_TEST_LANGCHAIN_PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.12"]
 UNIT_TEST_AG2_PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.12"]
+UNIT_TEST_LLAMA_INDEX_PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.12"]
 UNIT_TEST_STANDARD_DEPENDENCIES = [
     "mock",
     "asyncmock",
@@ -93,6 +94,7 @@
     "unit_ray",
     "unit_langchain",
     "unit_ag2",
+    "unit_llama_index",
     "system",
     "cover",
     "lint",
@@ -208,6 +210,7 @@ def default(session):
         "--ignore=tests/unit/vertex_ray",
         "--ignore=tests/unit/vertex_langchain",
         "--ignore=tests/unit/vertex_ag2",
+        "--ignore=tests/unit/vertex_llama_index",
         "--ignore=tests/unit/architecture",
         os.path.join("tests", "unit"),
         *session.posargs,
@@ -331,6 +334,34 @@ def unit_ag2(session):
     )
 
 
+@nox.session(python=UNIT_TEST_LLAMA_INDEX_PYTHON_VERSIONS)
+def unit_llama_index(session):
+    # Install all test dependencies, then install this package in-place.
+
+    constraints_path = str(
+        CURRENT_DIRECTORY / "testing" / "constraints-llama-index.txt"
+    )
+    standard_deps = UNIT_TEST_STANDARD_DEPENDENCIES + UNIT_TEST_DEPENDENCIES
+    session.install(*standard_deps, "-c", constraints_path)
+
+    # Install llama_index extras
+    session.install("-e", ".[llama_index_testing]", "-c", constraints_path)
+
+    # Run py.test against the unit tests.
+    session.run(
+        "py.test",
+        "--quiet",
+        "--junitxml=unit_llama_index_sponge_log.xml",
+        "--cov=google",
+        "--cov-append",
+        "--cov-config=.coveragerc",
+        "--cov-report=",
+        "--cov-fail-under=0",
+        os.path.join("tests", "unit", "vertex_llama_index"),
+        *session.posargs,
+    )
+
+
 def install_systemtest_dependencies(session, *constraints):
     # Use pre-release gRPC for system tests.
     # Exclude version 1.52.0rc1 which has a known issue.
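
The new session follows nox's standard naming, so (assuming a local checkout of this repo with nox installed) it can be run for one interpreter or the whole matrix:

    nox -s unit_llama_index-3.12   # one Python version
    nox -s unit_llama_index        # every version in UNIT_TEST_LLAMA_INDEX_PYTHON_VERSIONS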

setup.py (+16)

@@ -193,6 +193,20 @@
     )
 )
 
+llama_index_extra_require = [
+    "llama-index",
+    "llama-index-llms-google-genai",
+    "openinference-instrumentation-llama-index >= 3.0, < 4.0",
+]
+
+llama_index_testing_extra_require = list(
+    set(
+        llama_index_extra_require
+        + reasoning_engine_extra_require
+        + ["absl-py", "pytest-xdist"]
+    )
+)
+
 tokenization_extra_require = ["sentencepiece >= 0.2.0"]
 tokenization_testing_extra_require = tokenization_extra_require + ["nltk"]
 
@@ -309,6 +323,8 @@
         "tokenization": tokenization_extra_require,
         "ag2": ag2_extra_require,
         "ag2_testing": ag2_testing_extra_require,
+        "llama_index": llama_index_extra_require,
+        "llama_index_testing": llama_index_testing_extra_require,
     },
     python_requires=">=3.8",
     classifiers=[
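
With the extras registered, the template's dependencies install through standard pip extras; a sketch, assuming the google-cloud-aiplatform distribution built from this setup.py:

    pip install "google-cloud-aiplatform[llama_index]"   # runtime dependencies
    pip install -e ".[llama_index_testing]"              # editable checkout plus test dependencies

Note that llama_index_testing_extra_require is deduplicated via list(set(...)), so the ordering of its entries is not stable across builds.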

testing/constraints-llama-index.txt (+1)

@@ -0,0 +1 @@
+pydantic<2.10
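
A pip constraints file only caps versions; it installs nothing by itself. Passing it with -c, as the unit_llama_index session does above, keeps pydantic below 2.10 for every install in that session, presumably to avoid a then-current incompatibility with the LlamaIndex stack. The equivalent manual invocation would be:

    pip install -e ".[llama_index_testing]" -c testing/constraints-llama-index.txt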

tests/unit/vertex_llama_index/ (new test file, +234)

@@ -0,0 +1,234 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import importlib
+from unittest import mock
+import json
+
+from google import auth
+import vertexai
+from google.cloud.aiplatform import initializer
+from vertexai.preview.reasoning_engines.templates import llama_index
+from vertexai.reasoning_engines import _utils
+import pytest
+
+from llama_index.core import prompts
+from llama_index.core.base.llms import types
+
+_TEST_LOCATION = "us-central1"
+_TEST_PROJECT = "test-project"
+_TEST_MODEL = "gemini-1.0-pro"
+_TEST_SYSTEM_INSTRUCTION = "You are a helpful bot."
+
+
+@pytest.fixture(scope="module")
+def google_auth_mock():
+    with mock.patch.object(auth, "default") as google_auth_mock:
+        credentials_mock = mock.Mock()
+        credentials_mock.with_quota_project.return_value = None
+        google_auth_mock.return_value = (
+            credentials_mock,
+            _TEST_PROJECT,
+        )
+        yield google_auth_mock
+
+
+@pytest.fixture
+def vertexai_init_mock():
+    with mock.patch.object(vertexai, "init") as vertexai_init_mock:
+        yield vertexai_init_mock
+
+
+@pytest.fixture
+def json_loads_mock():
+    with mock.patch.object(json, "loads") as json_loads_mock:
+        yield json_loads_mock
+
+
+@pytest.fixture
+def model_builder_mock():
+    with mock.patch.object(
+        llama_index,
+        "_default_model_builder",
+    ) as model_builder_mock:
+        yield model_builder_mock
+
+
+@pytest.fixture
+def cloud_trace_exporter_mock():
+    with mock.patch.object(
+        _utils,
+        "_import_cloud_trace_exporter_or_warn",
+    ) as cloud_trace_exporter_mock:
+        yield cloud_trace_exporter_mock
+
+
+@pytest.fixture
+def tracer_provider_mock():
+    with mock.patch("opentelemetry.sdk.trace.TracerProvider") as tracer_provider_mock:
+        yield tracer_provider_mock
+
+
+@pytest.fixture
+def simple_span_processor_mock():
+    with mock.patch(
+        "opentelemetry.sdk.trace.export.SimpleSpanProcessor"
+    ) as simple_span_processor_mock:
+        yield simple_span_processor_mock
+
+
+@pytest.fixture
+def llama_index_instrumentor_mock():
+    with mock.patch.object(
+        _utils,
+        "_import_openinference_llama_index_or_warn",
+    ) as llama_index_instrumentor_mock:
+        yield llama_index_instrumentor_mock
+
+
+@pytest.fixture
+def llama_index_instrumentor_none_mock():
+    with mock.patch.object(
+        _utils,
+        "_import_openinference_llama_index_or_warn",
+    ) as llama_index_instrumentor_mock:
+        llama_index_instrumentor_mock.return_value = None
+        yield llama_index_instrumentor_mock
+
+
+@pytest.mark.usefixtures("google_auth_mock")
+class TestLlamaIndexQueryPipelineAgent:
+    def setup_method(self):
+        importlib.reload(initializer)
+        importlib.reload(vertexai)
+        vertexai.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+        self.prompt = prompts.ChatPromptTemplate(
+            message_templates=[
+                types.ChatMessage(
+                    role=types.MessageRole.SYSTEM,
+                    content=_TEST_SYSTEM_INSTRUCTION,
+                ),
+                types.ChatMessage(
+                    role=types.MessageRole.USER,
+                    content="{input}",
+                ),
+            ],
+        )
+
+    def teardown_method(self):
+        initializer.global_pool.shutdown(wait=True)
+
+    def test_initialization(self):
+        agent = llama_index.LlamaIndexQueryPipelineAgent(model=_TEST_MODEL)
+        assert agent._model_name == _TEST_MODEL
+        assert agent._project == _TEST_PROJECT
+        assert agent._location == _TEST_LOCATION
+        assert agent._runnable is None
+
+    def test_set_up(self):
+        agent = llama_index.LlamaIndexQueryPipelineAgent(
+            model=_TEST_MODEL,
+            prompt=self.prompt,
+            model_builder=lambda **kwargs: kwargs,
+            runnable_builder=lambda **kwargs: kwargs,
+        )
+        assert agent._runnable is None
+        agent.set_up()
+        assert agent._runnable is not None
+
+    def test_clone(self):
+        agent = llama_index.LlamaIndexQueryPipelineAgent(
+            model=_TEST_MODEL,
+            prompt=self.prompt,
+            model_builder=lambda **kwargs: kwargs,
+            runnable_builder=lambda **kwargs: kwargs,
+        )
+        agent.set_up()
+        assert agent._runnable is not None
+        agent_clone = agent.clone()
+        assert agent._runnable is not None
+        assert agent_clone._runnable is None
+        agent_clone.set_up()
+        assert agent_clone._runnable is not None
+
+    def test_query(self, json_loads_mock):
+        agent = llama_index.LlamaIndexQueryPipelineAgent(
+            model=_TEST_MODEL,
+            prompt=self.prompt,
+        )
+        agent._runnable = mock.Mock()
+        mocks = mock.Mock()
+        mocks.attach_mock(mock=agent._runnable, attribute="run")
+        agent.query(input="test query")
+        mocks.assert_has_calls([mock.call.run.run(input="test query")])
+
+    def test_query_with_kwargs(self, json_loads_mock):
+        agent = llama_index.LlamaIndexQueryPipelineAgent(
+            model=_TEST_MODEL,
+            prompt=self.prompt,
+        )
+        agent._runnable = mock.Mock()
+        mocks = mock.Mock()
+        mocks.attach_mock(mock=agent._runnable, attribute="run")
+        agent.query(input="test query", test_arg=123)
+        mocks.assert_has_calls([mock.call.run.run(input="test query", test_arg=123)])
+
+    def test_query_with_kwargs_and_input_dict(self, json_loads_mock):
+        agent = llama_index.LlamaIndexQueryPipelineAgent(
+            model=_TEST_MODEL,
+            prompt=self.prompt,
+        )
+        agent._runnable = mock.Mock()
+        mocks = mock.Mock()
+        mocks.attach_mock(mock=agent._runnable, attribute="run")
+        agent.query(input={"input": "test query"})
+        mocks.assert_has_calls([mock.call.run.run(input="test query")])
+
+    @pytest.mark.usefixtures("caplog")
+    def test_enable_tracing(
+        self,
+        caplog,
+        cloud_trace_exporter_mock,
+        tracer_provider_mock,
+        simple_span_processor_mock,
+        llama_index_instrumentor_mock,
+    ):
+        agent = llama_index.LlamaIndexQueryPipelineAgent(
+            model=_TEST_MODEL,
+            prompt=self.prompt,
+            enable_tracing=True,
+        )
+        assert agent._instrumentor is None
+        # TODO(b/384730642): Re-enable this test once the parent issue is fixed.
+        # agent.set_up()
+        # assert agent._instrumentor is not None
+        # assert (
+        #     "enable_tracing=True but proceeding with tracing disabled"
+        #     not in caplog.text
+        # )
+
+    @pytest.mark.usefixtures("caplog")
+    def test_enable_tracing_warning(self, caplog, llama_index_instrumentor_none_mock):
+        agent = llama_index.LlamaIndexQueryPipelineAgent(
+            model=_TEST_MODEL,
+            prompt=self.prompt,
+            enable_tracing=True,
+        )
+        assert agent._instrumentor is None
+        # TODO(b/384730642): Re-enable this test once the parent issue is fixed.
+        # agent.set_up()
+        # assert "enable_tracing=True but proceeding with tracing disabled" in caplog.text

vertexai/preview/reasoning_engines/__init__.py (+4)

@@ -29,11 +29,15 @@
 from vertexai.preview.reasoning_engines.templates.langgraph import (
     LanggraphAgent,
 )
+from vertexai.preview.reasoning_engines.templates.llama_index import (
+    LlamaIndexQueryPipelineAgent,
+)
 
 __all__ = (
     "AG2Agent",
     "LangchainAgent",
     "LanggraphAgent",
+    "LlamaIndexQueryPipelineAgent",
     "Queryable",
     "ReasoningEngine",
 )
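
Because the class is re-exported here, callers can import it from the preview namespace rather than reaching into the templates module, e.g.:

from vertexai.preview import reasoning_engines

agent = reasoning_engines.LlamaIndexQueryPipelineAgent(model="gemini-1.0-pro")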
