 import threading

-import chromadb
-import llama_index.embeddings.openai.base as llama_openai
-from llama_index.core import Document, VectorStoreIndex
-from llama_index.core.retrievers import VectorIndexRetriever
-from llama_index.vector_stores.chroma import ChromaVectorStore
 from openai._exceptions import APIConnectionError, InternalServerError, RateLimitError
 from tenacity import (
     retry,
     retry_if_exception_type,
     stop_after_attempt,
     wait_random_exponential,
 )

 from opendevin.core.config import LLMConfig
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.core.utils import json

-# TODO: this could be made configurable
-num_retries: int = 10
-retry_min_wait: int = 3
-retry_max_wait: int = 300
-
-# llama-index includes a retry decorator around openai.get_embeddings() function
-# it is initialized with hard-coded values and errors
-# this non-customizable behavior is creating issues when it's retrying faster than providers' rate limits
-# this block attempts to banish it and replace it with our decorator, to allow users to set their own limits
-
-if hasattr(llama_openai.get_embeddings, '__wrapped__'):
-    original_get_embeddings = llama_openai.get_embeddings.__wrapped__
-else:
-    logger.warning('Cannot set custom retry limits.')
-    num_retries = 1
-    original_get_embeddings = llama_openai.get_embeddings
-
-
-def attempt_on_error(retry_state):
-    logger.error(
-        f'{retry_state.outcome.exception()}. Attempt #{retry_state.attempt_number} | You can customize these settings in the configuration.',
-        exc_info=False,
+try:
+    import chromadb
+    import llama_index.embeddings.openai.base as llama_openai
+    from llama_index.core import Document, VectorStoreIndex
+    from llama_index.core.retrievers import VectorIndexRetriever
+    from llama_index.vector_stores.chroma import ChromaVectorStore
+
+    LLAMA_INDEX_AVAILABLE = True
+except ImportError:
+    LLAMA_INDEX_AVAILABLE = False
+
+if LLAMA_INDEX_AVAILABLE:
+    # TODO: this could be made configurable
+    num_retries: int = 10
+    retry_min_wait: int = 3
+    retry_max_wait: int = 300
+
+    # llama-index includes a retry decorator around openai.get_embeddings() function
+    # it is initialized with hard-coded values and errors
+    # this non-customizable behavior is creating issues when it's retrying faster than providers' rate limits
+    # this block attempts to banish it and replace it with our decorator, to allow users to set their own limits
+
+    if hasattr(llama_openai.get_embeddings, '__wrapped__'):
+        original_get_embeddings = llama_openai.get_embeddings.__wrapped__
+    else:
+        logger.warning('Cannot set custom retry limits.')
+        num_retries = 1
+        original_get_embeddings = llama_openai.get_embeddings
+
+    def attempt_on_error(retry_state):
+        logger.error(
+            f'{retry_state.outcome.exception()}. Attempt #{retry_state.attempt_number} | You can customize these settings in the configuration.',
+            exc_info=False,
+        )
+        return None
+
+    @retry(
+        reraise=True,
+        stop=stop_after_attempt(num_retries),
+        wait=wait_random_exponential(min=retry_min_wait, max=retry_max_wait),
+        retry=retry_if_exception_type(
+            (RateLimitError, APIConnectionError, InternalServerError)
+        ),
+        after=attempt_on_error,
     )
-    return None
-
-
-@retry(
-    reraise=True,
-    stop=stop_after_attempt(num_retries),
-    wait=wait_random_exponential(min=retry_min_wait, max=retry_max_wait),
-    retry=retry_if_exception_type(
-        (RateLimitError, APIConnectionError, InternalServerError)
-    ),
-    after=attempt_on_error,
-)
-def wrapper_get_embeddings(*args, **kwargs):
-    return original_get_embeddings(*args, **kwargs)
-
-
-llama_openai.get_embeddings = wrapper_get_embeddings
-
-
-class EmbeddingsLoader:
-    """Loader for embedding model initialization."""
-
-    @staticmethod
-    def get_embedding_model(strategy: str, llm_config: LLMConfig):
-        supported_ollama_embed_models = [
-            'llama2',
-            'mxbai-embed-large',
-            'nomic-embed-text',
-            'all-minilm',
-            'stable-code',
-        ]
-        if strategy in supported_ollama_embed_models:
-            from llama_index.embeddings.ollama import OllamaEmbedding
-
-            return OllamaEmbedding(
-                model_name=strategy,
-                base_url=llm_config.embedding_base_url,
-                ollama_additional_kwargs={'mirostat': 0},
-            )
-        elif strategy == 'openai':
-            from llama_index.embeddings.openai import OpenAIEmbedding
-
-            return OpenAIEmbedding(
-                model='text-embedding-ada-002',
-                api_key=llm_config.api_key,
-            )
-        elif strategy == 'azureopenai':
-            from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
-
-            return AzureOpenAIEmbedding(
-                model='text-embedding-ada-002',
-                deployment_name=llm_config.embedding_deployment_name,
-                api_key=llm_config.api_key,
-                azure_endpoint=llm_config.base_url,
-                api_version=llm_config.api_version,
-            )
-        elif (strategy is not None) and (strategy.lower() == 'none'):
-            # TODO: this works but is not elegant enough. The incentive is when
-            # an agent using embeddings is not used, there is no reason we need to
-            # initialize an embedding model
-            return None
-        else:
-            from llama_index.embeddings.huggingface import HuggingFaceEmbedding
-
-            return HuggingFaceEmbedding(model_name='BAAI/bge-small-en-v1.5')
+    def wrapper_get_embeddings(*args, **kwargs):
+        return original_get_embeddings(*args, **kwargs)
+
+    llama_openai.get_embeddings = wrapper_get_embeddings
+
+    class EmbeddingsLoader:
+        """Loader for embedding model initialization."""
+
+        @staticmethod
+        def get_embedding_model(strategy: str, llm_config: LLMConfig):
+            supported_ollama_embed_models = [
+                'llama2',
+                'mxbai-embed-large',
+                'nomic-embed-text',
+                'all-minilm',
+                'stable-code',
+            ]
+            if strategy in supported_ollama_embed_models:
+                from llama_index.embeddings.ollama import OllamaEmbedding
+
+                return OllamaEmbedding(
+                    model_name=strategy,
+                    base_url=llm_config.embedding_base_url,
+                    ollama_additional_kwargs={'mirostat': 0},
+                )
+            elif strategy == 'openai':
+                from llama_index.embeddings.openai import OpenAIEmbedding
+
+                return OpenAIEmbedding(
+                    model='text-embedding-ada-002',
+                    api_key=llm_config.api_key,
+                )
+            elif strategy == 'azureopenai':
+                from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
+
+                return AzureOpenAIEmbedding(
+                    model='text-embedding-ada-002',
+                    deployment_name=llm_config.embedding_deployment_name,
+                    api_key=llm_config.api_key,
+                    azure_endpoint=llm_config.base_url,
+                    api_version=llm_config.api_version,
+                )
+            elif (strategy is not None) and (strategy.lower() == 'none'):
+                # TODO: this works but is not elegant enough. The incentive is when
+                # an agent using embeddings is not used, there is no reason we need to
+                # initialize an embedding model
+                return None
+            else:
+                from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+
+                return HuggingFaceEmbedding(model_name='BAAI/bge-small-en-v1.5')


 class LongTermMemory:
     """Handles storing information for the agent to access later, using chromadb."""

     def __init__(self, llm_config: LLMConfig, memory_max_threads: int = 1):
         """Initialize the chromadb and set up ChromaVectorStore for later use."""
+        if not LLAMA_INDEX_AVAILABLE:
+            raise ImportError(
+                'llama_index and its dependencies are not installed. '
+                'To use LongTermMemory, please run: poetry install --with llama-index'
+            )
+
         db = chromadb.Client(chromadb.Settings(anonymized_telemetry=False))
         self.collection = db.get_or_create_collection(name='memories')
         vector_store = ChromaVectorStore(chroma_collection=self.collection)
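
The heart of this change is the optional-dependency guard: the chromadb and llama-index imports are wrapped in try/except, a module-level flag records whether they succeeded, and LongTermMemory.__init__ fails fast with an actionable error when they did not. A minimal, self-contained sketch of the same pattern (the module and class names below are illustrative, not part of this PR; only the guard pattern mirrors the diff):

# sketch: optional dependency guarded at import time, checked at use time
try:
    import chromadb  # heavy optional dependency

    CHROMADB_AVAILABLE = True
except ImportError:
    CHROMADB_AVAILABLE = False


class MemoryStore:
    """Illustrative stand-in for LongTermMemory."""

    def __init__(self) -> None:
        if not CHROMADB_AVAILABLE:
            # fail at construction with an actionable message,
            # instead of breaking the whole package at import time
            raise ImportError(
                'chromadb is not installed; install the optional extra to use MemoryStore.'
            )
        self.client = chromadb.Client(chromadb.Settings(anonymized_telemetry=False))

The flag check keeps importing the memory module cheap for users who never touch embeddings, while still giving a clear installation hint the first time LongTermMemory is constructed.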
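
For reference, the retry override that now lives under `if LLAMA_INDEX_AVAILABLE:` works because tenacity-decorated functions expose the undecorated callable via `__wrapped__`. A small sketch of that unwrap-and-rewrap technique, using a hypothetical flaky_call function (only the tenacity usage mirrors the code above):

from tenacity import retry, stop_after_attempt, wait_random_exponential


@retry(stop=stop_after_attempt(3))  # stand-in for the library's hard-coded decorator
def flaky_call(x):
    return x * 2


# recover the original, undecorated function when the decorator exposes it
original = getattr(flaky_call, '__wrapped__', flaky_call)


# re-wrap it with our own, configurable policy ...
@retry(
    reraise=True,
    stop=stop_after_attempt(10),
    wait=wait_random_exponential(min=3, max=300),
)
def patched_call(*args, **kwargs):
    return original(*args, **kwargs)


# ... and install the replacement where callers will find it
# (in this PR: llama_openai.get_embeddings = wrapper_get_embeddings)
flaky_call = patched_call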