 import threading

-import chromadb
-import llama_index.embeddings.openai.base as llama_openai
-from llama_index.core import Document, VectorStoreIndex
-from llama_index.core.retrievers import VectorIndexRetriever
-from llama_index.vector_stores.chroma import ChromaVectorStore
 from openai._exceptions import APIConnectionError, InternalServerError, RateLimitError
 from tenacity import (
     retry,
     retry_if_exception_type,
     stop_after_attempt,
     wait_random_exponential,
 )

 from opendevin.core.config import LLMConfig
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.core.utils import json

-# TODO: this could be made configurable
-num_retries: int = 10
-retry_min_wait: int = 3
-retry_max_wait: int = 300
-
-# llama-index includes a retry decorator around openai.get_embeddings() function
-# it is initialized with hard-coded values and errors
-# this non-customizable behavior is creating issues when it's retrying faster than providers' rate limits
-# this block attempts to banish it and replace it with our decorator, to allow users to set their own limits
-
-if hasattr(llama_openai.get_embeddings, '__wrapped__'):
-    original_get_embeddings = llama_openai.get_embeddings.__wrapped__
-else:
-    logger.warning('Cannot set custom retry limits.')
-    num_retries = 1
-    original_get_embeddings = llama_openai.get_embeddings
-
-
-def attempt_on_error(retry_state):
-    logger.error(
-        f'{retry_state.outcome.exception()}. Attempt #{retry_state.attempt_number} | You can customize these settings in the configuration.',
-        exc_info=False,
+try:
+    import chromadb
+    import llama_index.embeddings.openai.base as llama_openai
+    from llama_index.core import Document, VectorStoreIndex
+    from llama_index.core.retrievers import VectorIndexRetriever
+    from llama_index.vector_stores.chroma import ChromaVectorStore
+
+    LLAMA_INDEX_AVAILABLE = True
+except ImportError:
+    LLAMA_INDEX_AVAILABLE = False
+
+if LLAMA_INDEX_AVAILABLE:
+    # TODO: this could be made configurable
+    num_retries: int = 10
+    retry_min_wait: int = 3
+    retry_max_wait: int = 300
+
+    # llama-index includes a retry decorator around openai.get_embeddings() function
+    # it is initialized with hard-coded values and errors
+    # this non-customizable behavior is creating issues when it's retrying faster than providers' rate limits
+    # this block attempts to banish it and replace it with our decorator, to allow users to set their own limits
+
+    if hasattr(llama_openai.get_embeddings, '__wrapped__'):
+        original_get_embeddings = llama_openai.get_embeddings.__wrapped__
+    else:
+        logger.warning('Cannot set custom retry limits.')
+        num_retries = 1
+        original_get_embeddings = llama_openai.get_embeddings
+
+    def attempt_on_error(retry_state):
+        logger.error(
+            f'{retry_state.outcome.exception()}. Attempt #{retry_state.attempt_number} | You can customize these settings in the configuration.',
+            exc_info=False,
+        )
+        return None
+
+    @retry(
+        reraise=True,
+        stop=stop_after_attempt(num_retries),
+        wait=wait_random_exponential(min=retry_min_wait, max=retry_max_wait),
+        retry=retry_if_exception_type(
+            (RateLimitError, APIConnectionError, InternalServerError)
+        ),
+        after=attempt_on_error,
     )
-    return None
-
-
-@retry(
-    reraise=True,
-    stop=stop_after_attempt(num_retries),
-    wait=wait_random_exponential(min=retry_min_wait, max=retry_max_wait),
-    retry=retry_if_exception_type(
-        (RateLimitError, APIConnectionError, InternalServerError)
-    ),
-    after=attempt_on_error,
-)
-def wrapper_get_embeddings(*args, **kwargs):
-    return original_get_embeddings(*args, **kwargs)
-
-
-llama_openai.get_embeddings = wrapper_get_embeddings
-
-
-class EmbeddingsLoader:
-    """Loader for embedding model initialization."""
-
-    @staticmethod
-    def get_embedding_model(strategy: str, llm_config: LLMConfig):
-        supported_ollama_embed_models = [
-            'llama2',
-            'mxbai-embed-large',
-            'nomic-embed-text',
-            'all-minilm',
-            'stable-code',
-        ]
-        if strategy in supported_ollama_embed_models:
-            from llama_index.embeddings.ollama import OllamaEmbedding
-
-            return OllamaEmbedding(
-                model_name=strategy,
-                base_url=llm_config.embedding_base_url,
-                ollama_additional_kwargs={'mirostat': 0},
-            )
-        elif strategy == 'openai':
-            from llama_index.embeddings.openai import OpenAIEmbedding
-
-            return OpenAIEmbedding(
-                model='text-embedding-ada-002',
-                api_key=llm_config.api_key,
-            )
-        elif strategy == 'azureopenai':
-            from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
-
-            return AzureOpenAIEmbedding(
-                model='text-embedding-ada-002',
-                deployment_name=llm_config.embedding_deployment_name,
-                api_key=llm_config.api_key,
-                azure_endpoint=llm_config.base_url,
-                api_version=llm_config.api_version,
-            )
-        elif (strategy is not None) and (strategy.lower() == 'none'):
-            # TODO: this works but is not elegant enough. The incentive is when
-            # an agent using embeddings is not used, there is no reason we need to
-            # initialize an embedding model
-            return None
-        else:
-            from llama_index.embeddings.huggingface import HuggingFaceEmbedding
-
-            return HuggingFaceEmbedding(model_name='BAAI/bge-small-en-v1.5')
+    def wrapper_get_embeddings(*args, **kwargs):
+        return original_get_embeddings(*args, **kwargs)
+
+    llama_openai.get_embeddings = wrapper_get_embeddings
+
+    class EmbeddingsLoader:
+        """Loader for embedding model initialization."""
+
+        @staticmethod
+        def get_embedding_model(strategy: str, llm_config: LLMConfig):
+            supported_ollama_embed_models = [
+                'llama2',
+                'mxbai-embed-large',
+                'nomic-embed-text',
+                'all-minilm',
+                'stable-code',
+            ]
+            if strategy in supported_ollama_embed_models:
+                from llama_index.embeddings.ollama import OllamaEmbedding
+
+                return OllamaEmbedding(
+                    model_name=strategy,
+                    base_url=llm_config.embedding_base_url,
+                    ollama_additional_kwargs={'mirostat': 0},
+                )
+            elif strategy == 'openai':
+                from llama_index.embeddings.openai import OpenAIEmbedding
+
+                return OpenAIEmbedding(
+                    model='text-embedding-ada-002',
+                    api_key=llm_config.api_key,
+                )
+            elif strategy == 'azureopenai':
+                from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
+
+                return AzureOpenAIEmbedding(
+                    model='text-embedding-ada-002',
+                    deployment_name=llm_config.embedding_deployment_name,
+                    api_key=llm_config.api_key,
+                    azure_endpoint=llm_config.base_url,
+                    api_version=llm_config.api_version,
+                )
+            elif (strategy is not None) and (strategy.lower() == 'none'):
+                # TODO: this works but is not elegant enough. The incentive is when
+                # an agent using embeddings is not used, there is no reason we need to
+                # initialize an embedding model
+                return None
+            else:
+                from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+
+                return HuggingFaceEmbedding(model_name='BAAI/bge-small-en-v1.5')


 class LongTermMemory:
     """Handles storing information for the agent to access later, using chromadb."""

     def __init__(self, llm_config: LLMConfig, memory_max_threads: int = 1):
         """Initialize the chromadb and set up ChromaVectorStore for later use."""
+        if not LLAMA_INDEX_AVAILABLE:
+            raise ImportError(
+                'llama_index and its dependencies are not installed. '
+                'To use LongTermMemory, please run: poetry install --with llama-index'
+            )
+
         db = chromadb.Client(chromadb.Settings(anonymized_telemetry=False))
         self.collection = db.get_or_create_collection(name='memories')
         vector_store = ChromaVectorStore(chroma_collection=self.collection)
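
The heart of this change is the optional-dependency guard: the chromadb and llama-index imports are wrapped in try/except, a module-level flag records whether they succeeded, and LongTermMemory.__init__ fails fast with an actionable error when they did not. A minimal, self-contained sketch of the same pattern (the module and class names below are illustrative, not part of this PR; only the guard pattern mirrors the diff):

# sketch: optional dependency guarded at import time, checked at use time
try:
    import chromadb  # heavy optional dependency

    CHROMADB_AVAILABLE = True
except ImportError:
    CHROMADB_AVAILABLE = False


class MemoryStore:
    """Illustrative stand-in for LongTermMemory."""

    def __init__(self) -> None:
        if not CHROMADB_AVAILABLE:
            # fail at construction with an actionable message,
            # instead of breaking the whole package at import time
            raise ImportError(
                'chromadb is not installed; install the optional extra to use MemoryStore.'
            )
        self.client = chromadb.Client(chromadb.Settings(anonymized_telemetry=False))

The flag check keeps importing the memory module cheap for users who never touch embeddings, while still giving a clear installation hint the first time LongTermMemory is constructed.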
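
For reference, the retry override that now lives under `if LLAMA_INDEX_AVAILABLE:` works because tenacity-decorated functions expose the undecorated callable via `__wrapped__`. A small sketch of that unwrap-and-rewrap technique, using a hypothetical flaky_call function (only the tenacity usage mirrors the code above):

from tenacity import retry, stop_after_attempt, wait_random_exponential


@retry(stop=stop_after_attempt(3))  # stand-in for the library's hard-coded decorator
def flaky_call(x):
    return x * 2


# recover the original, undecorated function when the decorator exposes it
original = getattr(flaky_call, '__wrapped__', flaky_call)


# re-wrap it with our own, configurable policy ...
@retry(
    reraise=True,
    stop=stop_after_attempt(10),
    wait=wait_random_exponential(min=3, max=300),
)
def patched_call(*args, **kwargs):
    return original(*args, **kwargs)


# ... and install the replacement where callers will find it
# (in this PR: llama_openai.get_embeddings = wrapper_get_embeddings)
flaky_call = patched_call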