24
24
# todo: high latency between the client and the Ollama embedding server will slow down embedding a lot
25
25
from llama_index .embeddings .ollama import OllamaEmbedding
26
26
27
+ # set RAG model deploy mode
28
+ RAG_MODEL_DEPLOY = os .environ .get ("RAG_MODEL_DEPLOY" ) or "local"
29
+
27
30
def build_automerging_index (
28
31
documents ,
29
32
llm ,
30
- # embed_model="local:BAAI/bge-small-en-v1.5",
31
33
chunk_sizes = None ,
32
34
):
33
35
chunk_sizes = chunk_sizes or [2048 , 512 , 128 ]
34
- embed_model = OllamaEmbedding (
35
- model_name = "jina/jina-embeddings-v2-base-en" ,
36
- base_url = os .environ .get ("OLLAMA_BASE_URL" ), # todo: any other configs here?
37
- )
36
+
37
+ if RAG_MODEL_DEPLOY == "local" :
38
+ embed_model = "local:jinaai/jina-embeddings-v2-base-en"
39
+ else :
40
+ embed_model = OllamaEmbedding (
41
+ model_name = "jina/jina-embeddings-v2-base-en" ,
42
+ base_url = os .environ .get ("OLLAMA_BASE_URL" ), # todo: any other configs here?
43
+ )
44
+
38
45
node_parser = HierarchicalNodeParser .from_defaults (chunk_sizes = chunk_sizes )
39
46
nodes = node_parser .get_nodes_from_documents (documents )
40
47
leaf_nodes = get_leaf_nodes (nodes )
@@ -59,10 +66,14 @@ def get_automerging_query_engine(
59
66
retriever = AutoMergingRetriever (
60
67
base_retriever , automerging_index .storage_context , verbose = True
61
68
)
62
- # rerank = SentenceTransformerRerank(
63
- # top_n=rerank_top_n, model="BAAI/bge-reranker-base"
64
- # )
65
- rerank = jinaai_rerank .JinaRerank (api_key = '' , top_n = rerank_top_n , model = "jina-reranker-v2" )
69
+
70
+ if RAG_MODEL_DEPLOY == "local" :
71
+ rerank = SentenceTransformerRerank (
72
+ top_n = rerank_top_n , model = "jinaai/jina-reranker-v2-base-multilingual"
73
+ )
74
+ else :
75
+ rerank = jinaai_rerank .JinaRerank (api_key = '' , top_n = rerank_top_n , model = "jina-reranker-v2" )
76
+
66
77
auto_merging_engine = RetrieverQueryEngine .from_args (
67
78
retriever , node_postprocessors = [rerank ]
68
79
)
0 commit comments