
Commit a5800e0

add RAG local deploy support

1 parent f5bbca5

3 files changed: 29 additions, 10 deletions

.env (1 addition, 1 deletion)

@@ -1,4 +1,4 @@
 LLM_LOCAL_BASE_URL=http://xinference:9997/v1
 OLLAMA_BASE_URL=http://ollama:11434
 HOSTING_CHECK_BASE_URL=http://127.0.0.1:8000
-
+RAG_MODEL_DEPLOY=local
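
The new variable defaults to local when absent. A minimal sketch of the read pattern used in src/index.py below, showing why os.environ.get(...) or "local" is used rather than a plain get() default: an empty RAG_MODEL_DEPLOY= line in .env would otherwise slip through as an empty string.

# Sketch of the fallback semantics; plain Python, no assumptions beyond the
# variable name introduced in this commit.
import os

os.environ.pop("RAG_MODEL_DEPLOY", None)
assert (os.environ.get("RAG_MODEL_DEPLOY") or "local") == "local"   # unset -> local

os.environ["RAG_MODEL_DEPLOY"] = ""
assert os.environ.get("RAG_MODEL_DEPLOY", "local") == ""            # get() default misses ""
assert (os.environ.get("RAG_MODEL_DEPLOY") or "local") == "local"   # `or` still falls back

os.environ["RAG_MODEL_DEPLOY"] = "remote"
assert (os.environ.get("RAG_MODEL_DEPLOY") or "local") == "remote"  # explicit value wins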

README.md (8 additions, 0 deletions)

@@ -62,6 +62,8 @@ Contexts
 - [ ] Filter out non-related contexts before sending for verdict

 ### Toolchain
+- [ ] Evaluate MLOps pipeline
+  - https://kitops.ml
 - [ ] Evaluate data quality of searching and URL fetching. Better error handling.
 - [ ] Use multiple sources for factcheck.

@@ -78,11 +80,17 @@ Contexts
 ### Calculate
 - [ ] Shall we calculate the percentage of true and false in the input? Any better calculation than item count?

+## Issues
+- [ ] Uses many different types of models, difficult for performance optimization and maintenance.
+
 ## References
 ### Reports
 - [ ] AI-generated misinformation
 ### Factcheck
 - https://www.bmi.bund.de/SharedDocs/schwerpunkte/EN/disinformation/examples-of-russian-disinformation-and-the-facts.html
+### Resources
+#### Inference
+- https://console.groq.com/docs/ (free tier)

 ## Thanks
 - Jina Reader: https://jina.ai

src/index.py (20 additions, 9 deletions)

@@ -24,17 +24,24 @@
 # todo: high latency between the client and the Ollama embedding server will slow down embedding a lot
 from llama_index.embeddings.ollama import OllamaEmbedding

+# set RAG model deploy mode
+RAG_MODEL_DEPLOY = os.environ.get("RAG_MODEL_DEPLOY") or "local"
+
 def build_automerging_index(
     documents,
     llm,
-    # embed_model="local:BAAI/bge-small-en-v1.5",
     chunk_sizes=None,
 ):
     chunk_sizes = chunk_sizes or [2048, 512, 128]
-    embed_model = OllamaEmbedding(
-        model_name="jina/jina-embeddings-v2-base-en",
-        base_url=os.environ.get("OLLAMA_BASE_URL"),  # todo: any other configs here?
-    )
+
+    if RAG_MODEL_DEPLOY == "local":
+        embed_model = "local:jinaai/jina-embeddings-v2-base-en"
+    else:
+        embed_model = OllamaEmbedding(
+            model_name="jina/jina-embeddings-v2-base-en",
+            base_url=os.environ.get("OLLAMA_BASE_URL"),  # todo: any other configs here?
+        )
+
     node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)
     nodes = node_parser.get_nodes_from_documents(documents)
     leaf_nodes = get_leaf_nodes(nodes)
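
In the local branch, embed_model is just a string: llama_index resolves "local:<model>" to an in-process HuggingFace embedding model, so no Ollama round-trip is involved. A minimal sketch of that resolution, shown with the BAAI/bge-small-en-v1.5 default the function previously carried in a comment (the Jina embedding model chosen above ships custom modeling code, so it may additionally need trust_remote_code support from the loader):

# Hedged sketch, assuming the llama-index v0.10+ package layout with
# llama-index-core and llama-index-embeddings-huggingface installed.
from llama_index.core.embeddings.utils import resolve_embed_model

# "local:<hf-model>" loads and runs the model in-process.
embed_model = resolve_embed_model("local:BAAI/bge-small-en-v1.5")
vec = embed_model.get_text_embedding("a claim to factcheck")
print(len(vec))  # 384 dimensions for bge-small-en-v1.5
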
@@ -59,10 +66,14 @@ def get_automerging_query_engine(
     retriever = AutoMergingRetriever(
         base_retriever, automerging_index.storage_context, verbose=True
     )
-    # rerank = SentenceTransformerRerank(
-    #     top_n=rerank_top_n, model="BAAI/bge-reranker-base"
-    # )
-    rerank = jinaai_rerank.JinaRerank(api_key='', top_n=rerank_top_n, model="jina-reranker-v2")
+
+    if RAG_MODEL_DEPLOY == "local":
+        rerank = SentenceTransformerRerank(
+            top_n=rerank_top_n, model="jinaai/jina-reranker-v2-base-multilingual"
+        )
+    else:
+        rerank = jinaai_rerank.JinaRerank(api_key='', top_n=rerank_top_n, model="jina-reranker-v2")
+
     auto_merging_engine = RetrieverQueryEngine.from_args(
         retriever, node_postprocessors=[rerank]
     )
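
The local branch replaces the hosted Jina API with an in-process cross-encoder via SentenceTransformerRerank. A minimal sketch of that postprocessor in isolation, using the BAAI/bge-reranker-base model the file previously referenced (the jinaai/jina-reranker-v2-base-multilingual model chosen above ships custom modeling code, so loading it may additionally require trust_remote_code support); node texts and the query are hypothetical:

# Hedged sketch of the local rerank path on its own; assumes llama-index-core
# plus sentence-transformers are installed.
from llama_index.core.postprocessor import SentenceTransformerRerank
from llama_index.core.schema import NodeWithScore, TextNode

rerank = SentenceTransformerRerank(top_n=2, model="BAAI/bge-reranker-base")
candidates = [
    NodeWithScore(node=TextNode(text=t), score=0.0)
    for t in (
        "Official records contradict the claim.",   # hypothetical contexts
        "An unrelated cooking recipe.",
        "The claim was first published in 2021.",
    )
]
top = rerank.postprocess_nodes(candidates, query_str="Is the claim true?")
for n in top:
    print(round(n.score, 3), n.node.text)  # cross-encoder relevance scores

Either branch yields a node postprocessor, so RetrieverQueryEngine.from_args sees the same interface regardless of deploy mode.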
