
Commit 83adc12

feat(RAG): Introduce SentenceTransformer Reranker (#1810)
1 parent f83abff commit 83adc12

File tree

7 files changed: +198 -8 lines changed

fern/docs.yml

Lines changed: 2 additions & 0 deletions
@@ -64,6 +64,8 @@ navigation:
     contents:
       - page: LLM Backends
        path: ./docs/pages/manual/llms.mdx
+      - page: Reranking
+        path: ./docs/pages/manual/reranker.mdx
   - section: User Interface
     contents:
       - page: User interface (Gradio) Manual

fern/docs/pages/manual/reranker.mdx

Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
+## Enhancing Response Quality with Reranking
+
+PrivateGPT offers a reranking feature aimed at optimizing response generation by filtering out irrelevant documents, potentially leading to faster response times and enhanced relevance of answers generated by the LLM.
+
+### Enabling Reranking
+
+Document reranking can significantly improve the efficiency and quality of the responses by pre-selecting the most relevant documents before generating an answer. To leverage this feature, ensure that it is enabled in the RAG settings and consider adjusting the parameters to best fit your use case.
+
+#### Additional Requirements
+
+Before enabling reranking, you must install additional dependencies:
+
+```bash
+poetry install --extras rerank-sentence-transformers
+```
+
+This command installs dependencies for the cross-encoder reranker from sentence-transformers, which is currently the only supported method by PrivateGPT for document reranking.
+
+#### Configuration
+
+To enable and configure reranking, adjust the `rag` section within the `settings.yaml` file. Here are the key settings to consider:
+
+- `similarity_top_k`: Determines the number of documents to initially retrieve and consider for reranking. This value should be larger than `top_n`.
+- `rerank`:
+  - `enabled`: Set to `true` to activate the reranking feature.
+  - `top_n`: Specifies the number of documents to use in the final answer generation process, chosen from the top-ranked documents provided by `similarity_top_k`.
+
+Example configuration snippet:
+
+```yaml
+rag:
+  similarity_top_k: 10 # Number of documents to retrieve and consider for reranking
+  rerank:
+    enabled: true
+    top_n: 3 # Number of top-ranked documents to use for generating the answer
+```
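For intuition about what the cross-encoder reranker referenced in this page actually computes, here is a minimal standalone sketch using the sentence-transformers `CrossEncoder` class directly. The query and document strings are made-up examples, and PrivateGPT itself wires the model in through llama-index rather than calling `CrossEncoder` like this.

```python
# Illustrative sketch only: score query/document pairs with the same
# cross-encoder model family the docs above configure. The query and
# documents below are invented examples, not PrivateGPT internals.
from sentence_transformers import CrossEncoder

query = "How do I enable reranking?"
documents = [
    "Reranking is configured in the rag section of settings.yaml.",
    "The user interface is built with Gradio.",
    "Set rerank.enabled to true and choose a top_n value.",
]

model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-2-v2")
scores = model.predict([(query, doc) for doc in documents])

# Keep only the highest-scoring documents, mirroring what rerank.top_n does.
top_n = 2
reranked = sorted(zip(documents, scores), key=lambda pair: pair[1], reverse=True)[:top_n]
for doc, score in reranked:
    print(f"{score:.3f}  {doc}")
```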

poetry.lock

Lines changed: 118 additions & 1 deletion
Some generated files are not rendered by default.

private_gpt/server/chat/chat_service.py

Lines changed: 15 additions & 6 deletions
@@ -9,6 +9,7 @@
 from llama_index.core.indices.postprocessor import MetadataReplacementPostProcessor
 from llama_index.core.llms import ChatMessage, MessageRole
 from llama_index.core.postprocessor import (
+    SentenceTransformerRerank,
     SimilarityPostprocessor,
 )
 from llama_index.core.storage import StorageContext
@@ -113,16 +114,24 @@ def _chat_engine(
                 context_filter=context_filter,
                 similarity_top_k=self.settings.rag.similarity_top_k,
             )
+            node_postprocessors = [
+                MetadataReplacementPostProcessor(target_metadata_key="window"),
+                SimilarityPostprocessor(
+                    similarity_cutoff=settings.rag.similarity_value
+                ),
+            ]
+
+            if settings.rag.rerank.enabled:
+                rerank_postprocessor = SentenceTransformerRerank(
+                    model=settings.rag.rerank.model, top_n=settings.rag.rerank.top_n
+                )
+                node_postprocessors.append(rerank_postprocessor)
+
             return ContextChatEngine.from_defaults(
                 system_prompt=system_prompt,
                 retriever=vector_index_retriever,
                 llm=self.llm_component.llm,  # Takes no effect at the moment
-                node_postprocessors=[
-                    MetadataReplacementPostProcessor(target_metadata_key="window"),
-                    SimilarityPostprocessor(
-                        similarity_cutoff=settings.rag.similarity_value
-                    ),
-                ],
+                node_postprocessors=node_postprocessors,
             )
         else:
             return SimpleChatEngine.from_defaults(
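The change above can be read in isolation as the following sketch, which builds the same conditional postprocessor list outside of PrivateGPT's service classes; the `rerank_*` and `similarity_value` variables are stand-ins for the corresponding `settings.rag` values.

```python
# Sketch of the postprocessor wiring added in chat_service.py, with plain
# variables standing in for the settings object.
from llama_index.core.indices.postprocessor import MetadataReplacementPostProcessor
from llama_index.core.postprocessor import (
    SentenceTransformerRerank,
    SimilarityPostprocessor,
)

rerank_enabled = True                                  # settings.rag.rerank.enabled
rerank_model = "cross-encoder/ms-marco-MiniLM-L-2-v2"  # settings.rag.rerank.model
rerank_top_n = 3                                       # settings.rag.rerank.top_n
similarity_value = 0.45                                # settings.rag.similarity_value

# Always applied: restore sentence-window context and drop low-similarity nodes.
node_postprocessors = [
    MetadataReplacementPostProcessor(target_metadata_key="window"),
    SimilarityPostprocessor(similarity_cutoff=similarity_value),
]

# The cross-encoder is only loaded when reranking is switched on.
if rerank_enabled:
    node_postprocessors.append(
        SentenceTransformerRerank(model=rerank_model, top_n=rerank_top_n)
    )

# node_postprocessors is then handed to ContextChatEngine.from_defaults(...),
# exactly as the diff above does.
```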

private_gpt/settings/settings.py

Lines changed: 17 additions & 1 deletion
@@ -284,15 +284,31 @@ class UISettings(BaseModel):
     )


+class RerankSettings(BaseModel):
+    enabled: bool = Field(
+        False,
+        description="This value controls whether a reranker should be included in the RAG pipeline.",
+    )
+    model: str = Field(
+        "cross-encoder/ms-marco-MiniLM-L-2-v2",
+        description="Rerank model to use. Limited to SentenceTransformer cross-encoder models.",
+    )
+    top_n: int = Field(
+        2,
+        description="This value controls the number of documents returned by the RAG pipeline.",
+    )
+
+
 class RagSettings(BaseModel):
     similarity_top_k: int = Field(
         2,
-        description="This value controls the number of documents returned by the RAG pipeline",
+        description="This value controls the number of documents returned by the RAG pipeline or considered for reranking if enabled.",
     )
     similarity_value: float = Field(
         None,
         description="If set, any documents retrieved from the RAG must meet a certain match score. Acceptable values are between 0 and 1.",
     )
+    rerank: RerankSettings


 class PostgresSettings(BaseModel):
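As a rough illustration of how these pydantic models pick up the `rag` block from settings.yaml, here is a trimmed-down sketch; the class bodies are abbreviated stand-ins, not the full PrivateGPT definitions, and the input mapping is a made-up example.

```python
# Abbreviated stand-ins for the RerankSettings/RagSettings models above,
# showing how a settings.yaml `rag:` mapping validates and fills defaults.
from pydantic import BaseModel, Field


class RerankSettings(BaseModel):
    enabled: bool = Field(False, description="Include a reranker in the RAG pipeline.")
    model: str = Field(
        "cross-encoder/ms-marco-MiniLM-L-2-v2",
        description="SentenceTransformer cross-encoder model to use.",
    )
    top_n: int = Field(2, description="Documents kept after reranking.")


class RagSettings(BaseModel):
    similarity_top_k: int = Field(
        2, description="Documents retrieved (and considered for reranking if enabled)."
    )
    rerank: RerankSettings


# A mapping shaped like the `rag:` section of settings.yaml.
rag = RagSettings(similarity_top_k=10, rerank={"enabled": True, "top_n": 3})
print(rag.rerank.model)  # the default cross-encoder model is filled in
```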

pyproject.toml

Lines changed: 6 additions & 0 deletions
@@ -37,6 +37,11 @@ asyncpg = {version="^0.29.0", optional = true}
 
 # Optional Sagemaker dependency
 boto3 = {version ="^1.34.51", optional = true}
+
+# Optional Reranker dependencies
+torch = {version ="^2.1.2", optional = true}
+sentence-transformers = {version ="^2.6.1", optional = true}
+
 # Optional UI
 gradio = {version ="^4.19.2", optional = true}
 
@@ -57,6 +62,7 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
 vector-stores-chroma = ["llama-index-vector-stores-chroma"]
 vector-stores-postgres = ["llama-index-vector-stores-postgres"]
 storage-nodestore-postgres = ["llama-index-storage-docstore-postgres","llama-index-storage-index-store-postgres","psycopg2-binary","asyncpg"]
+rerank-sentence-transformers = ["torch", "sentence-transformers"]
 
 [tool.poetry.group.dev.dependencies]
 black = "^22"
settings.yaml

Lines changed: 4 additions & 0 deletions
@@ -47,6 +47,10 @@ rag:
   #This value controls how many "top" documents the RAG returns to use in the context.
   #similarity_value: 0.45
   #This value is disabled by default. If you enable this settings, the RAG will only use articles that meet a certain percentage score.
+  rerank:
+    enabled: false
+    model: cross-encoder/ms-marco-MiniLM-L-2-v2
+    top_n: 1
 
 llamacpp:
   prompt_style: "mistral"

0 commit comments