Skip to content

Commit e36b4ba

Browse files
committed
chore: CohereRanker - change default model and remove max_chunks_per_doc parameter
1 parent cf5aaa7 commit e36b4ba

File tree

2 files changed

+12
-38
lines changed

2 files changed

+12
-38
lines changed

integrations/cohere/src/haystack_integrations/components/rankers/cohere/ranker.py

+3-20
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ class CohereRanker:
2222
from haystack import Document
2323
from haystack_integrations.components.rankers.cohere import CohereRanker
2424
25-
ranker = CohereRanker(model="rerank-english-v2.0", top_k=2)
25+
ranker = CohereRanker(model="rerank-v3.5", top_k=2)
2626
2727
docs = [Document(content="Paris"), Document(content="Berlin")]
2828
query = "What is the capital of germany?"
@@ -33,11 +33,10 @@ class CohereRanker:
3333

3434
def __init__(
3535
self,
36-
model: str = "rerank-english-v2.0",
36+
model: str = "rerank-v3.5",
3737
top_k: int = 10,
3838
api_key: Secret = Secret.from_env_var(["COHERE_API_KEY", "CO_API_KEY"]),
3939
api_base_url: str = "https://api.cohere.com",
40-
max_chunks_per_doc: Optional[int] = None,
4140
meta_fields_to_embed: Optional[List[str]] = None,
4241
meta_data_separator: str = "\n",
4342
max_tokens_per_doc: int = 4096,
@@ -49,11 +48,6 @@ def __init__(
4948
:param top_k: The maximum number of documents to return.
5049
:param api_key: Cohere API key.
5150
:param api_base_url: The base URL of the Cohere API.
52-
:param max_chunks_per_doc: If your document exceeds 512 tokens, this determines the maximum number of
53-
chunks a document can be split into. If `None`, the default of 10 is used.
54-
For example, if your document is 6000 tokens, with the default of 10, the document will be split into 10
55-
chunks each of 512 tokens and the last 880 tokens will be disregarded.
56-
Check [Cohere docs](https://docs.cohere.com/docs/reranking-best-practices) for more information.
5751
:param meta_fields_to_embed: List of meta fields that should be concatenated
5852
with the document content for reranking.
5953
:param meta_data_separator: Separator used to concatenate the meta fields
@@ -64,20 +58,10 @@ def __init__(
6458
self.api_key = api_key
6559
self.api_base_url = api_base_url
6660
self.top_k = top_k
67-
self.max_chunks_per_doc = max_chunks_per_doc
6861
self.meta_fields_to_embed = meta_fields_to_embed or []
6962
self.meta_data_separator = meta_data_separator
7063
self.max_tokens_per_doc = max_tokens_per_doc
71-
if max_chunks_per_doc is not None:
72-
# Note: max_chunks_per_doc is currently not supported by the Cohere V2 API
73-
# See: https://docs.cohere.com/reference/rerank
74-
import warnings
75-
76-
warnings.warn(
77-
"The max_chunks_per_doc parameter currently has no effect as it is not supported by the Cohere V2 API.",
78-
UserWarning,
79-
stacklevel=2,
80-
)
64+
8165
self._cohere_client = cohere.ClientV2(
8266
api_key=self.api_key.resolve_value(), base_url=self.api_base_url, client_name="haystack"
8367
)
@@ -95,7 +79,6 @@ def to_dict(self) -> Dict[str, Any]:
9579
api_key=self.api_key.to_dict() if self.api_key else None,
9680
api_base_url=self.api_base_url,
9781
top_k=self.top_k,
98-
max_chunks_per_doc=self.max_chunks_per_doc,
9982
meta_fields_to_embed=self.meta_fields_to_embed,
10083
meta_data_separator=self.meta_data_separator,
10184
max_tokens_per_doc=self.max_tokens_per_doc,

integrations/cohere/tests/test_cohere_ranker.py

+9-18
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,10 @@ class TestCohereRanker:
4141
def test_init_default(self, monkeypatch):
4242
monkeypatch.setenv("CO_API_KEY", "test-api-key")
4343
component = CohereRanker()
44-
assert component.model_name == "rerank-english-v2.0"
44+
assert component.model_name == "rerank-v3.5"
4545
assert component.top_k == 10
4646
assert component.api_key == Secret.from_env_var(["COHERE_API_KEY", "CO_API_KEY"])
4747
assert component.api_base_url == COHERE_API_URL
48-
assert component.max_chunks_per_doc is None
4948
assert component.meta_fields_to_embed == []
5049
assert component.meta_data_separator == "\n"
5150
assert component.max_tokens_per_doc == 4096
@@ -59,20 +58,18 @@ def test_init_fail_wo_api_key(self, monkeypatch):
5958
def test_init_with_parameters(self, monkeypatch):
6059
monkeypatch.setenv("CO_API_KEY", "test-api-key")
6160
component = CohereRanker(
62-
model="rerank-multilingual-v2.0",
61+
model="rerank-multilingual-v3.0",
6362
top_k=5,
6463
api_key=Secret.from_env_var(["COHERE_API_KEY", "CO_API_KEY"]),
6564
api_base_url="test-base-url",
66-
max_chunks_per_doc=40,
6765
meta_fields_to_embed=["meta_field_1", "meta_field_2"],
6866
meta_data_separator=",",
6967
max_tokens_per_doc=100,
7068
)
71-
assert component.model_name == "rerank-multilingual-v2.0"
69+
assert component.model_name == "rerank-multilingual-v3.0"
7270
assert component.top_k == 5
7371
assert component.api_key == Secret.from_env_var(["COHERE_API_KEY", "CO_API_KEY"])
7472
assert component.api_base_url == "test-base-url"
75-
assert component.max_chunks_per_doc == 40
7673
assert component.meta_fields_to_embed == ["meta_field_1", "meta_field_2"]
7774
assert component.meta_data_separator == ","
7875
assert component.max_tokens_per_doc == 100
@@ -84,11 +81,10 @@ def test_to_dict_default(self, monkeypatch):
8481
assert data == {
8582
"type": "haystack_integrations.components.rankers.cohere.ranker.CohereRanker",
8683
"init_parameters": {
87-
"model": "rerank-english-v2.0",
84+
"model": "rerank-v3.5",
8885
"api_key": {"env_vars": ["COHERE_API_KEY", "CO_API_KEY"], "strict": True, "type": "env_var"},
8986
"api_base_url": COHERE_API_URL,
9087
"top_k": 10,
91-
"max_chunks_per_doc": None,
9288
"meta_fields_to_embed": [],
9389
"meta_data_separator": "\n",
9490
"max_tokens_per_doc": 4096,
@@ -98,11 +94,10 @@ def test_to_dict_default(self, monkeypatch):
9894
def test_to_dict_with_parameters(self, monkeypatch):
9995
monkeypatch.setenv("CO_API_KEY", "test-api-key")
10096
component = CohereRanker(
101-
model="rerank-multilingual-v2.0",
97+
model="rerank-multilingual-v3.0",
10298
top_k=2,
10399
api_key=Secret.from_env_var(["COHERE_API_KEY", "CO_API_KEY"]),
104100
api_base_url="test-base-url",
105-
max_chunks_per_doc=50,
106101
meta_fields_to_embed=["meta_field_1", "meta_field_2"],
107102
meta_data_separator=",",
108103
max_tokens_per_doc=100,
@@ -111,11 +106,10 @@ def test_to_dict_with_parameters(self, monkeypatch):
111106
assert data == {
112107
"type": "haystack_integrations.components.rankers.cohere.ranker.CohereRanker",
113108
"init_parameters": {
114-
"model": "rerank-multilingual-v2.0",
109+
"model": "rerank-multilingual-v3.0",
115110
"api_key": {"env_vars": ["COHERE_API_KEY", "CO_API_KEY"], "strict": True, "type": "env_var"},
116111
"api_base_url": "test-base-url",
117112
"top_k": 2,
118-
"max_chunks_per_doc": 50,
119113
"meta_fields_to_embed": ["meta_field_1", "meta_field_2"],
120114
"meta_data_separator": ",",
121115
"max_tokens_per_doc": 100,
@@ -127,22 +121,20 @@ def test_from_dict(self, monkeypatch):
127121
data = {
128122
"type": "haystack_integrations.components.rankers.cohere.ranker.CohereRanker",
129123
"init_parameters": {
130-
"model": "rerank-multilingual-v2.0",
124+
"model": "rerank-multilingual-v3.0",
131125
"api_key": {"env_vars": ["COHERE_API_KEY", "CO_API_KEY"], "strict": True, "type": "env_var"},
132126
"api_base_url": "test-base-url",
133127
"top_k": 2,
134-
"max_chunks_per_doc": 50,
135128
"meta_fields_to_embed": ["meta_field_1", "meta_field_2"],
136129
"meta_data_separator": ",",
137130
"max_tokens_per_doc": 100,
138131
},
139132
}
140133
component = CohereRanker.from_dict(data)
141-
assert component.model_name == "rerank-multilingual-v2.0"
134+
assert component.model_name == "rerank-multilingual-v3.0"
142135
assert component.top_k == 2
143136
assert component.api_key == Secret.from_env_var(["COHERE_API_KEY", "CO_API_KEY"])
144137
assert component.api_base_url == "test-base-url"
145-
assert component.max_chunks_per_doc == 50
146138
assert component.meta_fields_to_embed == ["meta_field_1", "meta_field_2"]
147139
assert component.meta_data_separator == ","
148140
assert component.max_tokens_per_doc == 100
@@ -153,10 +145,9 @@ def test_from_dict_fail_wo_env_var(self, monkeypatch):
153145
data = {
154146
"type": "haystack_integrations.components.rankers.cohere.ranker.CohereRanker",
155147
"init_parameters": {
156-
"model": "rerank-multilingual-v2.0",
148+
"model": "rerank-multilingual-v3.0",
157149
"api_key": {"env_vars": ["COHERE_API_KEY", "CO_API_KEY"], "strict": True, "type": "env_var"},
158150
"top_k": 2,
159-
"max_chunks_per_doc": 50,
160151
"max_tokens_per_doc": 100,
161152
},
162153
}

0 commit comments

Comments
 (0)