Skip to content

Commit ea977f8

Browse files
authored
qdrant[minor]: Support maxMarginalRelevanceSearch() (#5467)
1 parent bfbd46d commit ea977f8

File tree

3 files changed

+118
-1
lines changed

3 files changed

+118
-1
lines changed

libs/langchain-qdrant/src/tests/vectorstores.int.test.ts

+11
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,17 @@ describe("QdrantVectorStore testcase", () => {
2525
const results = await qdrantVectorStore.similaritySearch(pageContent, 1);
2626

2727
expect(results[0]).toEqual(new Document({ metadata: {}, pageContent }));
28+
29+
expect(qdrantVectorStore.maxMarginalRelevanceSearch).toBeDefined();
30+
31+
const mmrResults = await qdrantVectorStore.maxMarginalRelevanceSearch(
32+
pageContent,
33+
{
34+
k: 1,
35+
}
36+
);
37+
expect(mmrResults.length).toBe(1);
38+
expect(mmrResults[0]).toEqual(new Document({ metadata: {}, pageContent }));
2839
});
2940

3041
test("passing client directly with a model that creates embeddings with a different number of dimensions", async () => {

libs/langchain-qdrant/src/tests/vectorstores.test.ts

+43
Original file line numberDiff line numberDiff line change
@@ -209,3 +209,46 @@ test("QdrantVectorStore adds vectors with no custom payload", async () => {
209209
],
210210
});
211211
});
212+
213+
test("QdrantVectorStore MMR works", async () => {
  // Stubbed Qdrant client: search resolves to an empty result set and
  // getCollections reports that no collection exists yet.
  const upsertMock = jest.fn();
  const searchMock = jest.fn<any>().mockResolvedValue([]);
  const fakeClient = {
    upsert: upsertMock,
    search: searchMock,
    getCollections: jest.fn<any>().mockResolvedValue({ collections: [] }),
    createCollection: jest.fn(),
  };

  const vectorStore = new QdrantVectorStore(new FakeEmbeddings(), {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    client: fakeClient as any,
  });

  expect(vectorStore).toBeDefined();

  // Adding one document must result in exactly one upsert call.
  const docs = [{ pageContent: "hello", metadata: {} }];
  await vectorStore.addDocuments(docs);

  expect(upsertMock).toHaveBeenCalledTimes(1);

  expect(vectorStore.maxMarginalRelevanceSearch).toBeDefined();

  // fetchK (7) rather than k (10) is expected to be used as the search limit.
  const mmrOptions = { k: 10, fetchK: 7 };
  await vectorStore.maxMarginalRelevanceSearch("hello", mmrOptions);

  // The MMR path must request vectors alongside the metadata/content payload.
  const expectedSearchArgs = {
    filter: undefined,
    limit: 7,
    vector: [0.1, 0.2, 0.3, 0.4],
    with_payload: ["metadata", "content"],
    with_vector: true,
  };

  expect(searchMock).toHaveBeenCalledTimes(1);
  expect(searchMock).toHaveBeenCalledWith("documents", expectedSearchArgs);
});

libs/langchain-qdrant/src/vectorstores.ts

+64-1
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,13 @@ import { QdrantClient } from "@qdrant/js-client-rest";
22
import type { Schemas as QdrantSchemas } from "@qdrant/js-client-rest";
33
import { v4 as uuid } from "uuid";
44
import type { EmbeddingsInterface } from "@langchain/core/embeddings";
5-
import { VectorStore } from "@langchain/core/vectorstores";
5+
import {
6+
type MaxMarginalRelevanceSearchOptions,
7+
VectorStore,
8+
} from "@langchain/core/vectorstores";
69
import { Document } from "@langchain/core/documents";
710
import { getEnvironmentVariable } from "@langchain/core/utils/env";
11+
import { maximalMarginalRelevance } from "@langchain/core/utils/math";
812

913
const CONTENT_KEY = "content";
1014
const METADATA_KEY = "metadata";
@@ -194,6 +198,8 @@ export class QdrantVectorStore extends VectorStore {
194198
vector: query,
195199
limit: k,
196200
filter,
201+
with_payload: [this.metadataPayloadKey, this.contentPayloadKey],
202+
with_vector: false,
197203
});
198204

199205
const result: [Document, number][] = (
@@ -210,6 +216,63 @@ export class QdrantVectorStore extends VectorStore {
210216
return result;
211217
}
212218

219+
/**
 * Return documents selected using maximal marginal relevance (MMR).
 * MMR optimizes for similarity to the query AND diversity among the
 * selected documents.
 *
 * The query is embedded, up to `fetchK` candidates are fetched from Qdrant
 * together with their stored vectors, and the MMR helper then picks which
 * candidates to return.
 *
 * @param {string} query - Text to look up documents similar to. An empty query
 * short-circuits and returns an empty list without contacting Qdrant.
 * @param {number} options.k - Number of documents to return. When omitted, the
 * default of `maximalMarginalRelevance` applies.
 * @param {number} options.fetchK - Number of documents to fetch before passing to the MMR algorithm. Defaults to 20.
 * @param {number} options.lambda - Number between 0 and 1 that determines the degree of diversity among the results,
 * where 0 corresponds to maximum diversity and 1 to minimum diversity. When omitted, the
 * default of `maximalMarginalRelevance` applies.
 * @param {this["FilterType"]} options.filter - Optional filter to apply to the search results.
 *
 * @returns {Promise<Document[]>} - List of documents selected by maximal marginal relevance.
 */
async maxMarginalRelevanceSearch(
  query: string,
  options: MaxMarginalRelevanceSearchOptions<this["FilterType"]>
): Promise<Document[]> {
  if (!query) {
    return [];
  }

  // Read every option the same (null-safe) way, once, up front. Previously
  // `k` alone was read as `options.k`, so an untyped caller that omitted
  // `options` paid for the embedding and the search before hitting a TypeError.
  const k = options?.k;
  const fetchK = options?.fetchK ?? 20;
  const lambda = options?.lambda;
  const filter = options?.filter;

  const queryEmbedding = await this.embeddings.embedQuery(query);

  await this.ensureCollection();

  // Vectors are requested explicitly: MMR needs each candidate's embedding
  // to compare candidates against the query and against each other.
  const candidates = await this.client.search(this.collectionName, {
    vector: queryEmbedding,
    limit: fetchK,
    filter,
    with_payload: [this.metadataPayloadKey, this.contentPayloadKey],
    with_vector: true,
  });

  const candidateVectors = candidates.map((res) => res.vector) as number[][];

  // The helper returns indexes into `candidateVectors`, which line up
  // one-to-one with `candidates`.
  const selectedIndexes = maximalMarginalRelevance(
    queryEmbedding,
    candidateVectors,
    lambda,
    k
  );

  const selected = selectedIndexes.map(
    (idx) => candidates[idx]
  ) as QdrantSearchResponse[];

  return selected.map(
    (res) =>
      new Document({
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        metadata: res.payload[this.metadataPayloadKey] as Record<string, any>,
        pageContent: res.payload[this.contentPayloadKey] as string,
      })
  );
}
275+
213276
/**
214277
* Method to ensure the existence of a collection in the Qdrant database.
215278
* If the collection does not exist, it is created.

0 commit comments

Comments
 (0)