Skip to content

Commit 668b0bb

Browse files
community[minor]: Add Upstash Embeddings Support (#5150)
* add: metadata filtering for UpstashVectorStore * fmt * skip tests * add: upstash embeddings support * add: tests * fmt * Update upstash.mdx * Update upstash.mdx * use fake embeddings * fix: replace UpstashEmbeddings parameter with FakeEmbeddings class * Naming --------- Co-authored-by: Jacob Lee <[email protected]>
1 parent cb34587 commit 668b0bb

File tree

4 files changed

+207
-17
lines changed

4 files changed

+207
-17
lines changed

docs/core_docs/docs/integrations/vectorstores/upstash.mdx

+7
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import CodeBlock from "@theme/CodeBlock";
22
import CreateClientExample from "@examples/indexes/vector_stores/upstash/create_client.ts";
33
import IndexQueryExample from "@examples/indexes/vector_stores/upstash/index_and_query_docs.ts";
44
import DeleteExample from "@examples/indexes/vector_stores/upstash/delete_docs.ts";
5+
import UpstashEmbeddingsExample from "@examples/indexes/vector_stores/upstash/upstash_embeddings.ts";
56
import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx";
67

78
# Upstash Vector
@@ -41,6 +42,12 @@ You can index the LangChain documents with any model of your choice, and perform
4142

4243
<CodeBlock language="typescript">{IndexQueryExample}</CodeBlock>
4344

45+
## Upstash embeddings
46+
47+
You can use the Upstash embeddings service, which uses the embedding model you selected when creating the vector database. You don't need to create the embeddings manually, as the Upstash Vector service will handle this for you.
48+
49+
<CodeBlock language="typescript">{UpstashEmbeddingsExample}</CodeBlock>
50+
4451
## Delete Documents
4552

4653
You can also delete the documents you've indexed previously.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import { Index } from "@upstash/vector";
2+
import { Document } from "@langchain/core/documents";
3+
import { UpstashVectorStore } from "@langchain/community/vectorstores/upstash";
4+
import { FakeEmbeddings } from "@langchain/core/utils/testing";
5+
6+
const index = new Index({
7+
url: process.env.UPSTASH_VECTOR_REST_URL as string,
8+
token: process.env.UPSTASH_VECTOR_REST_TOKEN as string,
9+
});
10+
11+
// Initializing the UpstashVectorStore with the Upstash Embeddings configuration.
12+
// Passing a FakeEmbeddings instance tells the store to skip local embedding and let Upstash Embeddings generate the vectors instead.
13+
const UpstashVector = new UpstashVectorStore(new FakeEmbeddings(), { index });
14+
15+
// Creating the docs to be indexed.
16+
const id = new Date().getTime();
17+
const documents = [
18+
new Document({
19+
metadata: { name: id },
20+
pageContent: "Hello there!",
21+
}),
22+
new Document({
23+
metadata: { name: id },
24+
pageContent: "What are you building?",
25+
}),
26+
new Document({
27+
metadata: { time: id },
28+
pageContent: "Upstash Vector is great for building AI applications.",
29+
}),
30+
new Document({
31+
metadata: { time: id },
32+
pageContent: "To be, or not to be, that is the question.",
33+
}),
34+
];
35+
36+
// Adding the provided documents to the Upstash database; their embeddings are generated by Upstash.
37+
await UpstashVector.addDocuments(documents);
38+
39+
// Waiting for the vectors to be indexed in the vector store.
40+
// eslint-disable-next-line no-promise-executor-return
41+
await new Promise((resolve) => setTimeout(resolve, 1000));
42+
43+
const queryResult = await UpstashVector.similaritySearchWithScore(
44+
"Vector database",
45+
2
46+
);
47+
48+
console.log(queryResult);
49+
/**
50+
[
51+
[
52+
Document {
53+
pageContent: 'Upstash Vector is great for building AI applications.',
54+
metadata: [Object]
55+
},
56+
0.9016147
57+
],
58+
[
59+
Document {
60+
pageContent: 'What are you building?',
61+
metadata: [Object]
62+
},
63+
0.8613077
64+
]
65+
]
66+
*/

libs/langchain-community/src/vectorstores/tests/upstash.int.test.ts

+56-3
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
/* eslint-disable no-process-env */
22
import { Index } from "@upstash/vector";
33
import { Document } from "@langchain/core/documents";
4-
import { SyntheticEmbeddings } from "@langchain/core/utils/testing";
4+
import {
5+
SyntheticEmbeddings,
6+
FakeEmbeddings,
7+
} from "@langchain/core/utils/testing";
58
import { EmbeddingsInterface } from "@langchain/core/embeddings";
69
import { UpstashVectorStore } from "../upstash.js";
710
import { sleep } from "../../utils/time.js";
811

9-
describe.skip("UpstashVectorStore", () => {
12+
describe("UpstashVectorStore", () => {
1013
let store: UpstashVectorStore;
1114
let embeddings: EmbeddingsInterface;
1215
let index: Index;
@@ -17,8 +20,10 @@ describe.skip("UpstashVectorStore", () => {
1720
token: process.env.UPSTASH_VECTOR_REST_TOKEN,
1821
});
1922

23+
await index.reset();
24+
2025
embeddings = new SyntheticEmbeddings({
21-
vectorSize: 1536,
26+
vectorSize: 384,
2227
});
2328

2429
store = new UpstashVectorStore(embeddings, {
@@ -119,4 +124,52 @@ describe.skip("UpstashVectorStore", () => {
119124

120125
expect(results2).toHaveLength(0);
121126
});
127+
128+
test("UpstashVectorStore with Upstash Embedding configuration, the embeddings will be created by Upstash's service", async () => {
129+
const vectorStoreWithUpstashEmbeddings = new UpstashVectorStore(
130+
new FakeEmbeddings(),
131+
{ index }
132+
);
133+
134+
const createdAt = new Date().getTime();
135+
136+
const ids = await vectorStoreWithUpstashEmbeddings.addDocuments([
137+
{ pageContent: "hello", metadata: { a: createdAt + 1 } },
138+
{ pageContent: "car", metadata: { a: createdAt } },
139+
{ pageContent: "adjective", metadata: { a: createdAt } },
140+
{ pageContent: "hi", metadata: { a: createdAt } },
141+
]);
142+
143+
// Sleeping for a second to make sure that all the indexing operations are finished.
144+
await sleep(1000);
145+
146+
const results1 =
147+
await vectorStoreWithUpstashEmbeddings.similaritySearchVectorWithScore(
148+
"hello!",
149+
1
150+
);
151+
expect(results1).toHaveLength(1);
152+
153+
expect([results1[0][0]]).toEqual([
154+
new Document({ metadata: { a: createdAt + 1 }, pageContent: "hello" }),
155+
]);
156+
157+
const results2 =
158+
await vectorStoreWithUpstashEmbeddings.similaritySearchVectorWithScore(
159+
"testing!",
160+
6
161+
);
162+
163+
expect(results2).toHaveLength(4);
164+
165+
await vectorStoreWithUpstashEmbeddings.delete({ ids: ids.slice(2) });
166+
167+
const results3 =
168+
await vectorStoreWithUpstashEmbeddings.similaritySearchVectorWithScore(
169+
"testing again!",
170+
6
171+
);
172+
173+
expect(results3).toHaveLength(2);
174+
});
122175
});

libs/langchain-community/src/vectorstores/upstash.ts

+78-14
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
import * as uuid from "uuid";
22
import { EmbeddingsInterface } from "@langchain/core/embeddings";
33
import { VectorStore } from "@langchain/core/vectorstores";
4-
import { Index as UpstashIndex } from "@upstash/vector";
4+
import { Index as UpstashIndex, type QueryResult } from "@upstash/vector";
55
import { Document, DocumentInterface } from "@langchain/core/documents";
66
import { chunkArray } from "@langchain/core/utils/chunk_array";
7+
import { FakeEmbeddings } from "@langchain/core/utils/testing";
8+
79
import {
810
AsyncCaller,
911
AsyncCallerParams,
@@ -37,6 +39,7 @@ export type UpstashDeleteParams =
3739
| { deleteAll: boolean; ids?: never };
3840

3941
const CONCURRENT_UPSERT_LIMIT = 1000;
42+
4043
/**
4144
* The main class that extends the 'VectorStore' class. It provides
4245
* methods for interacting with Upstash index, such as adding documents,
@@ -49,7 +52,7 @@ export class UpstashVectorStore extends VectorStore {
4952

5053
caller: AsyncCaller;
5154

52-
embeddings: EmbeddingsInterface;
55+
useUpstashEmbeddings?: boolean;
5356

5457
filter?: this["FilterType"];
5558

@@ -59,8 +62,11 @@ export class UpstashVectorStore extends VectorStore {
5962

6063
constructor(embeddings: EmbeddingsInterface, args: UpstashVectorLibArgs) {
6164
super(embeddings, args);
62-
63-
this.embeddings = embeddings;
65+
// Special case: a FakeEmbeddings instance signals that Upstash should generate the embeddings. Detecting it requires "instanceof", so the lint rule is disabled for the next line.
66+
// eslint-disable-next-line no-instanceof/no-instanceof
67+
if (embeddings instanceof FakeEmbeddings) {
68+
this.useUpstashEmbeddings = true;
69+
}
6470

6571
const { index, ...asyncCallerArgs } = args;
6672

@@ -78,10 +84,14 @@ export class UpstashVectorStore extends VectorStore {
7884
*/
7985
async addDocuments(
8086
documents: DocumentInterface[],
81-
options?: { ids?: string[] }
87+
options?: { ids?: string[]; useUpstashEmbeddings?: boolean }
8288
) {
8389
const texts = documents.map(({ pageContent }) => pageContent);
8490

91+
if (this.useUpstashEmbeddings || options?.useUpstashEmbeddings) {
92+
return this._addData(documents, options);
93+
}
94+
8595
const embeddings = await this.embeddings.embedDocuments(texts);
8696

8797
return this.addVectors(embeddings, documents, options);
@@ -128,6 +138,48 @@ export class UpstashVectorStore extends VectorStore {
128138
return documentIds;
129139
}
130140

141+
/**
142+
* This method adds the provided documents to the Upstash database. The pageContent of each document will be embedded by Upstash Embeddings.
143+
* @param documents Array of Document objects to be added to the Upstash database.
144+
* @param options Optional object containing the array of ids for the documents.
145+
* @returns Promise that resolves with the ids of the provided documents when the upsert operation is done.
146+
*/
147+
protected async _addData(
148+
documents: DocumentInterface[],
149+
options?: { ids?: string[] }
150+
) {
151+
const documentIds =
152+
options?.ids ?? Array.from({ length: documents.length }, () => uuid.v4());
153+
154+
const upstashVectorsWithData = documents.map((document, index) => {
155+
const metadata = {
156+
_pageContentLC: documents[index].pageContent,
157+
...documents[index].metadata,
158+
};
159+
160+
const id = documentIds[index];
161+
162+
return {
163+
id,
164+
data: document.pageContent,
165+
metadata,
166+
};
167+
});
168+
169+
const vectorChunks = chunkArray(
170+
upstashVectorsWithData,
171+
CONCURRENT_UPSERT_LIMIT
172+
);
173+
174+
const batchRequests = vectorChunks.map((chunk) =>
175+
this.caller.call(async () => this.index.upsert(chunk))
176+
);
177+
178+
await Promise.all(batchRequests);
179+
180+
return documentIds;
181+
}
182+
131183
/**
132184
* This method deletes documents from the Upstash database. You can either
133185
* provide the target ids, or delete all vectors in the database.
@@ -143,18 +195,30 @@ export class UpstashVectorStore extends VectorStore {
143195
}
144196

145197
protected async _runUpstashQuery(
146-
query: number[],
198+
query: number[] | string,
147199
k: number,
148200
filter?: this["FilterType"],
149201
options?: { includeVectors: boolean }
150202
) {
151-
const queryResult = await this.index.query<UpstashQueryMetadata>({
152-
vector: query,
153-
topK: k,
154-
includeMetadata: true,
155-
filter,
156-
...options,
157-
});
203+
let queryResult: QueryResult<UpstashQueryMetadata>[] = [];
204+
205+
if (typeof query === "string") {
206+
queryResult = await this.index.query<UpstashQueryMetadata>({
207+
data: query,
208+
topK: k,
209+
includeMetadata: true,
210+
filter,
211+
...options,
212+
});
213+
} else {
214+
queryResult = await this.index.query<UpstashQueryMetadata>({
215+
vector: query,
216+
topK: k,
217+
includeMetadata: true,
218+
filter,
219+
...options,
220+
});
221+
}
158222

159223
return queryResult;
160224
}
@@ -169,7 +233,7 @@ export class UpstashVectorStore extends VectorStore {
169233
* maximum of 'k' and vectors in the index.
170234
*/
171235
async similaritySearchVectorWithScore(
172-
query: number[],
236+
query: number[] | string,
173237
k: number,
174238
filter?: this["FilterType"]
175239
): Promise<[DocumentInterface, number][]> {

0 commit comments

Comments
 (0)