1
1
import * as uuid from "uuid" ;
2
2
import { EmbeddingsInterface } from "@langchain/core/embeddings" ;
3
3
import { VectorStore } from "@langchain/core/vectorstores" ;
4
- import { Index as UpstashIndex } from "@upstash/vector" ;
4
+ import { Index as UpstashIndex , type QueryResult } from "@upstash/vector" ;
5
5
import { Document , DocumentInterface } from "@langchain/core/documents" ;
6
6
import { chunkArray } from "@langchain/core/utils/chunk_array" ;
7
+ import { FakeEmbeddings } from "@langchain/core/utils/testing" ;
8
+
7
9
import {
8
10
AsyncCaller ,
9
11
AsyncCallerParams ,
@@ -37,6 +39,7 @@ export type UpstashDeleteParams =
37
39
| { deleteAll : boolean ; ids ?: never } ;
38
40
39
41
// Chunk size passed to `chunkArray` when splitting records into upsert
// batches; each resulting chunk is sent as a single `index.upsert` call.
const CONCURRENT_UPSERT_LIMIT = 1000 ;
42
+
40
43
/**
41
44
* The main class that extends the 'VectorStore' class. It provides
42
45
* methods for interacting with Upstash index, such as adding documents,
@@ -49,7 +52,7 @@ export class UpstashVectorStore extends VectorStore {
49
52
50
53
caller : AsyncCaller ;
51
54
52
- embeddings : EmbeddingsInterface ;
55
+ useUpstashEmbeddings ?: boolean ;
53
56
54
57
filter ?: this[ "FilterType" ] ;
55
58
@@ -59,8 +62,11 @@ export class UpstashVectorStore extends VectorStore {
59
62
60
63
constructor ( embeddings : EmbeddingsInterface , args : UpstashVectorLibArgs ) {
61
64
super ( embeddings , args ) ;
62
-
63
- this . embeddings = embeddings ;
65
+ // Special case where the embeddings instance is a FakeEmbeddings instance. In this case, we need to disable "instanceof" rule.
66
+ // eslint-disable-next-line no-instanceof/no-instanceof
67
+ if ( embeddings instanceof FakeEmbeddings ) {
68
+ this . useUpstashEmbeddings = true ;
69
+ }
64
70
65
71
const { index, ...asyncCallerArgs } = args ;
66
72
@@ -78,10 +84,14 @@ export class UpstashVectorStore extends VectorStore {
78
84
*/
79
85
async addDocuments (
80
86
documents : DocumentInterface [ ] ,
81
- options ?: { ids ?: string [ ] }
87
+ options ?: { ids ?: string [ ] ; useUpstashEmbeddings ?: boolean }
82
88
) {
83
89
const texts = documents . map ( ( { pageContent } ) => pageContent ) ;
84
90
91
+ if ( this . useUpstashEmbeddings || options ?. useUpstashEmbeddings ) {
92
+ return this . _addData ( documents , options ) ;
93
+ }
94
+
85
95
const embeddings = await this . embeddings . embedDocuments ( texts ) ;
86
96
87
97
return this . addVectors ( embeddings , documents , options ) ;
@@ -128,6 +138,48 @@ export class UpstashVectorStore extends VectorStore {
128
138
return documentIds ;
129
139
}
130
140
141
/**
 * Upserts the given documents into the Upstash index as raw text (`data`)
 * rather than as precomputed vectors, so that the page content is embedded
 * by Upstash Embeddings instead of by the local embeddings instance.
 * @param documents Documents to store. Each document's `pageContent` is sent as the record data and is also kept in the metadata under `_pageContentLC`.
 * @param options Optional settings; `ids` supplies one id per document. When omitted, a random UUID v4 is generated for every document.
 * @returns Promise resolving to the ids used for the documents once every upsert batch has completed.
 */
protected async _addData(
  documents: DocumentInterface[],
  options?: { ids?: string[] }
) {
  // Caller-supplied ids win; otherwise mint one UUID per document.
  const documentIds =
    options?.ids ?? Array.from(documents, () => uuid.v4());

  const records = documents.map(({ pageContent, metadata }, position) => ({
    id: documentIds[position],
    data: pageContent,
    // The document's own metadata is spread last, matching the original
    // key order (a user-provided `_pageContentLC` would take precedence).
    metadata: { _pageContentLC: pageContent, ...metadata },
  }));

  // Split into batches of CONCURRENT_UPSERT_LIMIT and issue all upserts
  // through the async caller, waiting for every batch to finish.
  await Promise.all(
    chunkArray(records, CONCURRENT_UPSERT_LIMIT).map((batch) =>
      this.caller.call(async () => this.index.upsert(batch))
    )
  );

  return documentIds;
}
182
+
131
183
/**
132
184
* This method deletes documents from the Upstash database. You can either
133
185
* provide the target ids, or delete all vectors in the database.
@@ -143,18 +195,30 @@ export class UpstashVectorStore extends VectorStore {
143
195
}
144
196
145
197
/**
 * Runs a top-k query against the Upstash index.
 * @param query Either a numeric vector (sent as `vector`) or a raw string (sent as `data`).
 * @param k Number of results to request (`topK`).
 * @param filter Optional filter forwarded to the index query.
 * @param options Optional extra query flags (e.g. `includeVectors`); spread last so they are applied on top of the defaults.
 * @returns The query results returned by the index, with metadata included.
 */
protected async _runUpstashQuery(
  query: number[] | string,
  k: number,
  filter?: this["FilterType"],
  options?: { includeVectors: boolean }
) {
  // Parameters common to both the text and the vector query forms.
  const sharedParams = {
    topK: k,
    includeMetadata: true,
    filter,
    ...options,
  };

  // The payload field depends on the query kind; the two calls are kept
  // separate so each one matches a single variant of the SDK's query type.
  const queryResult: QueryResult<UpstashQueryMetadata>[] =
    typeof query === "string"
      ? await this.index.query<UpstashQueryMetadata>({
          data: query,
          ...sharedParams,
        })
      : await this.index.query<UpstashQueryMetadata>({
          vector: query,
          ...sharedParams,
        });

  return queryResult;
}
@@ -169,7 +233,7 @@ export class UpstashVectorStore extends VectorStore {
169
233
* maximum of 'k' and vectors in the index.
170
234
*/
171
235
async similaritySearchVectorWithScore (
172
- query : number [ ] ,
236
+ query : number [ ] | string ,
173
237
k : number ,
174
238
filter ?: this[ "FilterType" ]
175
239
) : Promise < [ DocumentInterface , number ] [ ] > {
0 commit comments