Skip to content

Commit ef1ea2f

Browse files
langchain[patch]: Parent doc OOM fix (extends #5989) (#6012)
* Fix memory issue for large number of chunks on large documents
* Format

---------

Co-authored-by: Clemens Peters <[email protected]>
1 parent ef201d0 commit ef1ea2f

File tree

1 file changed

+16
-12
lines changed

1 file changed

+16
-12
lines changed

langchain/src/retrievers/parent_document.ts

+16-12
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,21 @@ export class ParentDocumentRetriever extends MultiVectorRetriever {
135135
return parentDocs.slice(0, this.parentK);
136136
}
137137

138+
/**
 * Persists a single parent document together with its child chunks.
 *
 * The child chunks are indexed first: through `childDocumentRetriever`
 * when one is configured, otherwise through `vectorstore`. When
 * `addToDocstore` is true, the parent document is then written to the
 * docstore under its parent ID.
 *
 * The docstore entry must be keyed by the parent ID. Passing
 * `Object.entries(parentDoc)` to `mset` would instead store the
 * document's own fields ("pageContent", "metadata") as keys, so every
 * parent would overwrite the same entries and lookups by ID would miss.
 *
 * @param parentDoc The full parent document to store.
 * @param childDocs Chunks derived from `parentDoc`; `addDocuments` tags
 *   each one with the parent ID under `metadata[this.idKey]`.
 * @param addToDocstore Whether the parent document should be written to
 *   the docstore.
 * @param parentDocId Optional explicit docstore key. When omitted, the
 *   key is read from the first child chunk's `metadata[this.idKey]`.
 */
async _storeDocuments(
  parentDoc: Document,
  childDocs: Document[],
  addToDocstore: boolean,
  parentDocId?: string
): Promise<void> {
  if (this.childDocumentRetriever) {
    await this.childDocumentRetriever.addDocuments(childDocs);
  } else {
    await this.vectorstore.addDocuments(childDocs);
  }
  if (!addToDocstore) {
    return;
  }
  // Existing callers pass only the document, so fall back to the ID that
  // was stamped onto the child chunks.
  const docstoreKey: string | undefined =
    parentDocId ?? childDocs[0]?.metadata[this.idKey];
  if (docstoreKey === undefined) {
    // No chunk references this parent and no explicit ID was given, so
    // there is no key to store it under. Skip rather than write bogus keys.
    return;
  }
  await this.docstore.mset([[docstoreKey, parentDoc]]);
}
152+
138153
/**
139154
* Adds documents to the docstore and vectorstores.
140155
* If a retriever is provided, it will be used to add documents instead of the vectorstore.
@@ -181,8 +196,6 @@ export class ParentDocumentRetriever extends MultiVectorRetriever {
181196
`Got uneven list of documents and ids.\nIf "ids" is provided, should be same length as "documents".`
182197
);
183198
}
184-
const embeddedDocs: Document[] = [];
185-
const fullDocs: Record<string, Document> = {};
186199
for (let i = 0; i < parentDocs.length; i += 1) {
187200
const parentDoc = parentDocs[i];
188201
const parentDocId = parentDocIds[i];
@@ -197,16 +210,7 @@ export class ParentDocumentRetriever extends MultiVectorRetriever {
197210
metadata: { ...subDoc.metadata, [this.idKey]: parentDocId },
198211
})
199212
);
200-
embeddedDocs.push(...taggedSubDocs);
201-
fullDocs[parentDocId] = parentDoc;
202-
}
203-
if (this.childDocumentRetriever) {
204-
await this.childDocumentRetriever.addDocuments(embeddedDocs);
205-
} else {
206-
await this.vectorstore.addDocuments(embeddedDocs);
207-
}
208-
if (addToDocstore) {
209-
await this.docstore.mset(Object.entries(fullDocs));
213+
await this._storeDocuments(parentDoc, taggedSubDocs, addToDocstore);
210214
}
211215
}
212216
}

0 commit comments

Comments
 (0)