Skip to content

Commit a53d208

Browse files
authored
core[minor]: Add ID field to document (#5893)
* core[minor]: Add ID field to document
* add more docstring
* chore: lint files
* update typeorm id document field
* fix tests
1 parent d37a6d2 commit a53d208

File tree

5 files changed

+36
-2
lines changed

5 files changed

+36
-2
lines changed

langchain-core/src/documents/document.ts

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,14 @@ export interface DocumentInput<
55
pageContent: string;
66

77
metadata?: Metadata;
8+
9+
/**
10+
* An optional identifier for the document.
11+
*
12+
* Ideally this should be unique across the document collection and formatted
13+
* as a UUID, but this will not be enforced.
14+
*/
15+
id?: string;
816
}
917

1018
export interface DocumentInterface<
@@ -14,6 +22,14 @@ export interface DocumentInterface<
1422
pageContent: string;
1523

1624
metadata: Metadata;
25+
26+
/**
27+
* An optional identifier for the document.
28+
*
29+
* Ideally this should be unique across the document collection and formatted
30+
* as a UUID, but this will not be enforced.
31+
*/
32+
id?: string;
1733
}
1834

1935
/**
@@ -28,9 +44,21 @@ export class Document<
2844

2945
metadata: Metadata;
3046

47+
// The ID field is optional at the moment.
48+
// It will likely become required in a future major release after
49+
// it has been adopted by enough vectorstore implementations.
50+
/**
51+
* An optional identifier for the document.
52+
*
53+
* Ideally this should be unique across the document collection and formatted
54+
* as a UUID, but this will not be enforced.
55+
*/
56+
id?: string;
57+
3158
constructor(fields: DocumentInput<Metadata>) {
3259
this.pageContent =
3360
fields.pageContent !== undefined ? fields.pageContent.toString() : "";
3461
this.metadata = fields.metadata ?? ({} as Metadata);
62+
this.id = fields.id;
3563
}
3664
}

langchain/src/document_loaders/tests/csv-blob.test.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ test("Test CSV loader from blob", async () => {
4646
expect(docs.length).toBe(2);
4747
expect(docs[0]).toMatchInlineSnapshot(`
4848
Document {
49+
"id": undefined,
4950
"metadata": {
5051
"blobType": "text/csv",
5152
"line": 1,
@@ -57,6 +58,7 @@ test("Test CSV loader from blob", async () => {
5758
`);
5859
expect(docs[1]).toMatchInlineSnapshot(`
5960
Document {
61+
"id": undefined,
6062
"metadata": {
6163
"blobType": "text/csv",
6264
"line": 2,

langchain/src/document_loaders/tests/json-blob.test.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ test("Test JSON loader from blob", async () => {
3939
expect(docs.length).toBe(2);
4040
expect(docs[0]).toMatchInlineSnapshot(`
4141
Document {
42+
"id": undefined,
4243
"metadata": {
4344
"blobType": "application/json",
4445
"line": 1,
@@ -49,6 +50,7 @@ test("Test JSON loader from blob", async () => {
4950
`);
5051
expect(docs[1]).toMatchInlineSnapshot(`
5152
Document {
53+
"id": undefined,
5254
"metadata": {
5355
"blobType": "application/json",
5456
"line": 2,
@@ -87,6 +89,7 @@ test("Test JSON loader from blob", async () => {
8789
expect(docs.length).toBe(10);
8890
expect(docs[0]).toMatchInlineSnapshot(`
8991
Document {
92+
"id": undefined,
9093
"metadata": {
9194
"blobType": "application/json",
9295
"line": 1,
@@ -97,6 +100,7 @@ test("Test JSON loader from blob", async () => {
97100
`);
98101
expect(docs[1]).toMatchInlineSnapshot(`
99102
Document {
103+
"id": undefined,
100104
"metadata": {
101105
"blobType": "application/json",
102106
"line": 2,

langchain/src/document_loaders/tests/jsonl-blob.test.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ test("Test JSONL loader from blob", async () => {
4040
expect(docs.length).toBe(2);
4141
expect(docs[0]).toMatchInlineSnapshot(`
4242
Document {
43+
"id": undefined,
4344
"metadata": {
4445
"blobType": "application/jsonl+json",
4546
"line": 1,
@@ -50,6 +51,7 @@ test("Test JSONL loader from blob", async () => {
5051
`);
5152
expect(docs[1]).toMatchInlineSnapshot(`
5253
Document {
54+
"id": undefined,
5355
"metadata": {
5456
"blobType": "application/jsonl+json",
5557
"line": 2,

libs/langchain-community/src/vectorstores/typeorm.ts

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,6 @@ export interface TypeORMVectorStoreArgs {
2323
*/
2424
export class TypeORMVectorStoreDocument extends Document {
2525
embedding: string;
26-
27-
id?: string;
2826
}
2927

3028
const defaultDocumentTableName = "documents";

0 commit comments

Comments
 (0)