1
- import { Document } from "@langchain/core/documents" ;
2
- import type { BrowserbaseLoadOptions } from "@browserbasehq/sdk" ;
1
+ import { Document , DocumentInterface } from "@langchain/core/documents" ;
2
+ import Browserbase , { BrowserbaseLoadOptions } from "@browserbasehq/sdk" ;
3
3
import { BaseDocumentLoader } from "../base.js" ;
4
4
import type { DocumentLoader } from "../base.js" ;
5
5
6
/**
 * Options accepted by the Browserbase loader.
 *
 * Carries every field of the SDK's `BrowserbaseLoadOptions` (the whole object
 * is forwarded to `loadURLs`) plus the API key that is handed to the
 * `Browserbase` client constructor.
 */
interface BrowserbaseLoaderOptions extends BrowserbaseLoadOptions {
  /** Browserbase API key; passed as-is to `new Browserbase(...)`. */
  apiKey?: string;
}
9
9
10
10
/**
11
11
* Load pre-rendered web pages using a headless browser hosted on Browserbase.
12
12
*
13
13
* Depends on `@browserbasehq/sdk` package.
14
14
* Get your API key from https://browserbase.com
15
15
*
16
+ * @example
17
+ * ```javascript
18
+ * import { BrowserbaseLoader } from "langchain/document_loaders/web/browserbase.js";
19
+ *
20
+ * const loader = new BrowserbaseLoader(["https://example.com"], {
21
+ * apiKey: process.env.BROWSERBASE_API_KEY,
22
+ * textContent: true,
23
+ * });
24
+ *
25
+ * const docs = await loader.load();
26
+ * ```
27
+ *
16
28
* @param {string[] } urls - The URLs of the web pages to load.
17
29
* @param {BrowserbaseLoaderOptions } [options] - Browserbase client options.
18
30
*/
@@ -25,19 +37,22 @@ export class BrowserbaseLoader
25
37
26
38
options : BrowserbaseLoaderOptions ;
27
39
40
+ browserbase : Browserbase ;
41
+
28
42
/**
 * Build a loader for the given URLs and eagerly create the Browserbase
 * client that will fetch them.
 *
 * @param urls - The URLs of the web pages to load.
 * @param options - Loader options; `options.apiKey` is passed to the
 *   Browserbase client, and the full object is kept for later load calls.
 */
constructor(urls: string[], options: BrowserbaseLoaderOptions = {}) {
  super();

  const { apiKey } = options;

  this.urls = urls;
  this.options = options;
  this.browserbase = new Browserbase(apiKey);
}
33
48
34
49
/**
35
50
* Load pages from URLs.
36
51
*
37
- * @returns {Promise<Document []> } - A generator that yields loaded documents.
52
+ * @returns {Promise<DocumentInterface []> } - A promise which resolves to a list of documents.
38
53
*/
39
54
40
- async load ( ) : Promise < Document [ ] > {
55
+ async load ( ) : Promise < DocumentInterface [ ] > {
41
56
const documents : Document [ ] = [ ] ;
42
57
for await ( const doc of this . lazyLoad ( ) ) {
43
58
documents . push ( doc ) ;
@@ -49,11 +64,10 @@ export class BrowserbaseLoader
49
64
/**
50
65
* Load pages from URLs.
51
66
*
52
- * @returns {Generator<Document > } - A generator that yields loaded documents.
67
+ * @returns {AsyncGenerator<DocumentInterface>} - An async generator that yields documents.
53
68
*/
54
69
async * lazyLoad ( ) {
55
- const browserbase = await BrowserbaseLoader . imports ( this . options . apiKey ) ;
56
- const pages = await browserbase . loadURLs ( this . urls , this . options ) ;
70
+ const pages = await this . browserbase . loadURLs ( this . urls , this . options ) ;
57
71
58
72
let index = 0 ;
59
73
for await ( const page of pages ) {
@@ -67,17 +81,4 @@ export class BrowserbaseLoader
67
81
// Step the counter by exactly one per page. The previous
// `index += index + 1` evaluated to `2 * index + 1` (0, 1, 3, 7, ...),
// so after the second page it no longer matched the page's position.
// NOTE(review): presumably `index` selects the matching entry of
// `this.urls` in the elided loop body — confirm against the full file.
index += 1;
68
82
}
69
83
}
70
-
71
- static async imports ( apiKey ?: string ) {
72
- try {
73
- const { default : Browserbase } = await import ( "@browserbasehq/sdk" ) ;
74
- return new Browserbase ( apiKey ) ;
75
- } catch ( error ) {
76
- throw new Error (
77
- "You must run " +
78
- "`npm install --save @browserbasehq/sdk` " +
79
- "to use the Browserbase loader."
80
- ) ;
81
- }
82
- }
83
84
}
0 commit comments