Skip to content

Commit f924d48

Browse files
committed
feat: add textract driver to puterai module
1 parent b520783 commit f924d48

File tree

8 files changed

+4883
-2531
lines changed

8 files changed

+4883
-2531
lines changed

package-lock.json

+4,675-2,531
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/backend/exports.js

+2
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ const { testlaunch } = require("./src/index.js");
2626
const BaseService = require("./src/services/BaseService.js");
2727
const { Context } = require("./src/util/context.js");
2828
const { TestDriversModule } = require("./src/modules/test-drivers/TestDriversModule.js");
29+
const { PuterAIModule } = require("./src/modules/puterai/PuterAIModule.js");
2930

3031

3132
module.exports = {
@@ -48,4 +49,5 @@ module.exports = {
4849
LocalDiskStorageModule,
4950
SelfHostedModule,
5051
TestDriversModule,
52+
PuterAIModule,
5153
};

src/backend/package.json

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
"test": "npx mocha"
88
},
99
"dependencies": {
10+
"@aws-sdk/client-textract": "^3.621.0",
1011
"@heyputer/kv.js": "^0.1.3",
1112
"@heyputer/multest": "^0.0.2",
1213
"@heyputer/puter-js-common": "^1.0.0",
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
const BaseService = require("../../services/BaseService");
2+
3+
/**
 * Service that contributes AI-related interface definitions to the
 * `interfaces` collection of the `registry` service.
 */
class AIInterfaceService extends BaseService {
    /**
     * Adds the `puter-ocr` interface definition (a single `recognize`
     * method taking a `source` file) to the `interfaces` collection.
     */
    async ['__on_driver.register.interfaces'] () {
        const ocr_interface = {
            description: 'Optical character recognition',
            methods: {
                recognize: {
                    description: 'Recognize text in an image or document.',
                    parameters: {
                        source: { type: 'file' },
                    },
                    // NOTE(review): declared as an image stream, which looks
                    // unusual for an OCR result — confirm how the driver
                    // layer consumes this descriptor.
                    result: {
                        type: {
                            $: 'stream',
                            content_type: 'image',
                        },
                    },
                },
            },
        };

        this.services
            .get('registry')
            .get('interfaces')
            .set('puter-ocr', ocr_interface);
    }
}
29+
30+
// Public export of this module.
module.exports = { AIInterfaceService };
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
const { TextractClient, AnalyzeDocumentCommand, InvalidS3ObjectException } = require("@aws-sdk/client-textract");
2+
3+
const BaseService = require("../../services/BaseService");
4+
5+
/**
 * Service implementing the `puter-ocr` interface on top of AWS Textract.
 *
 * Textract clients are created on demand and cached per AWS region.
 */
class AWSTextractService extends BaseService {
    _construct () {
        // Cache of TextractClient instances, keyed by region name.
        this.clients_ = {};
    }

    static IMPLEMENTS = {
        ['puter-ocr']: {
            /**
             * Runs Textract on `source` and reduces the response to a list
             * of simplified blocks.
             *
             * @param {object} args
             * @param {object} args.source - file facade passed through to
             *   `analyze_document`
             * @param {*} [args.test_mode] - accepted but not used here
             * @returns {Promise<{blocks: Array<{type: string,
             *   confidence: *, text: *}>}>}
             */
            async recognize ({ source, test_mode }) {
                // Structural/layout block types that are left out of the
                // simplified response.
                const skipped_block_types = new Set([
                    'PAGE',
                    'CELL',
                    'TABLE',
                    'MERGED_CELL',
                    'LAYOUT_FIGURE',
                    'LAYOUT_TEXT',
                ]);

                const resp = await this.analyze_document(source);

                // Simplify the response for common interface
                const puter_response = {
                    blocks: [],
                };

                // `Blocks` may be absent from the response; treat that the
                // same as an empty list instead of throwing.
                for ( const block of resp.Blocks ?? [] ) {
                    if ( skipped_block_types.has(block.BlockType) ) continue;

                    puter_response.blocks.push({
                        type: `text/textract:${block.BlockType}`,
                        confidence: block.Confidence,
                        text: block.Text,
                    });
                }

                return puter_response;
            },
        },
    };

    /**
     * Builds the credentials object for the Textract client from
     * `this.config.aws`.
     *
     * @returns {{accessKeyId: *, secretAccessKey: *}}
     */
    _create_aws_credentials () {
        return {
            accessKeyId: this.config.aws.access_key,
            secretAccessKey: this.config.aws.secret_key,
        };
    }

    /**
     * Returns the cached Textract client for `region`, creating it first
     * if needed.
     *
     * @param {string} [region] - when falsy, falls back to the service
     *   config, then the global config, then 'us-west-2'
     * @returns {TextractClient}
     */
    _get_client (region) {
        if ( ! region ) {
            region = this.config.aws?.region ?? this.global_config.aws?.region
                ?? 'us-west-2';
        }
        if ( this.clients_[region] ) return this.clients_[region];

        this.clients_[region] = new TextractClient({
            credentials: this._create_aws_credentials(),
            region,
        });

        return this.clients_[region];
    }

    /**
     * Builds the AnalyzeDocument command used for every request.
     *
     * @param {object} document - Textract `Document` input (`S3Object` or
     *   `Bytes`)
     * @returns {AnalyzeDocumentCommand}
     */
    _create_analyze_command (document) {
        return new AnalyzeDocumentCommand({
            Document: document,
            // Only layout analysis is requested; TABLES, FORMS and
            // SIGNATURES are intentionally not enabled.
            FeatureTypes: ['LAYOUT'],
        });
    }

    /**
     * Sends the file to Textract. If the document was referenced through
     * S3 and Textract rejects it with InvalidS3ObjectException, the request
     * is retried once with the document supplied as a buffer.
     *
     * @param {object} file_facade - object whose async `get(key)` provides
     *   's3-info', 'buffer' and 'fs-node'
     * @returns {Promise<object>} the response of `client.send`
     * @throws any error from the client other than the handled S3 case, and
     *   any error raised by the retry itself
     */
    async analyze_document (file_facade) {
        const {
            client, document, using_s3,
        } = await this._get_client_and_document(file_facade);

        try {
            return await client.send(this._create_analyze_command(document));
        } catch (e) {
            const can_retry_with_buffer =
                using_s3 && e instanceof InvalidS3ObjectException;
            if ( ! can_retry_with_buffer ) throw e;
        }

        // Textract could not read the S3 object; retry with the buffer.
        const fallback = await this._get_client_and_document(file_facade, true);
        return await fallback.client.send(
            this._create_analyze_command(fallback.document),
        );
    }

    /**
     * Chooses the Textract client and `Document` input for a file.
     *
     * An S3 reference is preferred when 's3-info' is available (using the
     * client for the bucket's region); otherwise the file's buffer is sent
     * with the default-region client.
     *
     * @param {object} file_facade - object with an async `get(key)` method
     * @param {boolean} [force_buffer] - skip the S3 reference even when
     *   's3-info' is available
     * @returns {Promise<{client: TextractClient, document: object,
     *   using_s3: boolean}>}
     * @throws when the file's fs-node does not exist, or when neither an
     *   S3 reference nor a buffer is available
     */
    async _get_client_and_document (file_facade, force_buffer) {
        const try_s3info = await file_facade.get('s3-info');
        if ( try_s3info && ! force_buffer ) {
            return {
                using_s3: true,
                client: this._get_client(try_s3info.bucket_region),
                document: {
                    S3Object: {
                        Bucket: try_s3info.bucket,
                        Name: try_s3info.key,
                    },
                },
            };
        }

        const try_buffer = await file_facade.get('buffer');
        if ( try_buffer ) {
            return {
                using_s3: false,
                client: this._get_client(),
                document: {
                    Bytes: try_buffer,
                },
            };
        }

        const fsNode = await file_facade.get('fs-node');
        if ( fsNode && ! await fsNode.exists() ) {
            // NOTE(review): `APIError` is not imported in the visible part
            // of this file — confirm it is in scope, otherwise this line
            // raises a ReferenceError instead of the intended API error.
            throw APIError.create('subject_does_not_exist');
        }

        throw new Error('No suitable input for Textract');
    }
}
134+
135+
// Public export of this module.
module.exports = { AWSTextractService };
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
const { AdvancedBase } = require("@heyputer/puter-js-common");
2+
3+
/**
 * Module that registers the AI-related services with the service container
 * obtained from the install context.
 */
class PuterAIModule extends AdvancedBase {
    /**
     * Registers `__ai-interfaces` and `aws-textract`, in that order.
     *
     * @param {object} context - provides the service container via
     *   `context.get('services')`
     */
    async install (context) {
        const service_container = context.get('services');

        // Each implementation is required right before it is registered.
        const registrations = [
            ['__ai-interfaces', () => require('./AIInterfaceService').AIInterfaceService],
            ['aws-textract', () => require('./AWSTextractService').AWSTextractService],
        ];

        for ( const [service_name, load_class] of registrations ) {
            service_container.registerService(service_name, load_class());
        }
    }
}
14+
15+
// Public export of this module.
module.exports = { PuterAIModule };
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
```javascript
2+
// Calls `recognize` on the `puter-ocr` interface through the `aws-textract`
// driver and evaluates to the parsed JSON response.
await (await fetch("http://api.puter.localhost:4100/drivers/call", {
    method: "POST",
    headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${puter.authToken}`,
    },
    body: JSON.stringify({
        interface: 'puter-ocr',
        driver: 'aws-textract',
        method: 'recognize',
        args: {
            source: '~/Desktop/testocr.png',
        },
    }),
})).json();
17+
```

tools/run-selfhosted.js

+2
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ const main = async () => {
8585
LocalDiskStorageModule,
8686
SelfHostedModule,
8787
TestDriversModule,
88+
PuterAIModule,
8889
} = (await import('@heyputer/backend')).default;
8990

9091
const k = new Kernel({
@@ -95,6 +96,7 @@ const main = async () => {
9596
k.add_module(new LocalDiskStorageModule());
9697
k.add_module(new SelfHostedModule());
9798
k.add_module(new TestDriversModule());
99+
k.add_module(new PuterAIModule());
98100
k.boot();
99101
};
100102

0 commit comments

Comments
 (0)