Commit f34b06f

temporary fix for encoding packaging issues

1 parent 4555342

File tree: 4 files changed, +42 −8 lines

- binary/README.md (+12)
- binary/build.js (+1)
- core/llm/asyncEncoder.ts (+8 −6)
- core/llm/countTokens.ts (+21 −2)

binary/README.md (+12)

````diff
@@ -30,3 +30,15 @@ The build process is otherwise defined entirely in `build.js`.
 ## Debugging
 
 To debug the binary with IntelliJ, set `useTcp` to `true` in `CoreMessenger.kt`, and then in VS Code run the "Core Binary" debug script. Instead of starting a subprocess for the binary and communicating over stdin/stdout, the IntelliJ extension will connect over TCP to the server started from the VS Code window. You can place breakpoints anywhere in the `core` or `binary` folders.
+
+## Building
+
+```bash
+npm run build
+```
+
+## Testing
+
+```bash
+npm run test
+```
````

binary/build.js (+1)

```diff
@@ -162,6 +162,7 @@ async function installNodeModuleInTempDirAndCopyToCurrent(packageName, toCopy) {
     "../core/vendor/tree-sitter.wasm",
     "../core/llm/llamaTokenizerWorkerPool.mjs",
     "../core/llm/llamaTokenizer.mjs",
+    "../core/llm/tiktokenWorkerPool.mjs",
   ];
   for (const f of filesToCopy) {
     fs.copyFileSync(
```
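
This line matters because `workerCodeFilePath` in `core/llm/asyncEncoder.ts` (below) resolves worker scripts relative to `__dirname` at runtime, so each `.mjs` worker must be copied next to the bundled output rather than bundled in. A minimal sketch of the copy pattern; the destination argument to `fs.copyFileSync` is truncated in this diff, so the `out` directory here is an assumption for illustration only:

```typescript
// Sketch only: "out" as the bundle directory is assumed, not confirmed
// by this diff. The shape of the loop follows the visible context lines.
import fs from "fs";
import path from "path";

const filesToCopy: string[] = [
  "../core/llm/llamaTokenizerWorkerPool.mjs",
  "../core/llm/tiktokenWorkerPool.mjs", // the line this commit adds
];

for (const f of filesToCopy) {
  fs.copyFileSync(
    path.join(__dirname, f), // source, relative to the build script
    path.join(__dirname, "out", path.basename(f)), // assumed destination
  );
}
```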

core/llm/asyncEncoder.ts (+8 −6)

```diff
@@ -1,7 +1,5 @@
-import llamaTokenizer from "./llamaTokenizer.js";
-import { Tiktoken, encodingForModel as _encodingForModel } from "js-tiktoken";
+import path from "path";
 import workerpool from "workerpool";
-import * as path from "path";
 
 export interface AsyncEncoder {
   encode(text: string): Promise<number[]>;
@@ -13,7 +11,9 @@ export class LlamaAsyncEncoder implements AsyncEncoder {
   private workerPool: workerpool.Pool;
 
   constructor() {
-    this.workerPool = workerpool.pool(workerCodeFilePath("llamaTokenizerWorkerPool.mjs"));
+    this.workerPool = workerpool.pool(
+      workerCodeFilePath("llamaTokenizerWorkerPool.mjs"),
+    );
   }
 
   async encode(text: string): Promise<number[]> {
@@ -35,7 +35,9 @@ export class GPTAsyncEncoder implements AsyncEncoder {
   private workerPool: workerpool.Pool;
 
   constructor() {
-    this.workerPool = workerpool.pool(workerCodeFilePath("tiktokenWorkerPool.mjs"));
+    this.workerPool = workerpool.pool(
+      workerCodeFilePath("tiktokenWorkerPool.mjs"),
+    );
   }
 
   async encode(text: string): Promise<number[]> {
@@ -58,4 +60,4 @@ function workerCodeFilePath(workerFileName: string): string {
     return path.join(__dirname, "llm", workerFileName);
   }
   return path.join(__dirname, workerFileName);
-}
+}
```
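
The worker scripts themselves are not part of this diff, but the `workerpool` API implies their shape: the `.mjs` file registers named functions via `workerpool.worker(...)`, which the encoder classes then invoke off the main thread with `pool.exec`. A hypothetical sketch of what a file like `llamaTokenizerWorkerPool.mjs` plausibly contains; the function names and the tokenizer import are assumptions, not shown in this commit:

```typescript
// Hypothetical sketch of a worker script such as llamaTokenizerWorkerPool.mjs
// (not in this diff). workerpool.worker() registers functions the main
// thread calls via pool.exec("encode", [text]) / pool.exec("decode", [tokens]).
import workerpool from "workerpool";
import llamaTokenizer from "./llamaTokenizer.mjs"; // assumed tokenizer module

workerpool.worker({
  // Runs in the worker; returns plain, structured-clonable data.
  encode: (text: string): number[] => llamaTokenizer.encode(text),
  decode: (tokens: number[]): string => llamaTokenizer.decode(tokens),
});
```

This is also why packaging breaks when the `.mjs` file goes missing: `workerpool.pool(filePath)` spawns a worker from that path at runtime, and a bundler will not pull the worker script into the main bundle automatically.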

core/llm/countTokens.ts (+21 −2)

```diff
@@ -24,18 +24,37 @@ class LlamaEncoding implements Encoding {
   }
 }
 
+class NonWorkerAsyncEncoder implements AsyncEncoder {
+  constructor(private readonly encoding: Encoding) {}
+
+  async close(): Promise<void> {}
+
+  async encode(text: string): Promise<number[]> {
+    return this.encoding.encode(text);
+  }
+
+  async decode(tokens: number[]): Promise<string> {
+    return this.encoding.decode(tokens);
+  }
+}
+
 let gptEncoding: Encoding | null = null;
 const gptAsyncEncoder = new GPTAsyncEncoder();
 const llamaEncoding = new LlamaEncoding();
 const llamaAsyncEncoder = new LlamaAsyncEncoder();
 
 function asyncEncoderForModel(modelName: string): AsyncEncoder {
+  // Temporary due to issues packaging the worker files
+  if (process.env.IS_BINARY) {
+    const encoding = encodingForModel(modelName);
+    return new NonWorkerAsyncEncoder(encoding);
+  }
+
   const modelType = autodetectTemplateType(modelName);
   if (!modelType || modelType === "none") {
     return gptAsyncEncoder;
   }
-  // Temporary due to issues packaging the worker files
-  return process.env.IS_BINARY ? gptAsyncEncoder : llamaAsyncEncoder;
+  return llamaAsyncEncoder;
 }
 
 function encodingForModel(modelName: string): Encoding {
```
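
The net effect of the temporary fix: in the packaged binary, token encoding runs synchronously on the calling thread behind the same `AsyncEncoder` interface, instead of routing through a worker pool that cannot find its copied files. Wrapping the synchronous `Encoding` in `NonWorkerAsyncEncoder` keeps call sites unchanged, so reverting later only needs to touch `asyncEncoderForModel`. A sketch of a caller that stays agnostic to which encoder it received; `countTokensAsync` is an illustrative name, not necessarily the module's exported API:

```typescript
// Sketch: NonWorkerAsyncEncoder resolves immediately, the worker-pool
// encoders resolve when the worker replies; the await covers both cases.
async function countTokensAsync(
  text: string,
  modelName: string,
): Promise<number> {
  const encoder = asyncEncoderForModel(modelName); // from countTokens.ts
  const tokens = await encoder.encode(text);
  return tokens.length;
}
```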
