Skip to content

Commit d05d056

Browse files
tests: charStream
1 parent 3b28464 commit d05d056

File tree

2 files changed

+170
-0
lines changed

2 files changed

+170
-0
lines changed

core/autocomplete/charStream.test.ts

+138
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,138 @@
1+
import { jest } from "@jest/globals";
2+
import * as charStream from "./charStream";
3+
import { Typescript } from "./languages";
4+
5+
describe("charStream", () => {
6+
let mockFullStop: jest.Mock;
7+
8+
/**
 * Test helper: builds an async generator that yields each entry of `chars`
 * in order, one entry per iteration. Entries may be multi-character chunks.
 *
 * The outer function is itself `async`, so it resolves to the generator;
 * the tests in this file `await` it before passing the generator to the
 * charStream function under test.
 */
async function getCharGenerator(chars: string[]) {
9+
return (async function* () {
10+
for (const char of chars) {
11+
yield char;
12+
}
13+
})();
14+
}
15+
16+
/**
 * Test helper: drains `results` with `for await` and resolves to an array of
 * every value it yielded, in the order yielded. The tests join this array to
 * compare the full output against an expected string.
 */
async function getFilteredChars(results: AsyncGenerator<string>) {
17+
const output = [];
18+
for await (const char of results) {
19+
output.push(char);
20+
}
21+
return output;
22+
}
23+
24+
beforeEach(() => {
25+
mockFullStop = jest.fn();
26+
});
27+
28+
describe("onlyWhitespaceAfterEndOfLine", () => {
29+
const endOfLineChar = Typescript.endOfLine[0];
30+
31+
it("should stop at end of line if non-whitespace follows", async () => {
32+
const charGenerator = await getCharGenerator([
33+
`Hello${endOfLineChar}World`,
34+
]);
35+
36+
const result = charStream.onlyWhitespaceAfterEndOfLine(
37+
charGenerator,
38+
[endOfLineChar],
39+
mockFullStop,
40+
);
41+
const filteredChars = await getFilteredChars(result);
42+
43+
expect(filteredChars.join("")).toBe(`Hello${endOfLineChar}`);
44+
expect(mockFullStop).toHaveBeenCalledTimes(1);
45+
});
46+
47+
it("should continue past end of line if only whitespace follows", async () => {
48+
const charGenerator = await getCharGenerator([
49+
`Hello${endOfLineChar} World`,
50+
]);
51+
const result = charStream.onlyWhitespaceAfterEndOfLine(
52+
charGenerator,
53+
[endOfLineChar],
54+
mockFullStop,
55+
);
56+
const filteredChars = await getFilteredChars(result);
57+
expect(filteredChars.join("")).toBe(`Hello${endOfLineChar} World`);
58+
expect(mockFullStop).not.toHaveBeenCalled();
59+
});
60+
61+
it("should handle end of line at the end of chunk", async () => {
62+
const charGenerator = await getCharGenerator([
63+
`Hello${endOfLineChar}`,
64+
"World",
65+
]);
66+
const result = charStream.onlyWhitespaceAfterEndOfLine(
67+
charGenerator,
68+
[endOfLineChar],
69+
mockFullStop,
70+
);
71+
const filteredChars = await getFilteredChars(result);
72+
expect(filteredChars.join("")).toBe(`Hello${endOfLineChar}`);
73+
expect(mockFullStop).toHaveBeenCalledTimes(1);
74+
});
75+
});
76+
77+
describe("noFirstCharNewline", () => {
78+
it("should remove leading newline", async () => {
79+
const charGenerator = await getCharGenerator(["\nHello"]);
80+
const result = charStream.noFirstCharNewline(charGenerator);
81+
const filteredChars = await getFilteredChars(result);
82+
expect(filteredChars.join("")).toBe("");
83+
});
84+
85+
it("should keep content if no leading newline", async () => {
86+
const charGenerator = await getCharGenerator(["Hello\nWorld"]);
87+
const result = charStream.noFirstCharNewline(charGenerator);
88+
const filteredChars = await getFilteredChars(result);
89+
expect(filteredChars.join("")).toBe("Hello\nWorld");
90+
});
91+
92+
it("should remove leading carriage return", async () => {
93+
const charGenerator = await getCharGenerator(["\rHello"]);
94+
const result = charStream.noFirstCharNewline(charGenerator);
95+
const filteredChars = await getFilteredChars(result);
96+
expect(filteredChars.join("")).toBe("");
97+
});
98+
});
99+
100+
describe("stopAtStopTokens", () => {
101+
it("should stop at the first occurrence of a stop token", async () => {
102+
const charGenerator = await getCharGenerator(["Hello<|endoftext|>World"]);
103+
const result = charStream.stopAtStopTokens(charGenerator, [
104+
"<|endoftext|>",
105+
]);
106+
const filteredChars = await getFilteredChars(result);
107+
expect(filteredChars.join("")).toBe("Hello");
108+
});
109+
110+
it("should return all content if no stop tokens are provided", async () => {
111+
const charGenerator = await getCharGenerator(["Hello<|endoftext|>World"]);
112+
const result = charStream.stopAtStopTokens(charGenerator, []);
113+
const filteredChars = await getFilteredChars(result);
114+
expect(filteredChars.join("")).toBe("Hello<|endoftext|>World");
115+
});
116+
117+
it("should handle stop tokens that span multiple chunks", async () => {
118+
const charGenerator = await getCharGenerator([
119+
"Hello<|",
120+
"endoftext|>World",
121+
]);
122+
const result = charStream.stopAtStopTokens(charGenerator, [
123+
"<|endoftext|>",
124+
]);
125+
const filteredChars = await getFilteredChars(result);
126+
expect(filteredChars.join("")).toBe("Hello");
127+
});
128+
129+
it("should yield remaining characters in buffer if no stop token is found", async () => {
130+
const charGenerator = await getCharGenerator(["Hello", "World"]);
131+
const result = charStream.stopAtStopTokens(charGenerator, [
132+
"<|endoftext|>",
133+
]);
134+
const filteredChars = await getFilteredChars(result);
135+
expect(filteredChars.join("")).toBe("HelloWorld");
136+
});
137+
});
138+
});

core/autocomplete/charStream.ts

+32
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,24 @@
1+
/**
2+
* Asynchronous generator that yields characters from the input stream until it encounters
3+
* an end-of-line character followed by a non-whitespace character.
4+
*
5+
* @param {AsyncGenerator<string>} stream - The input stream of characters.
6+
* @param {string[]} endOfLine - An array of characters considered as end-of-line markers.
7+
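* @param {() => void} fullStop - Callback invoked when the generator stops early because a non-whitespace character follows an end-of-line character; it is not invoked when the stream is simply consumed to the end.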
8+
* @yields {string} Characters from the input stream.
9+
* @returns {AsyncGenerator<string>} An async generator that yields characters.
10+
*/
111
export async function* onlyWhitespaceAfterEndOfLine(
212
stream: AsyncGenerator<string>,
313
endOfLine: string[],
414
fullStop: () => void,
515
): AsyncGenerator<string> {
616
let pending = "";
17+
718
for await (let chunk of stream) {
819
chunk = pending + chunk;
920
pending = "";
21+
1022
for (let i = 0; i < chunk.length - 1; i++) {
1123
if (
1224
endOfLine.includes(chunk[i]) &&
@@ -17,6 +29,7 @@ export async function* onlyWhitespaceAfterEndOfLine(
1729
return;
1830
}
1931
}
32+
2033
if (endOfLine.includes(chunk[chunk.length - 1])) {
2134
pending = chunk[chunk.length - 1];
2235
yield chunk.slice(0, chunk.length - 1);
@@ -27,6 +40,11 @@ export async function* onlyWhitespaceAfterEndOfLine(
2740
yield pending;
2841
}
2942

43+
/**
44+
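* Yields chunks from the stream unchanged, unless the very first chunk starts with a newline or carriage return, in which case the generator ends immediately without yielding anything.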
45+
* @param {AsyncGenerator<string>} stream - The input character stream.
46+
* @yields {string} Characters from the stream.
47+
*/
3048
export async function* noFirstCharNewline(stream: AsyncGenerator<string>) {
3149
let first = true;
3250
for await (const char of stream) {
@@ -40,6 +58,20 @@ export async function* noFirstCharNewline(stream: AsyncGenerator<string>) {
4058
}
4159
}
4260

61+
/**
62+
* Asynchronously yields characters from the input stream, stopping if a stop token is encountered.
63+
*
64+
* @param {AsyncGenerator<string>} stream - The input stream of characters.
65+
* @param {string[]} stopTokens - Array of tokens that signal when to stop yielding.
66+
* @yields {string} Characters from the input stream.
67+
* @returns {AsyncGenerator<string>} An async generator that yields characters until a stop condition is met.
68+
* @description
69+
* 1. If no stop tokens are provided, yields all characters from the stream.
70+
* 2. Otherwise, buffers incoming chunks and checks for stop tokens.
71+
* 3. Yields characters one by one if no stop token is found at the start of the buffer.
72+
* 4. Stops yielding and returns if a stop token is encountered.
73+
* 5. After the stream ends, yields any remaining buffered characters.
74+
*/
4375
export async function* stopAtStopTokens(
4476
stream: AsyncGenerator<string>,
4577
stopTokens: string[],

0 commit comments

Comments
 (0)