tests: charStream

Patrick-Erichsen · Patrick-Erichsen · commit d05d056b1fcc · 2024-08-16T17:16:33.000-07:00
diff --git a/core/autocomplete/charStream.test.ts b/core/autocomplete/charStream.test.ts
@@ -0,0 +1,138 @@
+import { jest } from "@jest/globals";
+import * as charStream from "./charStream";
+import { Typescript } from "./languages";
+
+describe("charStream", () => {
+  let mockFullStop: jest.Mock;
+
+  /** Resolves to an async generator that replays `chars` one entry at a time, in order. */
+  async function getCharGenerator(chars: string[]) {
+    async function* replay() {
+      for (const chunk of chars) yield chunk;
+    }
+    return replay();
+  }
+
+  /** Drains `results` and resolves to every yielded chunk, in arrival order. */
+  async function getFilteredChars(results: AsyncGenerator<string>) {
+    const collected: string[] = [];
+    let step = await results.next();
+    for (; !step.done; step = await results.next()) collected.push(step.value);
+    return collected;
+  }
+
+  beforeEach(() => {
+    mockFullStop = jest.fn(); // new mock before every test, so call-count assertions start from zero
+  });
+
+  describe("onlyWhitespaceAfterEndOfLine", () => {
+    // Every case below uses the first end-of-line marker listed for TypeScript.
+    const endOfLineChar = Typescript.endOfLine[0];
+
+    /**
+     * Streams `chunks` through onlyWhitespaceAfterEndOfLine, with `mockFullStop`
+     * as the stop callback, and resolves to the concatenated output.
+     */
+    async function runFilter(chunks: string[]) {
+      const source = await getCharGenerator(chunks);
+      const filtered = charStream.onlyWhitespaceAfterEndOfLine(
+        source,
+        [endOfLineChar],
+        mockFullStop,
+      );
+      return (await getFilteredChars(filtered)).join("");
+    }
+
+    it("should stop at end of line if non-whitespace follows", async () => {
+      // "W" directly follows the end-of-line marker inside a single chunk, so the
+      // text after the marker is expected to be dropped.
+      const chunks = [`Hello${endOfLineChar}World`];
+      const text = await runFilter(chunks);
+
+      expect(text).toBe(`Hello${endOfLineChar}`);
+      expect(mockFullStop).toHaveBeenCalledTimes(1);
+    });
+
+    it("should continue past end of line if only whitespace follows", async () => {
+      // Two spaces separate the end-of-line marker from "World", so the whole
+      // input is expected to pass through untouched.
+      const chunks = [`Hello${endOfLineChar}  World`];
+      const text = await runFilter(chunks);
+
+      expect(text).toBe(`Hello${endOfLineChar}  World`);
+      expect(mockFullStop).not.toHaveBeenCalled();
+    });
+
+    it("should handle end of line at the end of chunk", async () => {
+      // The marker is the last character of the first chunk, and the next
+      // chunk opens with a non-whitespace character.
+      const chunks = [`Hello${endOfLineChar}`, "World"];
+      const text = await runFilter(chunks);
+
+      expect(text).toBe(`Hello${endOfLineChar}`);
+      expect(mockFullStop).toHaveBeenCalledTimes(1);
+    });
+  });
+
+  describe("noFirstCharNewline", () => {
+    /** Runs `chunks` through noFirstCharNewline and returns the joined output. */
+    async function runFilter(chunks: string[]) {
+      const source = await getCharGenerator(chunks);
+      const filtered = charStream.noFirstCharNewline(source);
+      return (await getFilteredChars(filtered)).join("");
+    }
+
+    // The filter stops the whole stream rather than stripping one character, so the
+    // expected output for a leading line break is empty, not "Hello".
+    it("should yield nothing if the stream starts with a newline", async () => {
+      expect(await runFilter(["\nHello"])).toBe("");
+    });
+
+    it("should keep content if no leading newline", async () => {
+      expect(await runFilter(["Hello\nWorld"])).toBe("Hello\nWorld");
+    });
+
+    it("should yield nothing if the stream starts with a carriage return", async () => {
+      expect(await runFilter(["\rHello"])).toBe("");
+    });
+  });
+
+  describe("stopAtStopTokens", () => {
+    /** Streams `chunks` through stopAtStopTokens and returns the joined output. */
+    async function runFilter(chunks: string[], stopTokens: string[]) {
+      const source = await getCharGenerator(chunks);
+      const filtered = charStream.stopAtStopTokens(source, stopTokens);
+      return (await getFilteredChars(filtered)).join("");
+    }
+
+    it("should stop at the first occurrence of a stop token", async () => {
+      const text = await runFilter(
+        ["Hello<|endoftext|>World"],
+        ["<|endoftext|>"],
+      );
+      expect(text).toBe("Hello");
+    });
+
+    it("should return all content if no stop tokens are provided", async () => {
+      const text = await runFilter(["Hello<|endoftext|>World"], []);
+      expect(text).toBe("Hello<|endoftext|>World");
+    });
+
+    it("should handle stop tokens that span multiple chunks", async () => {
+      // The token is split across the chunk boundary: "<|" ends one chunk, the rest opens the next.
+      const text = await runFilter(
+        ["Hello<|", "endoftext|>World"],
+        ["<|endoftext|>"],
+      );
+      expect(text).toBe("Hello");
+    });
+
+    it("should yield remaining characters in buffer if no stop token is found", async () => {
+      const text = await runFilter(
+        ["Hello", "World"],
+        ["<|endoftext|>"],
+      );
+      expect(text).toBe("HelloWorld");
+    });
+  });
+});
diff --git a/core/autocomplete/charStream.ts b/core/autocomplete/charStream.ts
@@ -1,12 +1,24 @@
+/**
+ * Asynchronous generator that yields characters from the input stream until it encounters
+ * an end-of-line character followed by a non-whitespace character.
+ *
+ * @param {AsyncGenerator<string>} stream - The input stream of characters.
+ * @param {string[]} endOfLine - An array of characters considered as end-of-line markers.
+ * @param {() => void} fullStop - Called when the generator stops early at an end-of-line character; not called if the stream simply ends.
+ * @yields {string} Characters from the input stream.
+ * @returns {AsyncGenerator<string>} An async generator that yields characters.
+ */
 export async function* onlyWhitespaceAfterEndOfLine(
   stream: AsyncGenerator<string>,
   endOfLine: string[],
   fullStop: () => void,
 ): AsyncGenerator<string> {
   let pending = "";
+
   for await (let chunk of stream) {
     chunk = pending + chunk;
     pending = "";
+
     for (let i = 0; i < chunk.length - 1; i++) {
       if (
         endOfLine.includes(chunk[i]) &&
@@ -17,6 +29,7 @@ export async function* onlyWhitespaceAfterEndOfLine(
         return;
       }
     }
+
     if (endOfLine.includes(chunk[chunk.length - 1])) {
       pending = chunk[chunk.length - 1];
       yield chunk.slice(0, chunk.length - 1);
@@ -27,6 +40,11 @@ export async function* onlyWhitespaceAfterEndOfLine(
   yield pending;
 }
 
+/**
+ * Yields chunks from the stream unchanged, or nothing at all if the first chunk starts with a newline or carriage return.
+ * @param {AsyncGenerator<string>} stream - The input character stream.
+ * @yields {string} Characters from the stream.
+ */
 export async function* noFirstCharNewline(stream: AsyncGenerator<string>) {
   let first = true;
   for await (const char of stream) {
@@ -40,6 +58,20 @@ export async function* noFirstCharNewline(stream: AsyncGenerator<string>) {
   }
 }
 
+/**
+ * Asynchronously yields characters from the input stream, stopping if a stop token is encountered.
+ *
+ * @param {AsyncGenerator<string>} stream - The input stream of characters.
+ * @param {string[]} stopTokens - Array of tokens that signal when to stop yielding.
+ * @yields {string} Characters from the input stream.
+ * @returns {AsyncGenerator<string>} An async generator that yields characters until a stop condition is met.
+ * @description
+ * 1. If no stop tokens are provided, yields all characters from the stream.
+ * 2. Otherwise, buffers incoming chunks and checks for stop tokens.
+ * 3. Yields characters one by one if no stop token is found at the start of the buffer.
+ * 4. Stops yielding and returns if a stop token is encountered.
+ * 5. After the stream ends, yields any remaining buffered characters.
+ */
 export async function* stopAtStopTokens(
   stream: AsyncGenerator<string>,
   stopTokens: string[],