Skip to content

Walkdir caching first pass #4587

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Mar 14, 2025
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
"cwd": "${workspaceFolder}/extensions/vscode",
"args": [
// Pass a directory to manually test in
"${workspaceFolder}/manual-testing-sandbox",
"${workspaceFolder}/manual-testing-sandbox/test.js",
"/Users/dallin/Documents/code/continuedev/continue-extras/react",
"/Users/dallin/Documents/code/continuedev/continue-extras/react/README.md",
"--extensionDevelopmentPath=${workspaceFolder}/extensions/vscode"
],
"pauseForSourceMap": false,
Expand Down
4 changes: 3 additions & 1 deletion core/config/getSystemPromptDotFile.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@ export async function getAssistantFilesFromDir(
return [];
}

const uris = await walkDir(dir, ide);
const uris = await walkDir(dir, ide, {
source: "get dir assistant files",
});
const assistantFilePaths = uris.filter(
(p) => p.endsWith(".yaml") || p.endsWith(".yml"),
);
Expand Down
10 changes: 8 additions & 2 deletions core/context/providers/FileContextProvider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ import {
} from "../../";
import { walkDirs } from "../../indexing/walkDir";
import {
getUriPathBasename,
getShortestUniqueRelativeUriPaths,
getUriDescription,
getUriPathBasename,
} from "../../util/uri";

const MAX_SUBMENU_ITEMS = 10_000;
Expand Down Expand Up @@ -53,7 +53,13 @@ class FileContextProvider extends BaseContextProvider {
args: LoadSubmenuItemsArgs,
): Promise<ContextSubmenuItem[]> {
const workspaceDirs = await args.ide.getWorkspaceDirs();
const results = await walkDirs(args.ide, undefined, workspaceDirs);
const results = await walkDirs(
args.ide,
{
source: "load submenu items - file",
},
workspaceDirs,
);
const files = results.flat().slice(-MAX_SUBMENU_ITEMS);
const withUniquePaths = getShortestUniqueRelativeUriPaths(
files,
Expand Down
4 changes: 3 additions & 1 deletion core/context/providers/FileTreeContextProvider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ class FileTreeContextProvider extends BaseContextProvider {
directories: [],
};

const uris = await walkDir(workspaceDir, extras.ide);
const uris = await walkDir(workspaceDir, extras.ide, {
source: "get context items - file tree",
});
const relativePaths = uris.map(
(uri) => findUriInDirs(uri, [workspaceDir]).relativePathOrBasename,
);
Expand Down
1 change: 1 addition & 0 deletions core/context/providers/FolderContextProvider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class FolderContextProvider extends BaseContextProvider {
args.ide,
{
onlyDirs: true,
source: "load submenu items - folder",
},
workspaceDirs,
);
Expand Down
1 change: 1 addition & 0 deletions core/context/providers/RepoMapContextProvider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ class RepoMapContextProvider extends BaseContextProvider {
args.ide,
{
onlyDirs: true,
source: "load submenu items - repo map",
},
workspaceDirs,
);
Expand Down
7 changes: 5 additions & 2 deletions core/core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ import {
} from ".";

import { shouldIgnore } from "./indexing/shouldIgnore";
import { walkDirCache } from "./indexing/walkDir";
import type { FromCoreProtocol, ToCoreProtocol } from "./protocol";
import type { IMessenger, Message } from "./protocol/messenger";

Expand Down Expand Up @@ -825,6 +826,7 @@ export class Core {
if (!config || config.disableIndexing) {
return; // TODO silent in case of commands?
}
walkDirCache.invalidate();
if (data?.shouldClearIndexes) {
const codebaseIndexer = await this.codebaseIndexerPromise;
await codebaseIndexer.clearIndexes();
Expand Down Expand Up @@ -852,9 +854,8 @@ export class Core {
// TODO - remove remaining logic for these from IDEs where possible
on("files/changed", async ({ data }) => {
if (data?.uris?.length) {
walkDirCache.invalidate(); // safe approach for now - TODO - only invalidate on relevant changes
for (const uri of data.uris) {
// Listen for file changes in the workspace
// URI TODO is this equality statement valid?
const currentProfileUri =
this.configHandler.currentProfile?.profileDescription.uri ?? "";

Expand Down Expand Up @@ -925,12 +926,14 @@ export class Core {

on("files/created", async ({ data }) => {
if (data?.uris?.length) {
walkDirCache.invalidate();
void refreshIfNotIgnored(data.uris);
}
});

on("files/deleted", async ({ data }) => {
if (data?.uris?.length) {
walkDirCache.invalidate();
void refreshIfNotIgnored(data.uris);
}
});
Expand Down
4 changes: 3 additions & 1 deletion core/indexing/CodebaseIndexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,9 @@ export class CodebaseIndexer {
status: "indexing",
};
const directoryFiles = [];
for await (const p of walkDirAsync(directory, this.ide)) {
for await (const p of walkDirAsync(directory, this.ide, {
source: "codebase indexing: refresh dirs",
})) {
directoryFiles.push(p);
if (abortSignal.aborted) {
yield {
Expand Down
141 changes: 124 additions & 17 deletions core/indexing/walkDir.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,14 @@ import {
export interface WalkerOptions {
onlyDirs?: boolean;
returnRelativeUrisPaths?: boolean;
source?: string;
}

type Entry = [string, FileType];

const LIST_DIR_CACHE_TIME = 30_000; // 30 seconds
const IGNORE_FILE_CACHE_TIME = 30_000; // 30 seconds

// helper struct used for the DFS walk
type WalkableEntry = {
name: string;
Expand All @@ -36,6 +40,38 @@ type IgnoreContext = {
dirname: string;
};

/**
 * In-memory store for results that directory walks can reuse.
 *
 * Two string-keyed maps are held:
 * - `dirListCache`: a promise of a directory listing (`[name, FileType]` pairs)
 * - `dirIgnoreCache`: a promise of an `Ignore` instance
 *
 * Each entry also records the `time` at which it was stored. This class never
 * expires entries on its own; it only keeps the timestamp so that readers can
 * decide whether an entry is still fresh.
 */
class WalkDirCache {
  dirListCache = new Map<
    string,
    { time: number; entries: Promise<[string, FileType][]> }
  >();

  dirIgnoreCache = new Map<
    string,
    { time: number; ignore: Promise<Ignore> }
  >();

  /**
   * Empties both caches in one go.
   *
   * This is the deliberately conservative strategy for now. Finer-grained
   * invalidation (dropping a single ignore entry, or only the listing of a
   * changed file's parent directory) has been sketched but is not enabled.
   */
  invalidate(): void {
    for (const cache of [this.dirListCache, this.dirIgnoreCache]) {
      cache.clear();
    }
  }
}
export const walkDirCache = new WalkDirCache(); // TODO - singleton approach better?

class DFSWalker {
constructor(
private readonly uri: string,
Expand All @@ -45,9 +81,19 @@ class DFSWalker {

// walk is a depth-first search implementation
public async *walk(): AsyncGenerator<string> {
const start = Date.now();
let ignoreFileTime = 0;
let ignoreTime = 0;
let listDirTime = 0;
let dirs = 0;
let listDirCacheHits = 0;
let ignoreCacheHits = 0;

let section = Date.now();
const defaultAndGlobalIgnores = ignore()
.add(defaultIgnoreFileAndDir)
.add(getGlobalContinueIgArray());
ignoreFileTime += Date.now() - section;

const rootContext: WalkContext = {
walkableEntry: {
Expand All @@ -62,15 +108,63 @@ class DFSWalker {
const stack = [rootContext];

for (let cur = stack.pop(); cur; cur = stack.pop()) {
// No caching approach:
// const entries = await this.ide.listDir(cur.walkableEntry.uri);
// const newIgnore = await getIgnoreContext(
// cur.walkableEntry.uri,
// entries,
// this.ide,
// defaultAndGlobalIgnores,
// );

// Only directories will be added to the stack
const entries = await this.ide.listDir(cur.walkableEntry.uri);
dirs++;

section = Date.now();
let entries: [string, FileType][] = [];
const cachedListdir = walkDirCache.dirListCache.get(
cur.walkableEntry.uri,
);
if (
cachedListdir &&
cachedListdir.time > Date.now() - LIST_DIR_CACHE_TIME
) {
entries = await cachedListdir.entries;
listDirCacheHits++;
} else {
const promise = this.ide.listDir(cur.walkableEntry.uri);
walkDirCache.dirListCache.set(cur.walkableEntry.uri, {
time: Date.now(),
entries: promise,
});
entries = await promise;
}
listDirTime += Date.now() - section;

const newIgnore = await getIgnoreContext(
section = Date.now();
let newIgnore: Ignore;
const cachedIgnore = walkDirCache.dirIgnoreCache.get(
cur.walkableEntry.uri,
entries,
this.ide,
defaultAndGlobalIgnores,
);
if (
cachedIgnore &&
cachedIgnore.time > Date.now() - IGNORE_FILE_CACHE_TIME
) {
newIgnore = await cachedIgnore.ignore;
ignoreCacheHits++;
} else {
const ignorePromise = getIgnoreContext(
cur.walkableEntry.uri,
entries,
this.ide,
defaultAndGlobalIgnores,
);
walkDirCache.dirIgnoreCache.set(cur.walkableEntry.uri, {
time: Date.now(),
ignore: ignorePromise,
});
newIgnore = await ignorePromise;
}

const ignoreContexts = [
...cur.ignoreContexts,
Expand All @@ -79,6 +173,7 @@ class DFSWalker {
dirname: cur.walkableEntry.relativeUriPath,
},
];
ignoreFileTime += Date.now() - section;

for (const entry of entries) {
if (this.entryIsSymlink(entry)) {
Expand All @@ -103,19 +198,24 @@ class DFSWalker {
continue;
}
}
let shouldInclude = true;
let shouldIgnore = false;
for (const ig of ignoreContexts) {
if (shouldIgnore) {
continue;
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is another improvement: once one ignore context matches, the remaining ignore checks are skipped.

}
// remove the directory name and path separator from the match path, unless this an ignore file
// in the root directory
const prefixLength =
ig.dirname.length === 0 ? 0 : ig.dirname.length + 1;
// The ignore library expects a path relative to the ignore file location
const matchPath = relPath.substring(prefixLength);
section = Date.now();
if (ig.ignore.ignores(matchPath)) {
shouldInclude = false;
shouldIgnore = true;
}
ignoreTime += Date.now() - section;
}
if (!shouldInclude) {
if (shouldIgnore) {
continue;
}

Expand All @@ -141,6 +241,9 @@ class DFSWalker {
}
}
}
// console.log(
// `Walk Dir Result:\nSource: ${this.options.source ?? "unknown"}\nDir: ${this.uri}\nDuration: ${Date.now() - start}ms:\n\tList dir: ${listDirTime}ms (${listDirCacheHits}/${dirs} cache hits)\n\tIgnore files: ${ignoreFileTime}ms (${ignoreCacheHits}/${dirs} cache hits)\n\tIgnoring: ${ignoreTime}ms`,
// );
}

private entryIsDirectory(entry: Entry) {
Expand All @@ -157,6 +260,15 @@ const defaultOptions: WalkerOptions = {
returnRelativeUrisPaths: false,
};

/**
 * Lazily yields the entries produced by a `DFSWalker` rooted at `path`.
 *
 * @param path - Root location handed to the walker.
 * @param ide - IDE instance handed to the walker.
 * @param _optionOverrides - Optional settings; any field given here takes
 *   precedence over the corresponding value in `defaultOptions`.
 * @returns An async generator of strings, delegated from the walker's `walk()`.
 */
export async function* walkDirAsync(
  path: string,
  ide: IDE,
  _optionOverrides?: WalkerOptions,
): AsyncGenerator<string> {
  const walker = new DFSWalker(path, ide, {
    ...defaultOptions,
    ..._optionOverrides,
  });
  yield* walker.walk();
}

export async function walkDir(
uri: string,
ide: IDE,
Expand All @@ -169,15 +281,6 @@ export async function walkDir(
return urisOrRelativePaths;
}

/**
 * Lazily yields the entries produced by a `DFSWalker` rooted at `path`.
 *
 * @param path - Root location handed to the walker.
 * @param ide - IDE instance handed to the walker.
 * @param _optionOverrides - Optional settings; any field given here takes
 *   precedence over the corresponding value in `defaultOptions`.
 * @returns An async generator of strings, delegated from the walker's `walk()`.
 */
export async function* walkDirAsync(
  path: string,
  ide: IDE,
  _optionOverrides?: WalkerOptions,
): AsyncGenerator<string> {
  const walker = new DFSWalker(path, ide, {
    ...defaultOptions,
    ..._optionOverrides,
  });
  yield* walker.walk();
}

export async function walkDirs(
ide: IDE,
_optionOverrides?: WalkerOptions,
Expand Down Expand Up @@ -227,6 +330,10 @@ export async function getIgnoreContext(
getContinueIgnorePatterns(),
]);

if (ignoreArrays[0].length === 0 && ignoreArrays[1].length === 0) {
return defaultAndGlobalIgnores;
}

// Note precedence here!
const ignoreContext = ignore()
.add(ignoreArrays[0]) // gitignore
Expand Down
4 changes: 3 additions & 1 deletion core/promptFiles/v2/getPromptFiles.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ export async function getPromptFilesFromDir(
return [];
}

const uris = await walkDir(dir, ide);
const uris = await walkDir(dir, ide, {
source: "get dir prompt files",
});
const promptFilePaths = uris.filter((p) => p.endsWith(".prompt"));
const results = promptFilePaths.map(async (uri) => {
const content = await ide.readFile(uri); // make a try catch
Expand Down
8 changes: 7 additions & 1 deletion core/util/generateRepoMap.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,13 @@ class RepoMapGenerator {

async generate(): Promise<string> {
this.dirs = this.options.dirUris ?? (await this.ide.getWorkspaceDirs());
this.allUris = await walkDirs(this.ide, undefined, this.dirs);
this.allUris = await walkDirs(
this.ide,
{
source: "generate repo map",
},
this.dirs,
);

// Initialize
await this.writeToStream(this.PREAMBLE);
Expand Down
Loading
Loading