Skip to content

Refresh index fixes #1795

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions core/indexing/CodebaseIndexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import { FullTextSearchCodebaseIndex } from "./FullTextSearch.js";
import { LanceDbIndex } from "./LanceDbIndex.js";
import { ChunkCodebaseIndex } from "./chunk/ChunkCodebaseIndex.js";
import { getComputeDeleteAddRemove } from "./refreshIndex.js";
import { CodebaseIndex } from "./types.js";
import { CodebaseIndex, IndexResultType } from "./types.js";
import { walkDir } from "./walkDir.js";

export class PauseToken {
Expand Down Expand Up @@ -112,7 +112,7 @@ export class CodebaseIndexer {
branch,
artifactId: codebaseIndex.artifactId,
};
const [results, markComplete] = await getComputeDeleteAddRemove(
const [results, lastUpdated, markComplete] = await getComputeDeleteAddRemove(
tag,
{ ...stats },
(filepath) => this.ide.readFile(filepath),
Expand Down Expand Up @@ -159,6 +159,10 @@ export class CodebaseIndexer {
};
}

lastUpdated.forEach((lastUpdated, path) => {
markComplete([lastUpdated], IndexResultType.UpdateLastUpdated);
});

completedRelativeExpectedTime += codebaseIndex.relativeExpectedTime;
yield {
progress:
Expand Down
41 changes: 31 additions & 10 deletions core/indexing/chunk/ChunkCodebaseIndex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,24 @@ export class ChunkCodebaseIndex implements CodebaseIndex {
handleChunk(chunk);
}

accumulatedProgress =
(i / results.compute.length) * (1 - progressReservedForTagging);
yield {
progress: accumulatedProgress,
desc: `Chunking ${getBasename(item.path)}`,
status: "indexing",
};
markComplete([item], IndexResultType.Compute);
// Insert chunks
for await (const chunk of chunkDocument(
item.path,
contents[i],
this.maxChunkSize,
item.cacheKey,
)) {
handleChunk(chunk);
}

accumulatedProgress =
(i / results.compute.length) * (1 - progressReservedForTagging);
yield {
Expand All @@ -125,17 +143,20 @@ export class ChunkCodebaseIndex implements CodebaseIndex {
}

// Add tag
for (const item of results.addTag) {
const chunksWithPath = await db.all(
"SELECT * FROM chunks WHERE cacheKey = ?",
[item.cacheKey],
);
const addContents = await Promise.all(
results.addTag.map(({ path }) => this.readFile(path)),
);
for (let i = 0; i < results.addTag.length; i++) {
const item = results.addTag[i];

for (const chunk of chunksWithPath) {
await db.run("INSERT INTO chunk_tags (chunkId, tag) VALUES (?, ?)", [
chunk.id,
tagString,
]);
// Insert chunks
for await (const chunk of chunkDocument(
item.path,
addContents[i],
this.maxChunkSize,
item.cacheKey,
)) {
handleChunk(chunk);
}

markComplete([item], IndexResultType.AddTag);
Expand Down
153 changes: 119 additions & 34 deletions core/indexing/refreshIndex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,36 @@ export class SqliteDb {
artifactId STRING NOT NULL
)`,
);
// Delete duplicate rows from tag_catalog
await db.exec(`
DELETE FROM tag_catalog
WHERE id NOT IN (
SELECT MIN(id)
FROM tag_catalog
GROUP BY dir, branch, artifactId, path, cacheKey
)
`);

// Delete duplicate rows from global_cache
await db.exec(`
DELETE FROM global_cache
WHERE id NOT IN (
SELECT MIN(id)
FROM global_cache
GROUP BY cacheKey, dir, branch, artifactId
)
`);

// Add unique constraints if they don't exist
await db.exec(
`CREATE UNIQUE INDEX IF NOT EXISTS idx_tag_catalog_unique
ON tag_catalog(dir, branch, artifactId, path, cacheKey)`,
);

await db.exec(
`CREATE UNIQUE INDEX IF NOT EXISTS idx_global_cache_unique
ON global_cache(cacheKey, dir, branch, artifactId)`,
);
}

private static indexSqlitePath = getIndexSqlitePath();
Expand Down Expand Up @@ -90,13 +120,22 @@ enum AddRemoveResultType {
Remove = "remove",
UpdateNewVersion = "updateNewVersion",
UpdateOldVersion = "updateOldVersion",
UpdateLastUpdated = "updateLastUpdated",
Compute = "compute"
}

async function getAddRemoveForTag(
tag: IndexTag,
currentFiles: LastModifiedMap,
readFile: (path: string) => Promise<string>,
): Promise<[PathAndCacheKey[], PathAndCacheKey[], MarkCompleteCallback]> {
): Promise<
[
PathAndCacheKey[],
PathAndCacheKey[],
PathAndCacheKey[],
MarkCompleteCallback,
]
> {
const newLastUpdatedTimestamp = Date.now();

const saved = await getSavedItemsForTag(tag);
Expand All @@ -105,6 +144,7 @@ async function getAddRemoveForTag(
const updateNewVersion: PathAndCacheKey[] = [];
const updateOldVersion: PathAndCacheKey[] = [];
const remove: PathAndCacheKey[] = [];
const updateLastUpdated: PathAndCacheKey[] = [];

for (const item of saved) {
const { lastUpdated, ...pathAndCacheKey } = item;
Expand All @@ -116,11 +156,16 @@ async function getAddRemoveForTag(
// Exists in old and new, so determine whether it was updated
if (lastUpdated < currentFiles[item.path]) {
// Change was made after last update
updateNewVersion.push({
path: pathAndCacheKey.path,
cacheKey: calculateHash(await readFile(pathAndCacheKey.path)),
});
updateOldVersion.push(pathAndCacheKey);
const newHash = calculateHash(await readFile(pathAndCacheKey.path));
if (pathAndCacheKey.cacheKey !== newHash) {
updateNewVersion.push({
path: pathAndCacheKey.path,
cacheKey: newHash,
});
updateOldVersion.push(pathAndCacheKey);
} else {
updateLastUpdated.push(pathAndCacheKey);
}
} else {
// Already updated, do nothing
}
Expand All @@ -143,18 +188,43 @@ async function getAddRemoveForTag(
// Create the markComplete callback function
const db = await SqliteDb.get();
const itemToAction: {
[key: string]: [PathAndCacheKey, AddRemoveResultType];
} = {};

async function markComplete(items: PathAndCacheKey[], _: IndexResultType) {
const actions = items.map(
(item) =>
itemToAction[
JSON.stringify({ path: item.path, cacheKey: item.cacheKey })
],
);
for (const [{ path, cacheKey }, resultType] of actions) {
switch (resultType) {
[key in AddRemoveResultType]: PathAndCacheKey[];
} = {
[AddRemoveResultType.Add]: [],
[AddRemoveResultType.Remove]: [],
[AddRemoveResultType.UpdateNewVersion]: [],
[AddRemoveResultType.UpdateOldVersion]: [],
[AddRemoveResultType.UpdateLastUpdated]: [],
[AddRemoveResultType.Compute]: [],
};

async function markComplete(
items: PathAndCacheKey[],
resultType: IndexResultType,
) {
const addRemoveResultType =
mapIndexResultTypeToAddRemoveResultType(resultType);

const actionItems = itemToAction[addRemoveResultType];
if (!actionItems) {
console.warn(`No action items found for result type: ${resultType}`);
return;
}

for (const item of items) {
const { path, cacheKey } = item;
switch (addRemoveResultType) {
case AddRemoveResultType.Compute:
await db.run(
"REPLACE INTO tag_catalog (path, cacheKey, lastUpdated, dir, branch, artifactId) VALUES (?, ?, ?, ?, ?, ?)",
path,
cacheKey,
newLastUpdatedTimestamp,
tag.directory,
tag.branch,
tag.artifactId,
);
break;
case AddRemoveResultType.Add:
await db.run(
"INSERT INTO tag_catalog (path, cacheKey, lastUpdated, dir, branch, artifactId) VALUES (?, ?, ?, ?, ?, ?)",
Expand Down Expand Up @@ -182,6 +252,7 @@ async function getAddRemoveForTag(
tag.artifactId,
);
break;
case AddRemoveResultType.UpdateLastUpdated:
case AddRemoveResultType.UpdateNewVersion:
await db.run(
`UPDATE tag_catalog SET
Expand All @@ -208,27 +279,22 @@ async function getAddRemoveForTag(
}

for (const item of updateNewVersion) {
itemToAction[JSON.stringify(item)] = [
item,
AddRemoveResultType.UpdateNewVersion,
];
itemToAction[AddRemoveResultType.UpdateNewVersion].push(item);
}
for (const item of add) {
itemToAction[JSON.stringify(item)] = [item, AddRemoveResultType.Add];
itemToAction[AddRemoveResultType.Add].push(item);
}
for (const item of updateOldVersion) {
itemToAction[JSON.stringify(item)] = [
item,
AddRemoveResultType.UpdateOldVersion,
];
itemToAction[AddRemoveResultType.UpdateOldVersion].push(item);
}
for (const item of remove) {
itemToAction[JSON.stringify(item)] = [item, AddRemoveResultType.Remove];
itemToAction[AddRemoveResultType.Remove].push(item);
}

return [
[...add, ...updateNewVersion],
[...remove, ...updateOldVersion],
updateLastUpdated,
markComplete,
];
}
Expand All @@ -255,13 +321,31 @@ function calculateHash(fileContents: string): string {
return hash.digest("hex");
}

/**
 * Translates the result type passed to `markComplete` into the tag-catalog
 * action that should be applied for the completed items.
 *
 * "compute" and "updateLastUpdated" map to their same-named actions,
 * "addTag" maps to Add, and both "del" and "removeTag" collapse to Remove.
 *
 * @param resultType - Result type reported for the completed items.
 * @returns The matching tag-catalog action.
 * @throws Error when `resultType` is not one of the values listed above.
 */
function mapIndexResultTypeToAddRemoveResultType(
  resultType: IndexResultType,
): AddRemoveResultType {
  // Keyed by the raw string value of the result type; a Map is used (rather
  // than a plain object) so inherited property names can never match.
  const actionByResultType = new Map<string, AddRemoveResultType>([
    ["updateLastUpdated", AddRemoveResultType.UpdateLastUpdated],
    ["compute", AddRemoveResultType.Compute],
    ["addTag", AddRemoveResultType.Add],
    ["del", AddRemoveResultType.Remove],
    ["removeTag", AddRemoveResultType.Remove],
  ]);

  const action = actionByResultType.get(resultType);
  if (action === undefined) {
    throw new Error(`Unexpected result type: ${resultType}`);
  }
  return action;
}

export async function getComputeDeleteAddRemove(
tag: IndexTag,
currentFiles: LastModifiedMap,
readFile: (path: string) => Promise<string>,
repoName: string | undefined,
): Promise<[RefreshIndexResults, MarkCompleteCallback]> {
const [add, remove, markComplete] = await getAddRemoveForTag(
): Promise<[RefreshIndexResults, PathAndCacheKey[], MarkCompleteCallback]> {
const [add, remove, lastUpdated, markComplete] = await getAddRemoveForTag(
tag,
currentFiles,
readFile,
Expand Down Expand Up @@ -305,6 +389,7 @@ export async function getComputeDeleteAddRemove(

return [
results,
lastUpdated,
async (items, resultType) => {
// Update tag catalog
markComplete(items, resultType);
Expand Down Expand Up @@ -347,15 +432,15 @@ export class GlobalCacheCodeBaseIndex implements CodebaseIndex {
_: MarkCompleteCallback,
repoName: string | undefined,
): AsyncGenerator<IndexingProgressUpdate> {
const add = [...results.compute, ...results.addTag];
const add = results.addTag;
const remove = [...results.del, ...results.removeTag];
await Promise.all([
...add.map(({ cacheKey }) => {
return this.computeOrAddTag(cacheKey, tag);
}),
...remove.map(({ cacheKey }) => {
return this.deleteOrRemoveTag(cacheKey, tag);
}),
...add.map(({ cacheKey }) => {
return this.computeOrAddTag(cacheKey, tag);
}),
]);
yield { progress: 1, desc: "Done updating global cache", status: "done" };
}
Expand Down
1 change: 1 addition & 0 deletions core/indexing/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export enum IndexResultType {
Delete = "del",
AddTag = "addTag",
RemoveTag = "removeTag",
UpdateLastUpdated = "updateLastUpdated"
}

export type MarkCompleteCallback = (
Expand Down
Loading