Skip to content

Commit 6c8f035

Browse files
authored
Merge pull request #4170 from continuedev/dallin/docs-issues
Docs Fixes and Embeddings Provider Test Run
2 parents f825183 + 06166cb commit 6c8f035

File tree

3 files changed

+82
-58
lines changed

3 files changed

+82
-58
lines changed

core/indexing/docs/DocsService.ts

+55-50
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,8 @@ export default class DocsService {
157157

158158
const currentStatus = this.statuses.get(doc.startUrl);
159159
if (currentStatus) {
160-
return currentStatus;
160+
this.handleStatusUpdate(currentStatus);
161+
return;
161162
}
162163

163164
const sharedStatus = {
@@ -211,11 +212,7 @@ export default class DocsService {
211212
if (isAborted) {
212213
return true;
213214
}
214-
// // Handle indexing disabled change mid-indexing
215-
// if (this.config.disableIndexing) {
216-
// this.abort(startUrl);
217-
// return true;
218-
// }
215+
219216
// Handle embeddings provider change mid-indexing
220217
if (this.config.embeddingsProvider.embeddingId !== startedWithEmbedder) {
221218
this.abort(startUrl);
@@ -277,10 +274,6 @@ export default class DocsService {
277274
const oldConfig = this.config;
278275
this.config = newConfig; // IMPORTANT - need to set up top, other methods below use this without passing it in
279276

280-
// if (this.config.disableIndexing) {
281-
// return;
282-
// }
283-
284277
// No point in indexing if no docs context provider
285278
const hasDocsContextProvider = this.hasDocsContextProvider();
286279
if (!hasDocsContextProvider) {
@@ -368,25 +361,25 @@ export default class DocsService {
368361
siteIndexingConfig: SiteIndexingConfig,
369362
forceReindex: boolean = false,
370363
): Promise<void> {
371-
// if (this.config.disableIndexing) {
372-
// console.warn("Attempting to add/index docs when indexing is disabled");
373-
// return;
374-
// }
375364
const { startUrl, useLocalCrawling, maxDepth } = siteIndexingConfig;
376365

366+
// First, if indexing is already in progress for this startUrl, don't attempt it again
367+
// This queue is necessary because indexAndAdd is invoked circularly by config edits
368+
// TODO there shouldn't really be a gap between checking the queue and adding to it, but it's probably fine
369+
if (this.docsIndexingQueue.has(startUrl)) {
370+
return;
371+
}
372+
377373
const { isPreindexed, provider } =
378374
await this.getEmbeddingsProvider(startUrl);
379375
if (isPreindexed) {
380376
console.warn("Attempted to indexAndAdd pre-indexed doc");
381377
return;
382378
}
383-
384-
// Queue - indexAndAdd is invoked circularly by config edits. This prevents duplicate runs
385-
if (this.docsIndexingQueue.has(startUrl)) {
386-
return;
387-
}
388-
389379
const startedWithEmbedder = provider.embeddingId;
380+
381+
// Check if doc has been successfully indexed with the given embedder
382+
// Note at this point we know it's not a pre-indexed doc
390383
const indexExists = await this.hasMetadata(startUrl);
391384

392385
// Build status update - most of it is fixed values
@@ -404,22 +397,6 @@ export default class DocsService {
404397
url: siteIndexingConfig.startUrl,
405398
};
406399

407-
// Clear current indexes if reIndexing
408-
if (indexExists) {
409-
if (forceReindex) {
410-
await this.deleteIndexes(startUrl);
411-
} else {
412-
this.handleStatusUpdate({
413-
...fixedStatus,
414-
progress: 1,
415-
description: "Complete",
416-
status: "complete",
417-
debugInfo: "Already indexed",
418-
});
419-
return;
420-
}
421-
}
422-
423400
// If not force-reindexing and has failed with same config, don't reattempt
424401
if (!forceReindex) {
425402
const globalContext = new GlobalContext();
@@ -441,6 +418,31 @@ export default class DocsService {
441418
}
442419
}
443420

421+
if (indexExists && !forceReindex) {
422+
this.handleStatusUpdate({
423+
...fixedStatus,
424+
progress: 1,
425+
description: "Complete",
426+
status: "complete",
427+
debugInfo: "Already indexed",
428+
});
429+
return;
430+
}
431+
432+
// Do a test run on the embedder
433+
// This particular failure will not mark as a failed config in global context
434+
// Since SiteIndexingConfig is likely to be valid
435+
try {
436+
await provider.embed(["continue-test-run"]);
437+
} catch (e) {
438+
console.error("Failed to test embeddings connection", e);
439+
void this.ide.showToast(
440+
"error",
441+
"Failed to test embeddings connection. check your embeddings model configuration",
442+
);
443+
return;
444+
}
445+
444446
const markFailedInGlobalContext = () => {
445447
const globalContext = new GlobalContext();
446448
const failedDocs = globalContext.get("failedDocs") ?? [];
@@ -463,6 +465,11 @@ export default class DocsService {
463465
try {
464466
this.docsIndexingQueue.add(startUrl);
465467

468+
// Clear current indexes if reIndexing
469+
if (indexExists && forceReindex) {
470+
await this.deleteIndexes(startUrl);
471+
}
472+
466473
this.addToConfig(siteIndexingConfig);
467474

468475
this.handleStatusUpdate({
@@ -564,9 +571,10 @@ export default class DocsService {
564571
});
565572

566573
try {
567-
const subpathEmbeddings = await provider.embed(
568-
article.chunks.map((c) => c.content),
569-
);
574+
const subpathEmbeddings =
575+
article.chunks.length > 0
576+
? await provider.embed(article.chunks.map((c) => c.content))
577+
: [];
570578
chunks.push(...article.chunks);
571579
embeddings.push(...subpathEmbeddings);
572580
const toWait = 100 * this.docsIndexingQueue.size + 50;
@@ -592,7 +600,6 @@ export default class DocsService {
592600
});
593601

594602
void this.ide.showToast("info", `Failed to index ${startUrl}`);
595-
this.docsIndexingQueue.delete(startUrl);
596603
markFailedInGlobalContext();
597604
return;
598605
}
@@ -635,11 +642,6 @@ export default class DocsService {
635642
favicon,
636643
});
637644

638-
this.docsIndexingQueue.delete(startUrl);
639-
640-
if (this.shouldCancel(startUrl, startedWithEmbedder)) {
641-
return;
642-
}
643645
this.handleStatusUpdate({
644646
...fixedStatus,
645647
description: "Complete",
@@ -655,7 +657,11 @@ export default class DocsService {
655657

656658
removeFromFailedGlobalContext();
657659
} catch (e) {
658-
let description = `Error getting docs from: ${siteIndexingConfig.startUrl}`;
660+
console.error(
661+
`Error indexing docs at: ${siteIndexingConfig.startUrl}`,
662+
e,
663+
);
664+
let description = `Error indexing docs at: ${siteIndexingConfig.startUrl}`;
659665
if (e instanceof Error) {
660666
if (
661667
e.message.includes("github.com") &&
@@ -664,7 +670,6 @@ export default class DocsService {
664670
description = "Github rate limit exceeded"; // This text is used verbatim elsewhere
665671
}
666672
}
667-
console.error("Error indexing docs", e);
668673
this.handleStatusUpdate({
669674
...fixedStatus,
670675
description,
@@ -916,7 +921,7 @@ export default class DocsService {
916921
// Anything found in old config, new config, AND sqlite that doesn't match should be reindexed
917922
// TODO if only favicon and title change, only update, don't embed
918923
// Otherwise anything found in new config that isn't in sqlite should be added/indexed
919-
const newDocs: SiteIndexingConfig[] = [];
924+
const addedDocs: SiteIndexingConfig[] = [];
920925
const changedDocs: SiteIndexingConfig[] = [];
921926
for (const doc of newConfigDocs) {
922927
const currentIndexedDoc = currentStartUrls.includes(doc.startUrl);
@@ -953,14 +958,14 @@ export default class DocsService {
953958
}
954959
}
955960
} else {
956-
newDocs.push(doc);
961+
addedDocs.push(doc);
957962
void Telemetry.capture("add_docs_config", { url: doc.startUrl });
958963
}
959964
}
960965

961966
await Promise.allSettled([
962967
...changedDocs.map((doc) => this.indexAndAdd(doc, true)),
963-
...newDocs.map((doc) => this.indexAndAdd(doc)),
968+
...addedDocs.map((doc) => this.indexAndAdd(doc)),
964969
]);
965970

966971
for (const doc of deletedDocs) {

gui/src/components/indexing/DocsIndexingStatus.tsx

-1
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,6 @@ function DocsIndexingStatus({ docConfig }: IndexingStatusViewerProps) {
9191

9292
return (
9393
<div className="mt-2 flex w-full flex-col">
94-
{/* {`${status.type} - ${status.id} - ${status.status} - ${status.progress} - ${status.description} - ${status.icon}`} */}
9594
<div
9695
className={`flex flex-row items-center justify-between gap-2 text-sm`}
9796
>

gui/src/components/indexing/DocsIndexingStatuses.tsx

+27-7
Original file line numberDiff line numberDiff line change
@@ -7,27 +7,47 @@ import { useAppSelector } from "../../redux/hooks";
77
import { useContext, useMemo } from "react";
88
import { ExclamationTriangleIcon } from "@heroicons/react/24/outline";
99
import { IdeMessengerContext } from "../../context/IdeMessenger";
10+
import { IndexingStatus } from "core";
1011

1112
function DocsIndexingStatuses() {
1213
const dispatch = useDispatch();
1314
const config = useAppSelector((store) => store.config.config);
1415
const ideMessenger = useContext(IdeMessengerContext);
16+
const indexingStatuses = useAppSelector(
17+
(store) => store.indexing.indexing.statuses,
18+
);
1519

1620
const hasDocsProvider = useMemo(() => {
1721
return !!config.contextProviders?.some(
1822
(provider) => provider.title === "docs",
1923
);
2024
}, [config]);
2125

22-
const configDocs = useMemo(() => {
23-
return config.docs ?? [];
24-
}, [config]);
26+
// TODO - this might significantly impact performance during indexing
27+
const sortedConfigDocs = useMemo(() => {
28+
const sorter = (status: IndexingStatus["status"]) => {
29+
// TODO - further sorting?
30+
if (status === "indexing" || status === "paused") return 0;
31+
if (status === "failed") return 1;
32+
if (status === "aborted" || status === "pending") return 2;
33+
return 3;
34+
};
35+
36+
const docs = [...(config.docs ?? [])];
37+
docs.sort((a, b) =>
38+
sorter(indexingStatuses[b.startUrl]?.status ?? "pending") >
39+
sorter(indexingStatuses[a.startUrl]?.status ?? "pending")
40+
? -1
41+
: 1,
42+
);
43+
return docs;
44+
}, [config, indexingStatuses]);
2545

2646
return (
2747
<div className="flex flex-col gap-1">
2848
<div className="flex flex-row items-center justify-between">
2949
<h3 className="mb-0 mt-0 text-xl">@docs indexes</h3>
30-
{configDocs.length ? (
50+
{sortedConfigDocs.length ? (
3151
<SecondaryButton
3252
className="!my-0 flex h-7 flex-col items-center justify-center"
3353
onClick={() => {
@@ -41,7 +61,7 @@ function DocsIndexingStatuses() {
4161
</div>
4262
<span className="text-xs text-stone-500">
4363
{hasDocsProvider ? (
44-
configDocs.length ? (
64+
sortedConfigDocs.length ? (
4565
"Manage your documentation sources"
4666
) : (
4767
"No docs yet"
@@ -72,7 +92,7 @@ function DocsIndexingStatuses() {
7292
</span>
7393
<div className="flex max-h-[170px] flex-col gap-1 overflow-y-auto overflow-x-hidden pr-2">
7494
<div>
75-
{configDocs.length === 0 && (
95+
{sortedConfigDocs.length === 0 && (
7696
<SecondaryButton
7797
className="flex h-7 flex-col items-center justify-center"
7898
onClick={() => {
@@ -84,7 +104,7 @@ function DocsIndexingStatuses() {
84104
</SecondaryButton>
85105
)}
86106
</div>
87-
{configDocs.map((doc) => {
107+
{sortedConfigDocs.map((doc) => {
88108
return <DocsIndexingStatus key={doc.startUrl} docConfig={doc} />;
89109
})}
90110
</div>

0 commit comments

Comments
 (0)