@@ -157,7 +157,8 @@ export default class DocsService {
157
157
158
158
const currentStatus = this . statuses . get ( doc . startUrl ) ;
159
159
if ( currentStatus ) {
160
- return currentStatus ;
160
+ this . handleStatusUpdate ( currentStatus ) ;
161
+ return ;
161
162
}
162
163
163
164
const sharedStatus = {
@@ -211,11 +212,7 @@ export default class DocsService {
211
212
if ( isAborted ) {
212
213
return true ;
213
214
}
214
- // // Handle indexing disabled change mid-indexing
215
- // if (this.config.disableIndexing) {
216
- // this.abort(startUrl);
217
- // return true;
218
- // }
215
+
219
216
// Handle embeddings provider change mid-indexing
220
217
if ( this . config . embeddingsProvider . embeddingId !== startedWithEmbedder ) {
221
218
this . abort ( startUrl ) ;
@@ -277,10 +274,6 @@ export default class DocsService {
277
274
const oldConfig = this . config ;
278
275
this . config = newConfig ; // IMPORTANT - need to set up top, other methods below use this without passing it in
279
276
280
- // if (this.config.disableIndexing) {
281
- // return;
282
- // }
283
-
284
277
// No point in indexing if no docs context provider
285
278
const hasDocsContextProvider = this . hasDocsContextProvider ( ) ;
286
279
if ( ! hasDocsContextProvider ) {
@@ -368,25 +361,25 @@ export default class DocsService {
368
361
siteIndexingConfig : SiteIndexingConfig ,
369
362
forceReindex : boolean = false ,
370
363
) : Promise < void > {
371
- // if (this.config.disableIndexing) {
372
- // console.warn("Attempting to add/index docs when indexing is disabled");
373
- // return;
374
- // }
375
364
const { startUrl, useLocalCrawling, maxDepth } = siteIndexingConfig ;
376
365
366
+ // First, if indexing is already in process, don't attempt
367
+ // This queue is necessary because indexAndAdd is invoked circularly by config edits
368
+ // TODO shouldn't really be a gap between adding and checking in queue but probably fine
369
+ if ( this . docsIndexingQueue . has ( startUrl ) ) {
370
+ return ;
371
+ }
372
+
377
373
const { isPreindexed, provider } =
378
374
await this . getEmbeddingsProvider ( startUrl ) ;
379
375
if ( isPreindexed ) {
380
376
console . warn ( "Attempted to indexAndAdd pre-indexed doc" ) ;
381
377
return ;
382
378
}
383
-
384
- // Queue - indexAndAdd is invoked circularly by config edits. This prevents duplicate runs
385
- if ( this . docsIndexingQueue . has ( startUrl ) ) {
386
- return ;
387
- }
388
-
389
379
const startedWithEmbedder = provider . embeddingId ;
380
+
381
+ // Check if doc has been successfully indexed with the given embedder
382
+ // Note at this point we know it's not a pre-indexed doc
390
383
const indexExists = await this . hasMetadata ( startUrl ) ;
391
384
392
385
// Build status update - most of it is fixed values
@@ -404,22 +397,6 @@ export default class DocsService {
404
397
url : siteIndexingConfig . startUrl ,
405
398
} ;
406
399
407
- // Clear current indexes if reIndexing
408
- if ( indexExists ) {
409
- if ( forceReindex ) {
410
- await this . deleteIndexes ( startUrl ) ;
411
- } else {
412
- this . handleStatusUpdate ( {
413
- ...fixedStatus ,
414
- progress : 1 ,
415
- description : "Complete" ,
416
- status : "complete" ,
417
- debugInfo : "Already indexed" ,
418
- } ) ;
419
- return ;
420
- }
421
- }
422
-
423
400
// If not force-reindexing and has failed with same config, don't reattempt
424
401
if ( ! forceReindex ) {
425
402
const globalContext = new GlobalContext ( ) ;
@@ -441,6 +418,31 @@ export default class DocsService {
441
418
}
442
419
}
443
420
421
+ if ( indexExists && ! forceReindex ) {
422
+ this . handleStatusUpdate ( {
423
+ ...fixedStatus ,
424
+ progress : 1 ,
425
+ description : "Complete" ,
426
+ status : "complete" ,
427
+ debugInfo : "Already indexed" ,
428
+ } ) ;
429
+ return ;
430
+ }
431
+
432
+ // Do a test run on the embedder
433
+ // This particular failure will not mark as a failed config in global context
434
+ // Since SiteIndexingConfig is likely to be valid
435
+ try {
436
+ await provider . embed ( [ "continue-test-run" ] ) ;
437
+ } catch ( e ) {
438
+ console . error ( "Failed to test embeddings connection" , e ) ;
439
+ void this . ide . showToast (
440
+ "error" ,
441
+ "Failed to test embeddings connection. check your embeddings model configuration" ,
442
+ ) ;
443
+ return ;
444
+ }
445
+
444
446
const markFailedInGlobalContext = ( ) => {
445
447
const globalContext = new GlobalContext ( ) ;
446
448
const failedDocs = globalContext . get ( "failedDocs" ) ?? [ ] ;
@@ -463,6 +465,11 @@ export default class DocsService {
463
465
try {
464
466
this . docsIndexingQueue . add ( startUrl ) ;
465
467
468
+ // Clear current indexes if reIndexing
469
+ if ( indexExists && forceReindex ) {
470
+ await this . deleteIndexes ( startUrl ) ;
471
+ }
472
+
466
473
this . addToConfig ( siteIndexingConfig ) ;
467
474
468
475
this . handleStatusUpdate ( {
@@ -564,9 +571,10 @@ export default class DocsService {
564
571
} ) ;
565
572
566
573
try {
567
- const subpathEmbeddings = await provider . embed (
568
- article . chunks . map ( ( c ) => c . content ) ,
569
- ) ;
574
+ const subpathEmbeddings =
575
+ article . chunks . length > 0
576
+ ? await provider . embed ( article . chunks . map ( ( c ) => c . content ) )
577
+ : [ ] ;
570
578
chunks . push ( ...article . chunks ) ;
571
579
embeddings . push ( ...subpathEmbeddings ) ;
572
580
const toWait = 100 * this . docsIndexingQueue . size + 50 ;
@@ -592,7 +600,6 @@ export default class DocsService {
592
600
} ) ;
593
601
594
602
void this . ide . showToast ( "info" , `Failed to index ${ startUrl } ` ) ;
595
- this . docsIndexingQueue . delete ( startUrl ) ;
596
603
markFailedInGlobalContext ( ) ;
597
604
return ;
598
605
}
@@ -635,11 +642,6 @@ export default class DocsService {
635
642
favicon,
636
643
} ) ;
637
644
638
- this . docsIndexingQueue . delete ( startUrl ) ;
639
-
640
- if ( this . shouldCancel ( startUrl , startedWithEmbedder ) ) {
641
- return ;
642
- }
643
645
this . handleStatusUpdate ( {
644
646
...fixedStatus ,
645
647
description : "Complete" ,
@@ -655,7 +657,11 @@ export default class DocsService {
655
657
656
658
removeFromFailedGlobalContext ( ) ;
657
659
} catch ( e ) {
658
- let description = `Error getting docs from: ${ siteIndexingConfig . startUrl } ` ;
660
+ console . error (
661
+ `Error indexing docs at: ${ siteIndexingConfig . startUrl } ` ,
662
+ e ,
663
+ ) ;
664
+ let description = `Error indexing docs at: ${ siteIndexingConfig . startUrl } ` ;
659
665
if ( e instanceof Error ) {
660
666
if (
661
667
e . message . includes ( "github.com" ) &&
@@ -664,7 +670,6 @@ export default class DocsService {
664
670
description = "Github rate limit exceeded" ; // This text is used verbatim elsewhere
665
671
}
666
672
}
667
- console . error ( "Error indexing docs" , e ) ;
668
673
this . handleStatusUpdate ( {
669
674
...fixedStatus ,
670
675
description,
@@ -916,7 +921,7 @@ export default class DocsService {
916
921
// Anything found in old config, new config, AND sqlite that doesn't match should be reindexed
917
922
// TODO if only favicon and title change, only update, don't embed
918
923
// Otherwise anything found in new config that isn't in sqlite should be added/indexed
919
- const newDocs : SiteIndexingConfig [ ] = [ ] ;
924
+ const addedDocs : SiteIndexingConfig [ ] = [ ] ;
920
925
const changedDocs : SiteIndexingConfig [ ] = [ ] ;
921
926
for ( const doc of newConfigDocs ) {
922
927
const currentIndexedDoc = currentStartUrls . includes ( doc . startUrl ) ;
@@ -953,14 +958,14 @@ export default class DocsService {
953
958
}
954
959
}
955
960
} else {
956
- newDocs . push ( doc ) ;
961
+ addedDocs . push ( doc ) ;
957
962
void Telemetry . capture ( "add_docs_config" , { url : doc . startUrl } ) ;
958
963
}
959
964
}
960
965
961
966
await Promise . allSettled ( [
962
967
...changedDocs . map ( ( doc ) => this . indexAndAdd ( doc , true ) ) ,
963
- ...newDocs . map ( ( doc ) => this . indexAndAdd ( doc ) ) ,
968
+ ...addedDocs . map ( ( doc ) => this . indexAndAdd ( doc ) ) ,
964
969
] ) ;
965
970
966
971
for ( const doc of deletedDocs ) {
0 commit comments