|
| 1 | +import { Chunk } from "../../.."; |
| 2 | +import { RETRIEVAL_PARAMS } from "../../../util/parameters"; |
| 3 | +import { deduplicateChunks } from "../util"; |
| 4 | +import BaseRetrievalPipeline from "./BaseRetrievalPipeline"; |
| 5 | + |
/**
 * Retrieval pipeline that gathers candidate chunks from full-text search and
 * embeddings, then orders them with the configured reranker and keeps the
 * `nFinal` best ones.
 *
 * Ordering convention used throughout this class: results are sorted by
 * ASCENDING reranker score, i.e. the most relevant chunk is LAST. That is why
 * "top" results are taken with `slice(-n)`.
 */
export default class RerankerRetrievalPipeline extends BaseRetrievalPipeline {
  /**
   * Collects the initial candidate set: up to half of `nRetrieve` from
   * full-text search, topped up with embeddings results so that at most
   * `nRetrieve` candidates are gathered before de-duplication.
   *
   * @returns De-duplicated candidate chunks (FTS results first).
   */
  private async _retrieveInitial(): Promise<Chunk[]> {
    const { input, nRetrieve } = this.options;

    // Full-text search gets half of the budget. Floor it so an odd
    // `nRetrieve` never yields a fractional result count.
    const ftsResults = await this.retrieveFts(input, Math.floor(nRetrieve / 2));

    // Embeddings fill whatever budget FTS left unused. Clamp at 0 so that a
    // negative remainder cannot turn into a "drop from the end" slice.
    const embeddingResults = await this.retrieveEmbeddings(input, nRetrieve);
    const remaining = Math.max(0, nRetrieve - ftsResults.length);

    return deduplicateChunks([
      ...ftsResults,
      ...embeddingResults.slice(0, remaining),
    ]);
  }

  /**
   * Scores `chunks` against `input` with the configured reranker and returns
   * the `nFinal` highest-scoring chunks in ascending score order (best last).
   * The `chunks` array passed in is not modified.
   *
   * @param input - The query text the chunks are ranked against.
   * @param chunks - Candidate chunks to rank.
   * @returns A new array holding at most `nFinal` chunks.
   * @throws Error if no reranker is configured in the pipeline options.
   */
  private async _rerank(input: string, chunks: Chunk[]): Promise<Chunk[]> {
    const { reranker, nFinal } = this.options;
    if (!reranker) {
      throw new Error("No reranker provided");
    }

    // Nothing to rank, or nothing requested. Note that `slice(-0)` would
    // return the whole array, so a zero `nFinal` must be handled explicitly.
    if (chunks.length === 0 || nFinal <= 0) {
      return [];
    }

    const scores: number[] = await reranker.rerank(input, chunks);

    // NOTE: score-threshold filtering is currently disabled. To re-enable it,
    // filter the scored pairs below on
    // `score >= RETRIEVAL_PARAMS.rerankThreshold` before sorting.

    // Pair each chunk with its own score up front, so the comparator never has
    // to search the array that is being sorted, and sort a copy rather than
    // the caller's array. A chunk without a score ranks lowest.
    const ranked = chunks
      .map((chunk, i) => ({
        chunk,
        score: scores[i] ?? Number.NEGATIVE_INFINITY,
      }))
      .sort((a, b) => a.score - b.score)
      .map(({ chunk }) => chunk);

    return ranked.slice(-nFinal);
  }

  /**
   * Runs an embeddings lookup for the content of each of the last
   * `RETRIEVAL_PARAMS.nResultsToExpandWithEmbeddings` chunks (the best ones,
   * given ascending order) and returns all lookup results flattened.
   *
   * @param chunks - Ranked chunks, best last.
   * @returns The concatenated lookup results; may contain duplicates.
   */
  private async _expandWithEmbeddings(chunks: Chunk[]): Promise<Chunk[]> {
    const expandCount = RETRIEVAL_PARAMS.nResultsToExpandWithEmbeddings;
    if (expandCount <= 0) {
      // `slice(-0)` would select every chunk instead of none.
      return [];
    }

    const topResults = chunks.slice(-expandCount);
    const expanded = await Promise.all(
      topResults.map((chunk) =>
        this.retrieveEmbeddings(
          chunk.content,
          RETRIEVAL_PARAMS.nEmbeddingsExpandTo,
        ),
      ),
    );
    return expanded.flat();
  }

  /**
   * Gathers additional chunks related to already-ranked results. Embeddings
   * expansion is the only expansion source at the moment.
   *
   * @param chunks - Ranked chunks, best last.
   * @returns Newly retrieved chunks (not merged with `chunks`).
   */
  private async _expandRankedResults(chunks: Chunk[]): Promise<Chunk[]> {
    const embeddingsResults = await this._expandWithEmbeddings(chunks);
    return [...embeddingsResults];
  }

  /**
   * Executes the pipeline: initial retrieval followed by a single rerank pass.
   *
   * @returns At most `nFinal` chunks in ascending reranker-score order.
   * @throws Error if no reranker is configured in the pipeline options.
   */
  async run(): Promise<Chunk[]> {
    const { input } = this.options;

    // Retrieve initial results, then rerank them.
    const candidates = await this._retrieveInitial();
    const results = await this._rerank(input, candidates);

    // Disabled for now: expand the top reranked results, de-duplicate, and
    // rerank a second time.
    //   const expanded = await this._expandRankedResults(results);
    //   let merged = deduplicateChunks([...results, ...expanded]);
    //   merged = await this._rerank(input, merged);

    // TODO: stitch together results

    return results;
  }
}
| 99 | + |
| 100 | +// Source: expansion with code graph |
| 101 | +// consider doing this after reranking? Or just having a lower reranking threshold |
| 102 | +// This is VS Code only until we use PSI for JetBrains or build our own general solution |
| 103 | +// TODO: Need to pass in the expandSnippet function as a function argument |
| 104 | +// because this import causes `tsc` to fail |
| 105 | +// if ((await extras.ide.getIdeInfo()).ideType === "vscode") { |
| 106 | +// const { expandSnippet } = await import( |
| 107 | +// "../../../extensions/vscode/src/util/expandSnippet" |
| 108 | +// ); |
| 109 | +// let expansionResults = ( |
| 110 | +// await Promise.all( |
| 111 | +// extras.selectedCode.map(async (rif) => { |
| 112 | +// return expandSnippet( |
| 113 | +// rif.filepath, |
| 114 | +// rif.range.start.line, |
| 115 | +// rif.range.end.line, |
| 116 | +// extras.ide, |
| 117 | +// ); |
| 118 | +// }), |
| 119 | +// ) |
| 120 | +// ).flat() as Chunk[]; |
| 121 | +// retrievalResults.push(...expansionResults); |
| 122 | +// } |
| 123 | + |
| 124 | +// Source: Open file exact match |
| 125 | +// Source: Class/function name exact match |
0 commit comments