|
| 1 | +import { BaseRetriever, BaseRetrieverInput } from "@langchain/core/retrievers"; |
| 2 | +import { Document, DocumentInterface } from "@langchain/core/documents"; |
| 3 | +import { CallbackManagerForRetrieverRun } from "@langchain/core/callbacks/manager"; |
| 4 | + |
/**
 * Constructor options for the ensemble retriever.
 */
export interface EnsembleRetrieverInput extends BaseRetrieverInput {
  /** The retrievers whose results are fused; each one is queried per search. */
  retrievers: BaseRetriever[];
  /**
   * Per-retriever weights, matched to `retrievers` by index. When omitted,
   * every retriever receives the same weight, `1 / retrievers.length`.
   * The number of weights must equal the number of retrievers, otherwise
   * fusing the results throws.
   */
  weights?: number[];
  /**
   * Constant added to a document's 1-based rank in the fusion score
   * `weight / (rank + c)`. It controls the balance between the importance
   * of high-ranked items and the consideration given to lower-ranked ones.
   * Defaults to 60.
   */
  c?: number;
}
| 20 | + |
/**
 * Ensemble retriever that aggregates and orders the results of
 * multiple retrievers by using weighted Reciprocal Rank Fusion.
 *
 * Every underlying retriever is queried concurrently with the same query.
 * Each document then receives the score
 * `sum(weight_i / (rank_i + c))` over the retrievers that returned it, where
 * `rank_i` is the document's 1-based position in that retriever's results.
 * Documents are identified by their `pageContent`: two documents with the
 * same content are treated as one, and the first occurrence is the one kept.
 */
export class EnsembleRetriever extends BaseRetriever {
  static lc_name() {
    return "EnsembleRetriever";
  }

  lc_namespace = ["langchain", "retrievers", "ensemble_retriever"];

  /** Retrievers whose results are fused. */
  retrievers: BaseRetriever[];

  /** Weight applied to each retriever, matched to `retrievers` by index. */
  weights: number[];

  /** Constant added to the rank in the fusion score denominator. */
  c = 60;

  /**
   * @param args Retrievers to combine, plus optional weights and rank
   * constant. Missing weights default to an equal share per retriever and
   * a missing `c` defaults to 60.
   */
  constructor(args: EnsembleRetrieverInput) {
    super(args);
    this.retrievers = args.retrievers;
    this.weights =
      args.weights ??
      new Array<number>(args.retrievers.length).fill(
        1 / args.retrievers.length
      );
    // `??` rather than `||` so an explicit `c: 0` is honoured instead of
    // being silently replaced by the default.
    this.c = args.c ?? 60;
  }

  /**
   * Retrieves documents for `query` by fusing the results of all retrievers.
   *
   * @param query The search query forwarded to every retriever.
   * @param runManager Optional callback manager for this retriever run.
   * @returns The de-duplicated documents, ordered by descending fused score.
   */
  async _getRelevantDocuments(
    query: string,
    runManager?: CallbackManagerForRetrieverRun
  ) {
    return this._rankFusion(query, runManager);
  }

  /**
   * Invokes every retriever concurrently and fuses their result lists.
   *
   * @param query The search query forwarded to every retriever.
   * @param runManager Optional callback manager; each retriever gets a child
   * manager obtained via `getChild("retriever_<n>")`, numbered from 1.
   * @returns The de-duplicated documents, ordered by descending fused score.
   */
  async _rankFusion(
    query: string,
    runManager?: CallbackManagerForRetrieverRun
  ) {
    const retrieverDocs = await Promise.all(
      this.retrievers.map((retriever, i) =>
        retriever.invoke(query, {
          callbacks: runManager?.getChild(`retriever_${i + 1}`),
        })
      )
    );

    return this._weightedReciprocalRank(retrieverDocs);
  }

  /**
   * Applies weighted Reciprocal Rank Fusion to per-retriever result lists.
   *
   * @param docList One ranked result list per retriever, in the same order
   * as `this.weights`.
   * @returns Unique documents (by `pageContent`) sorted by descending score.
   * @throws Error if the number of lists differs from the number of weights.
   */
  async _weightedReciprocalRank(docList: DocumentInterface[][]) {
    if (docList.length !== this.weights.length) {
      throw new Error(
        "Number of retrieved document lists must be equal to the number of weights."
      );
    }

    // A Map is used instead of a plain object because keys are arbitrary
    // document text: content such as "constructor" or "__proto__" would
    // otherwise collide with inherited Object.prototype members and yield
    // non-numeric scores.
    const rrfScores = new Map<string, number>();
    docList.forEach((retrieverDocs, idx) => {
      const weight = this.weights[idx];
      retrieverDocs.forEach(({ pageContent }, position) => {
        const rank = position + 1; // ranks are 1-based
        const previous = rrfScores.get(pageContent) ?? 0;
        rrfScores.set(pageContent, previous + weight / (rank + this.c));
      });
    });

    const scoreOf = (doc: DocumentInterface) =>
      rrfScores.get(doc.pageContent) ?? 0;

    // `_uniqueUnion` returns a fresh array, so sorting it in place does not
    // mutate any of the retrievers' result lists.
    const uniqueDocs = this._uniqueUnion(docList.flat());
    return uniqueDocs.sort((a, b) => scoreOf(b) - scoreOf(a));
  }

  /**
   * Removes documents whose `pageContent` has already been seen, keeping the
   * first occurrence and preserving input order.
   *
   * @param documents Documents to de-duplicate.
   * @returns A new array containing the first document for each content.
   */
  private _uniqueUnion(documents: Document[]): Document[] {
    const seenContents = new Set<string>();
    const result: Document[] = [];

    for (const doc of documents) {
      if (!seenContents.has(doc.pageContent)) {
        seenContents.add(doc.pageContent);
        result.push(doc);
      }
    }

    return result;
  }
}
0 commit comments