Skip to content

Commit bf2dd62

Browse files
committed
使用数据层
1 parent 3da55b6 commit bf2dd62

File tree

4 files changed

+102
-73
lines changed

4 files changed

+102
-73
lines changed

packages/core/src/DBlayer.ts

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
import { z } from "zod";
2+
import { PluginWithConfig } from "./Plugin";
23

34
const pluginSchema = z.object({
45
name: z.string(),
5-
pluginType: z.enum(["DocLoader", "DocSplitter"]),
66
config: z.any()
77
})
88

@@ -13,7 +13,6 @@ const baseSchema = z.object({
1313

1414
const configSchema = z.object({
1515
meiliSearchConfig: z.object({}),
16-
fileOpThrottleMs: z.number(),
1716
})
1817

1918
export interface DBLayer {
@@ -29,7 +28,7 @@ export interface DBLayer {
2928
// 删除
3029
delete: (name: string) => Promise<void>
3130
// 获取
32-
get: () => Promise<z.infer<typeof pluginSchema>[]>;
31+
get: () => Promise<PluginWithConfig[]>;
3332
},
3433
// 知识库表
3534
base: {

packages/core/src/DocBase.ts

Lines changed: 93 additions & 68 deletions
Original file line number | Diff line number | Diff line change
@@ -1,18 +1,20 @@
11
import { omit, throttle } from "es-toolkit";
22
import defaultDocLoaderPlugin, {
3+
DocLoaderInput,
34
type DocLoader,
45
type DocLoaderPlugin,
56
} from "./DocLoader";
67
import { DocManager } from "./DocManager";
78
import defaultDocSplitterPlugin, {
89
type DocSplitterPlugin,
910
} from "./DocSplitter";
10-
import type { PluginWithConfig } from "./Plugin";
11+
import type { Content, PluginWithConfig } from "./Plugin";
1112
import { createMeilisearchClient, getExtFromPath } from "./Utils";
12-
import { type Config as MeiliSearchConfig, type SearchParams } from "meilisearch";
13+
import { MeiliSearch, type SearchParams } from "meilisearch";
1314
import { basename } from "path";
1415
import { FSLayer, Scanner, Watcher } from "./FSLayer";
1516
import { AnyZodObject } from "zod";
17+
import { DBLayer } from "./DBLayer";
1618

1719
export interface DifyKnowledgeRequest {
1820
knowledge_id: string;
@@ -34,15 +36,16 @@ export interface DifyKnowledgeResponseRecord {
3436
* DocBase 初始化配置
3537
*/
3638
export interface DocBaseOptions {
39+
db: DBLayer
3740
/**
3841
* MeiliSearch 配置
3942
*/
40-
meiliSearchConfig: MeiliSearchConfig;
43+
// meiliSearchConfig: MeiliSearchConfig;
4144
/**
4245
* 初始化知识库目录
4346
* @default []
4447
*/
45-
initPaths?: string[];
48+
// initPaths?: string[];
4649
/**
4750
* 初始化插件列表
4851
* @default
@@ -51,25 +54,27 @@ export interface DocBaseOptions {
5154
* { plugin: defaultDocSplitterPlugin, params: { len: 1000 } }
5255
* ]
5356
*/
54-
initPlugins?: PluginWithConfig<any>[];
57+
// initPlugins?: PluginWithConfig<any>[];
5558
/**
5659
* 是否在初始化时扫描初始化知识库目录
5760
* @default false
5861
*/
59-
initscan?: boolean;
62+
// initscan?: boolean;
6063
/**
6164
* 索引前缀
6265
*/
63-
indexPrefix?: string;
66+
// indexPrefix?: string;
6467
/**
6568
* 文件变动时间节流时段(毫秒),在该时段内每个文件最多执行一次嵌入更新操作
6669
*/
67-
fileOpThrottleMs?: number;
70+
// fileOpThrottleMs?: number;
6871
}
6972

7073
export class DocBase {
74+
#meiliSearch: MeiliSearch;
75+
7176
/** 文档管理器 */
72-
#docManager!: DocManager;
77+
#docManagers: Map<string, DocManager> = new Map();
7378

7479
/** 文档加载指向器,映射文件扩展名到文档加载器名称 */
7580
#docExtToLoaderName: Map<string, string> = new Map();
@@ -87,41 +92,44 @@ export class DocBase {
8792
#docWatcher!: Watcher;
8893

8994
/** 任务缓存器 */
90-
#watcherTaskCache = new Map<string, "remove" | "upsert">();
91-
// 节流器默认 500 毫秒
92-
fileOpThrottleMs: number = 500;
95+
#watcherTaskCache = new Map<string, {
96+
docManagerId: string
97+
type: "remove" | "upsert"
98+
}>();
9399

94100
// 执行任务缓存器中的任务, 每 watcherTaskThrottleMs 毫秒最多执行一次
95101
#doWatcherTask = throttle(
96102
async () => {
97103
console.debug("Starting to execute watcher tasks...");
98104
const results = await Promise.allSettled(
99-
this.#watcherTaskCache.entries().map(async ([path, type]) => {
105+
this.#watcherTaskCache.entries().map(async ([path, { docManagerId, type }]) => {
106+
const docManager = this.#docManagers.get(docManagerId);
100107
if (type === "upsert") {
101108
console.debug(`Upserting document: ${path}`);
102-
await this.#docManager.upsertDoc(path);
109+
await docManager.upsertDoc(path);
103110
console.debug(`Document upserted: ${path}`);
104111
} else if (type === "remove") {
105112
console.debug(`Deleting document: ${path}`);
106-
await this.#docManager.deleteDocByPath(path);
113+
await docManager.deleteDocByPath(path);
107114
console.debug(`Document deleted: ${path}`);
108115
}
109116
})
110117
);
111118
console.debug("Watcher tasks execution completed.");
112119
return results;
113120
},
114-
this.fileOpThrottleMs,
121+
// 最小执行间隔为半秒
122+
500,
115123
{ edges: ["trailing"] }
116124
);
117125

118126
/** 获取挂载的知识库目录 */
119-
get dirs() {
120-
console.info("Fetching watched directories...");
121-
const watchedPaths = this.#docWatcher.getWatchedPaths();
122-
console.info("Watched directories fetched successfully.");
123-
return watchedPaths;
124-
}
127+
// get dirs() {
128+
// console.info("Fetching watched directories...");
129+
// const watchedPaths = this.#docWatcher.getWatchedPaths();
130+
// console.info("Watched directories fetched successfully.");
131+
// return watchedPaths;
132+
// }
125133

126134
/** 获取支持的文档类型 */
127135
get exts() {
@@ -176,7 +184,8 @@ export class DocBase {
176184
* 扫描指定目录中的文档
177185
* @param dirs - 要扫描的目录数组
178186
*/
179-
#scan = async (dirs: string[]) => {
187+
#scan = async (id: string, dirs: string[]) => {
188+
const docManager = this.#docManagers.get(id)
180189
await this.#docScanner({
181190
dirs,
182191
exts: Array.from(this.#docExtToLoaderName.keys()),
@@ -185,7 +194,7 @@ export class DocBase {
185194
await Promise.all(
186195
paths.map(async (path) => {
187196
console.info(`Upserting document during scan: ${path}`);
188-
await this.#docManager.upsertDoc(path);
197+
await docManager.upsertDoc(path);
189198
console.info(`Document upserted during scan: ${path}`);
190199
})
191200
);
@@ -197,34 +206,42 @@ export class DocBase {
197206

198207
/** 启动 docbase */
199208
start = async ({
200-
meiliSearchConfig,
209+
// meiliSearchConfig,
201210
// TODO 为区分多知识库的参数
202-
indexPrefix,
211+
// indexPrefix,
203212
// TODO 改成后期增删查改
204-
initPaths = [],
205-
initPlugins = [
213+
// initPaths = [],
214+
215+
db,
216+
// fileOpThrottleMs,
217+
}: DocBaseOptions) => {
218+
console.info("Starting DocBase...");
219+
// TODO 初始化配置?
220+
const { meiliSearchConfig } = await db.config.get();
221+
// TODO
222+
// @ts-ignore
223+
this.#meiliSearch = await createMeilisearchClient(meiliSearchConfig)
224+
225+
// 初始化插件
226+
console.info("Loading all plugins...");
227+
const plugins = await db.plugin.get()
228+
const initPlugins: PluginWithConfig[] = [
206229
{
207230
plugin: defaultDocLoaderPlugin,
208-
params: {},
231+
config: {},
209232
},
210233
{
211234
plugin: defaultDocSplitterPlugin,
212-
params: {
235+
config: {
213236
len: 1000,
214237
},
215238
},
216-
],
217-
initscan = false,
218-
fileOpThrottleMs,
219-
}: DocBaseOptions) => {
220-
console.info("Starting DocBase...");
221-
this.fileOpThrottleMs = fileOpThrottleMs;
222-
console.info("Loading all plugins...");
239+
...plugins
240+
]
223241
// 加载所有插件
224242
await Promise.all(
225243
initPlugins.map((initPlugin) => this.loadPlugin(initPlugin))
226244
);
227-
console.info("All plugins loaded successfully.");
228245

229246
const docSplitterExist = typeof this.#docSplitter.func === "function";
230247
const docLoadersExist = this.#docLoaders.size > 0;
@@ -235,16 +252,7 @@ export class DocBase {
235252
console.error("Loaded components: \n" + msg);
236253
throw new Error("Loaded components: \n" + msg);
237254
}
238-
239-
// 初始化文档管理器
240-
console.info("Initializing document manager...");
241-
this.#docManager = new DocManager({
242-
indexPrefix,
243-
meiliSearch: await createMeilisearchClient(meiliSearchConfig),
244-
docLoader: (path) => this.#hyperDocLoader(path),
245-
docSplitter: (text) => this.#docSplitter.func(text),
246-
});
247-
await this.#docManager.init();
255+
console.info("All plugins loaded successfully.");
248256

249257
// 初始化监视器扫描器
250258
console.info("Initializing watcher and scanner...");
@@ -266,24 +274,43 @@ export class DocBase {
266274
this.#docScanner = scanner;
267275
console.info("Watcher and scanner initialized successfully.");
268276

277+
// 初始化文档管理器
278+
console.info("Initializing DocManager...");
279+
const docLoader = (input: DocLoaderInput) => this.#hyperDocLoader(input)
280+
const docSplitter = (content: AsyncIterable<Content>) => this.#docSplitter.func(content)
281+
const base = await db.base.get()
282+
283+
base.map(async ({ path, id }) => {
284+
const docm = new DocManager({
285+
indexPrefix: id,
286+
meiliSearch: this.#meiliSearch,
287+
docLoader,
288+
docSplitter,
289+
});
290+
await docm.init()
291+
// 扫描目录
292+
await this.#scan([path]);
293+
// 监视目录
294+
this.#docWatcher.watch(path)
295+
return docm
296+
})
297+
console.info("DocManager initialized successfully.");
298+
269299
// 扫描加载默认目录下文档
270-
if (initscan) {
271-
console.info("Scanning initial directories...");
272-
await this.#scan(initPaths);
273-
console.info("Initial directories scanned successfully.");
274-
}
300+
// console.info("Scanning initial directories...");
301+
// console.info("Initial directories scanned successfully.");
275302

276303
// 开启监视,同步变动文档
277-
console.info("Starting to watch directories...");
278-
initPaths.map((initPath) => this.#docWatcher.watch(initPath));
279-
console.info("Directories are being watched.");
304+
// console.info("Starting to watch directories...");
305+
// initPaths.map((initPath) => this.#docWatcher.watch(initPath));
306+
// console.info("Directories are being watched.");
280307
console.info("DocBase started successfully.");
281308
};
282309

283310
/**
284311
* 立即扫描所有目录
285312
*/
286-
scanAllNow = async () => {
313+
scanAllNow = async (id: string) => {
287314
console.info("Starting to scan all directories immediately...");
288315
await this.#scan(this.dirs);
289316
console.info("All directories scanned immediately.");
@@ -361,9 +388,9 @@ export class DocBase {
361388
pluginWithConfig: PluginWithConfig<T>
362389
) => {
363390
console.info(`Loading ${pluginWithConfig.plugin.pluginType} plugin ${pluginWithConfig.plugin.name}`);
364-
const { plugin, params } = pluginWithConfig;
391+
const { plugin, config } = pluginWithConfig;
365392

366-
plugin.init && await plugin.init(params)
393+
plugin.init && await plugin.init(config)
367394

368395
switch (plugin.pluginType) {
369396
case "DocLoader":
@@ -432,8 +459,10 @@ export class DocBase {
432459
* @param opt - meilisearch 搜索选项
433460
* @returns 返回搜索结果
434461
*/
435-
search = async (query: string, opt?: SearchParams) => {
436-
console.info(`Searching for documents with query: ${query}`);
462+
search = async (params?: SearchParams & {
463+
knowledgeId: string;
464+
}) => {
465+
console.info(`Searching for documents with query: ${params.q}`);
437466
const results = await this.#docManager.search(query, opt);
438467
console.info(`Search completed. Found ${results.length} documents.`);
439468
return results;
@@ -442,22 +471,18 @@ export class DocBase {
442471
/**
443472
* 作为 Dify 外部知识库搜索
444473
* @param params - Dify 知识库搜索请求参数
445-
* @param params.query - 搜索查询字符串
446-
* @param params.retrieval_setting - 检索设置
447-
* @param params.retrieval_setting.top_k - 返回结果的最大数量
448-
* @param params.retrieval_setting.score_threshold - 相关性得分阈值
449474
* @returns 返回符合 Dify 格式的搜索结果数组
450475
*/
451476
difySearch = async (
452477
params: DifyKnowledgeRequest
453478
): Promise<DifyKnowledgeResponseRecord[]> => {
454479
console.info("Performing Dify search...");
455-
// 等待多知识库支持
456-
// params.knowledge_id;
457480
const q = params.query;
458481
const { top_k, score_threshold } = params.retrieval_setting;
459482

460-
const results = await this.search(q, {
483+
const results = await this.search({
484+
q,
485+
knowledgeId: params.knowledge_id,
461486
limit: top_k,
462487
rankingScoreThreshold: score_threshold,
463488
showRankingScore: true,

packages/core/src/DocLoader.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,17 @@ import { IDataType } from "hash-wasm";
44
import { AsyncStream } from "itertools-ts";
55
import { AnyZodObject } from "zod";
66

7+
export interface DocLoaderInput {
8+
path: string;
9+
hash: (data: IDataType) => Promise<string>;
10+
}
11+
712
/**
813
* 文档加载器类型定义
914
* @param path - 文档路径
1015
* @returns 返回加载后的文档对象的迭代器,false 表示不符合条件的文件,跳过处理
1116
*/
12-
export type DocLoader = (input: { path: string, hash: (data: IDataType) => Promise<string> }) => Promise<{
17+
export type DocLoader = (input: DocLoaderInput) => Promise<{
1318
hash: string;
1419
content: AsyncIterable<Content>
1520
} | false>;

packages/core/src/Plugin.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -68,5 +68,5 @@ export interface PluginWithConfig<T extends AnyZodObject = AnyZodObject> {
6868
/** 插件实例 */
6969
plugin: DocBasePlugin<T>;
7070
/** 插件参数 */
71-
params: z.infer<T>;
71+
config: z.infer<T>;
7272
}

0 commit comments

Comments
 (0)