From d701c014021573f67b6aa0493b288ebc8100ce27 Mon Sep 17 00:00:00 2001 From: yangjundong <1047934838@qq.com> Date: Tue, 17 Dec 2024 09:02:10 +0000 Subject: [PATCH 1/4] [feat][store] Impl document scan filter --- src/document/document_index.cc | 30 +++-- src/document/document_index.h | 3 +- src/document/document_reader.cc | 39 ++++++ src/document/document_reader.h | 25 ++++ src/engine/engine.h | 7 + src/engine/mono_store_engine.cc | 7 + src/engine/mono_store_engine.h | 2 + src/engine/raft_store_engine.cc | 7 + src/engine/raft_store_engine.h | 2 + src/engine/storage.cc | 36 +++++ src/engine/storage.h | 2 + src/engine/txn_engine_helper.cc | 1 - src/handler/raft_apply_handler.cc | 3 - src/server/document_service.cc | 123 +++++++++++++++++- src/server/document_service.h | 5 + .../unit_test/document/test_document_index.cc | 36 ++--- .../unit_test/document/test_tantivy_search.cc | 106 +++++++++------ 17 files changed, 351 insertions(+), 83 deletions(-) diff --git a/src/document/document_index.cc b/src/document/document_index.cc index d4e902851..ba92de322 100644 --- a/src/document/document_index.cc +++ b/src/document/document_index.cc @@ -364,7 +364,7 @@ butil::Status DocumentIndex::Delete(const std::vector& delete_ids) { } butil::Status DocumentIndex::Search(uint32_t topk, const std::string& query_string, bool use_range_filter, - int64_t start_id, int64_t end_id, bool use_id_filter, + int64_t start_id, int64_t end_id, bool use_id_filter, bool query_unlimited, const std::vector& alive_ids, const std::vector& column_names, std::vector& results) { @@ -376,7 +376,7 @@ butil::Status DocumentIndex::Search(uint32_t topk, const std::string& query_stri return butil::Status(pb::error::EILLEGAL_PARAMTETERS, err_msg); } - if (topk == 0) { + if (!query_unlimited && topk == 0) { return butil::Status(pb::error::EILLEGAL_PARAMTETERS, "topk must be greater than 0"); } @@ -393,8 +393,9 @@ butil::Status DocumentIndex::Search(uint32_t topk, const std::string& query_stri // } // } - auto search_result = ffi_bm25_search_with_column_names(index_path_, query_string, topk, alive_ids, use_id_filter, - use_range_filter, start_id, end_id, column_names); + auto search_result = + ffi_bm25_search_with_column_names(index_path_, query_string, topk, alive_ids, use_id_filter, use_range_filter, + start_id, end_id, column_names, query_unlimited); if (search_result.error_code == 0) { for (const auto& row_id_with_score : search_result.result) { @@ -1096,15 +1097,16 @@ butil::Status DocumentIndexWrapper::Search(const pb::common::Range& region_range if (sibling_document_index != nullptr) { DINGO_LOG(INFO) << fmt::format("[document_index.wrapper][id({})] search document in sibling document index.", Id()); std::vector results_1; - auto status = sibling_document_index->Search(parameter.top_n(), parameter.query_string(), false, 0, INT64_MAX, - use_id_filter, alive_ids, column_names, results_1); + auto status = + sibling_document_index->Search(parameter.top_n(), parameter.query_string(), false, 0, INT64_MAX, use_id_filter, + parameter.query_unlimited(), alive_ids, column_names, results_1); if (!status.ok()) { return status; } std::vector results_2; status = document_index->Search(parameter.top_n(), parameter.query_string(), false, 0, INT64_MAX, use_id_filter, - alive_ids, column_names, results_2); + parameter.query_unlimited(), alive_ids, column_names, results_2); if (!status.ok()) { return status; } @@ -1120,21 +1122,23 @@ butil::Status DocumentIndexWrapper::Search(const pb::common::Range& region_range DINGO_LOG(INFO) << fmt::format( "[document_index.wrapper][id({})] search document in document index with range_filter, range({}) " - "query_string({}) top_n({}) min_document_id({}) max_document_id({})", + "query_string({}) top_n({}, query_unlimited({})) min_document_id({}) max_document_id({})", Id(), DocumentCodec::DebugRange(false, region_range), parameter.query_string(), parameter.top_n(), - min_document_id, max_document_id); + parameter.query_unlimited(), min_document_id, max_document_id); // use range filter return document_index->Search(parameter.top_n(), parameter.query_string(), true, min_document_id, max_document_id, - use_id_filter, alive_ids, column_names, results); + use_id_filter, parameter.query_unlimited(), alive_ids, column_names, results); } DINGO_LOG(INFO) << fmt::format( - "[document_index.wrapper][id({})] search document in document index, range({}) query_string({}) top_n({})", Id(), - DocumentCodec::DebugRange(false, region_range), parameter.query_string(), parameter.top_n()); + "[document_index.wrapper][id({})] search document in document index, range({}) query_string({}) top_n({}), " + "query_unlimited({})", + Id(), DocumentCodec::DebugRange(false, region_range), parameter.query_string(), parameter.top_n(), + parameter.query_unlimited()); return document_index->Search(parameter.top_n(), parameter.query_string(), false, 0, INT64_MAX, use_id_filter, - alive_ids, column_names, results); + parameter.query_unlimited(), alive_ids, column_names, results); } // For document index, all node need to hold the index, so this function always return true. diff --git a/src/document/document_index.h b/src/document/document_index.h index 504d13bc9..8fd97a09f 100644 --- a/src/document/document_index.h +++ b/src/document/document_index.h @@ -67,7 +67,8 @@ class DocumentIndex { butil::Status Load(const std::string& path); butil::Status Search(uint32_t topk, const std::string& query_string, bool use_range_filter, int64_t start_id, - int64_t end_id, bool use_id_filter, const std::vector& alive_ids, + int64_t end_id, bool use_id_filter, bool query_unlimited, + const std::vector& alive_ids, const std::vector& column_names, std::vector& results); diff --git a/src/document/document_reader.cc b/src/document/document_reader.cc index c5d950857..3aa3ef687 100644 --- a/src/document/document_reader.cc +++ b/src/document/document_reader.cc @@ -32,6 +32,8 @@ namespace dingodb { +DECLARE_int64(stream_message_max_limit_size); + butil::Status DocumentReader::QueryDocumentWithId(int64_t ts, const pb::common::Range& region_range, int64_t partition_id, int64_t document_id, bool with_scalar_data, bool with_table_data, std::vector& selected_scalar_keys, @@ -129,6 +131,28 @@ butil::Status DocumentReader::DocumentSearch(std::shared_ptr ctx, bool& has_more, + std::vector& results) { + auto status = butil::Status(); + auto stream = ctx->stream; + auto stream_state = + std::dynamic_pointer_cast(stream->GetOrNewStreamState([&]() -> StreamStatePtr { + std::vector all_results; + status = SearchDocument(ctx->ts, ctx->partition_id, ctx->document_index, ctx->region_range, ctx->parameter, + all_results); + if (!status.ok()) { + return nullptr; + } + return DocumentSearchAllStreamState::New(std::move(all_results)); + })); + if (!status.ok()) { + DINGO_LOG(ERROR) << "Document search all failed: " << Helper::PrintStatus(status); + return status; + } + has_more = stream_state->Batch(stream->Limit(), results); + return status; +} + butil::Status DocumentReader::DocumentBatchQuery(std::shared_ptr ctx, std::vector& document_with_ids) { for (auto document_id : ctx->document_ids) { @@ -355,4 +379,19 @@ butil::Status DocumentReader::ScanDocumentId(std::shared_ptr& results) { + results.reserve(limit); + size_t count = 0; + size_t total_bytes = 0; + bool has_more; + while (current_ != end_ && count < limit && total_bytes < FLAGS_stream_message_max_limit_size) { + results.push_back(*current_); + total_bytes += current_->ByteSizeLong(); + ++current_; + ++count; + } + has_more = (current_ != end_); + return has_more; +} + } // namespace dingodb diff --git a/src/document/document_reader.h b/src/document/document_reader.h index cd6dda7d6..1db5484ed 100644 --- a/src/document/document_reader.h +++ b/src/document/document_reader.h @@ -22,6 +22,7 @@ #include #include "butil/status.h" +#include "common/stream.h" #include "engine/engine.h" #include "engine/raw_engine.h" #include "mvcc/reader.h" @@ -41,6 +42,8 @@ class DocumentReader { butil::Status DocumentSearch(std::shared_ptr ctx, std::vector& results); + butil::Status DocumentSearchAll(std::shared_ptr ctx, bool& has_more, + std::vector& results); butil::Status DocumentBatchQuery(std::shared_ptr ctx, std::vector& document_with_ids); @@ -72,6 +75,28 @@ class DocumentReader { mvcc::ReaderPtr reader_; }; +class DocumentSearchAllStreamState; +using DocumentSearchAllStreamStatePtr = std::shared_ptr; + +class DocumentSearchAllStreamState : public StreamState { + public: + DocumentSearchAllStreamState(std::vector& vec) { + results_ = vec; + current_ = results_.begin(); + end_ = results_.end(); + } + ~DocumentSearchAllStreamState() override = default; + bool Batch(int32_t limit, std::vector& results); + static DocumentSearchAllStreamStatePtr New(std::vector vec) { + return std::make_shared(vec); + } + + private: + std::vector::iterator current_; + std::vector::iterator end_; + std::vector results_; +}; + } // namespace dingodb #endif // DINGODB_DOCUMENT_READER_H_ \ No newline at end of file diff --git a/src/engine/engine.h b/src/engine/engine.h index f9aecbd02..6e27e9b5c 100644 --- a/src/engine/engine.h +++ b/src/engine/engine.h @@ -23,6 +23,7 @@ #include "butil/status.h" #include "common/context.h" +#include "common/stream.h" #include "config/config.h" #include "document/document_index.h" #include "engine/raw_engine.h" @@ -225,10 +226,16 @@ class Engine : public std::enable_shared_from_this { DocumentIndexWrapperPtr document_index; pb::common::ScalarSchema scalar_schema; + + StreamPtr Stream() { return stream; } + void SetStream(StreamPtr streamptr) { stream = streamptr; } + StreamPtr stream; }; virtual butil::Status DocumentSearch(std::shared_ptr ctx, std::vector& results) = 0; + virtual butil::Status DocumentSearchAll(std::shared_ptr ctx, bool& has_more, + std::vector& results) = 0; virtual butil::Status DocumentBatchQuery(std::shared_ptr ctx, std::vector& doc_with_ids) = 0; diff --git a/src/engine/mono_store_engine.cc b/src/engine/mono_store_engine.cc index ba5235e20..c22756fe5 100644 --- a/src/engine/mono_store_engine.cc +++ b/src/engine/mono_store_engine.cc @@ -304,6 +304,13 @@ butil::Status MonoStoreEngine::DocumentReader::DocumentSearch(std::shared_ptrDocumentSearch(ctx, results); } +butil::Status MonoStoreEngine::DocumentReader::DocumentSearchAll(std::shared_ptr ctx, + bool& has_more, + std::vector& results) { + auto vector_reader = dingodb::DocumentReader::New(reader_); + return vector_reader->DocumentSearchAll(ctx, has_more, results); +} + butil::Status MonoStoreEngine::DocumentReader::DocumentBatchQuery( std::shared_ptr ctx, std::vector& document_with_ids) { auto vector_reader = dingodb::DocumentReader::New(reader_); diff --git a/src/engine/mono_store_engine.h b/src/engine/mono_store_engine.h index d5a37a35f..14e9f53e9 100644 --- a/src/engine/mono_store_engine.h +++ b/src/engine/mono_store_engine.h @@ -155,6 +155,8 @@ class MonoStoreEngine : public Engine { butil::Status DocumentSearch(std::shared_ptr ctx, std::vector& results) override; + butil::Status DocumentSearchAll(std::shared_ptr ctx, bool& has_more, + std::vector& results) override; butil::Status DocumentBatchQuery(std::shared_ptr ctx, std::vector& document_with_ids) override; butil::Status DocumentGetBorderId(int64_t ts, const pb::common::Range& region_range, bool get_min, diff --git a/src/engine/raft_store_engine.cc b/src/engine/raft_store_engine.cc index 2e0745014..667ab9743 100644 --- a/src/engine/raft_store_engine.cc +++ b/src/engine/raft_store_engine.cc @@ -527,6 +527,13 @@ butil::Status RaftStoreEngine::DocumentReader::DocumentSearch(std::shared_ptrDocumentSearch(ctx, results); } +butil::Status RaftStoreEngine::DocumentReader::DocumentSearchAll(std::shared_ptr ctx, + bool& has_more, + std::vector& results) { + auto document_reader = dingodb::DocumentReader::New(reader_); + return document_reader->DocumentSearchAll(ctx, has_more, results); +} + butil::Status RaftStoreEngine::DocumentReader::DocumentBatchQuery( std::shared_ptr ctx, std::vector& document_with_ids) { auto document_reader = dingodb::DocumentReader::New(reader_); diff --git a/src/engine/raft_store_engine.h b/src/engine/raft_store_engine.h index 6adb555af..6f725c981 100644 --- a/src/engine/raft_store_engine.h +++ b/src/engine/raft_store_engine.h @@ -204,6 +204,8 @@ class RaftStoreEngine : public Engine, public RaftControlAble { butil::Status DocumentSearch(std::shared_ptr ctx, std::vector& results) override; + butil::Status DocumentSearchAll(std::shared_ptr ctx, bool& has_more, + std::vector& results) override; butil::Status DocumentBatchQuery(std::shared_ptr ctx, std::vector& document_with_ids) override; butil::Status DocumentGetBorderId(int64_t ts, const pb::common::Range& region_range, bool get_min, diff --git a/src/engine/storage.cc b/src/engine/storage.cc index e61adf160..0030ab047 100644 --- a/src/engine/storage.cc +++ b/src/engine/storage.cc @@ -1049,6 +1049,42 @@ butil::Status Storage::DocumentSearch(std::shared_ptr ctx, + const pb::stream::StreamRequestMeta& req_stream_meta, bool& has_more, + std::vector& results) { + auto status = ValidateLeader(ctx->region_id); + if (BAIDU_UNLIKELY(!status.ok())) { + return status; + } + // after validate leader + auto stream_meta = req_stream_meta; + auto stream = Server::GetInstance().GetStreamManager()->GetOrNew(stream_meta); + if (stream == nullptr) { + return butil::Status(pb::error::ESTREAM_EXPIRED, fmt::format("stream({}) is expired.", stream_meta.stream_id())); + } + ctx->SetStream(stream); + + DINGO_LOG(DEBUG) << "DocumentSearchAll region_id: " << ctx->region_id << ", stream limit: " << stream_meta.limit() + << ", has_more: " << has_more; + + auto document_reader = GetEngineDocumentReader(ctx->store_engine_type, ctx->raw_engine_type); + + status = document_reader->DocumentSearchAll(ctx, has_more, results); + if (BAIDU_UNLIKELY(!status.ok())) { + if (pb::error::EKEY_NOT_FOUND == status.error_code()) { + // return OK if not found + return butil::Status::OK(); + } + Server::GetInstance().GetStreamManager()->RemoveStream(stream); + return status; + } + if (!has_more || stream_meta.close()) { + Server::GetInstance().GetStreamManager()->RemoveStream(stream); + } + + return butil::Status(); +} + butil::Status Storage::DocumentGetBorderId(store::RegionPtr region, bool get_min, int64_t ts, int64_t& document_id) { auto status = ValidateLeader(region); if (BAIDU_UNLIKELY(!status.ok())) { diff --git a/src/engine/storage.h b/src/engine/storage.h index 5211fb6ed..0ab2c8393 100644 --- a/src/engine/storage.h +++ b/src/engine/storage.h @@ -188,6 +188,8 @@ class Storage { std::vector& document_with_ids); butil::Status DocumentSearch(std::shared_ptr ctx, std::vector& results); + butil::Status DocumentSearchAll(std::shared_ptr ctx, const pb::stream::StreamRequestMeta& req_stream_meta, bool& has_more, + std::vector& results); butil::Status DocumentGetBorderId(store::RegionPtr region, bool get_min, int64_t ts, int64_t& document_id); butil::Status DocumentScanQuery(std::shared_ptr ctx, std::vector& document_with_ids); diff --git a/src/engine/txn_engine_helper.cc b/src/engine/txn_engine_helper.cc index 2e93dddec..81465508c 100644 --- a/src/engine/txn_engine_helper.cc +++ b/src/engine/txn_engine_helper.cc @@ -3714,7 +3714,6 @@ butil::Status TxnEngineHelper::TxnCheckSecondaryLocks(RawEnginePtr raw_engine, s return butil::Status::OK(); } } - DINGO_LOG(INFO) << "yjddebug checksecondary commit_ts:" << response->commit_ts(); return butil::Status::OK(); } diff --git a/src/handler/raft_apply_handler.cc b/src/handler/raft_apply_handler.cc index 09c33f671..cef8b17d7 100644 --- a/src/handler/raft_apply_handler.cc +++ b/src/handler/raft_apply_handler.cc @@ -276,7 +276,6 @@ bool HandlePreCreateRegionSplit(const pb::raft::SplitRequest &request, store::Re // temporary disable split/merge/change_peer, avoid overlap change. store_region_meta->UpdateTemporaryDisableChange(from_region, true); store_region_meta->UpdateTemporaryDisableChange(to_region, true); - pb::common::Range to_range; // child range to_range.set_start_key(old_from_range.start_key()); @@ -510,7 +509,6 @@ bool HandlePostCreateRegionSplit(const pb::raft::SplitRequest &request, store::R // temporary disable split, avoid overlap change. store_region_meta->UpdateTemporaryDisableChange(parent_region, true); - // Set child region definition pb::common::RegionDefinition definition = parent_region->Definition(); definition.set_id(child_region_id); @@ -854,7 +852,6 @@ int CommitMergeHandler::Handle(std::shared_ptr, store::RegionPtr target store_region_meta->UpdateLastChangeJobId(target_region, request.job_id()); // Disable temporary change store_region_meta->UpdateTemporaryDisableChange(target_region, true); - auto source_region = store_region_meta->GetRegion(request.source_region_id()); if (source_region == nullptr) { DINGO_LOG(FATAL) << fmt::format( diff --git a/src/server/document_service.cc b/src/server/document_service.cc index a793f248f..c637cfae7 100644 --- a/src/server/document_service.cc +++ b/src/server/document_service.cc @@ -292,6 +292,121 @@ void DocumentServiceImpl::DocumentSearch(google::protobuf::RpcController* contro } } +static butil::Status ValidateDocumentSearchAllRequest(StoragePtr storage, + const pb::document::DocumentSearchAllRequest* request, + store::RegionPtr region) { + if (region == nullptr) { + return butil::Status( + pb::error::EREGION_NOT_FOUND, + fmt::format("Not found region {} at server {}", request->context().region_id(), Server::GetInstance().Id())); + } + + auto status = ServiceHelper::ValidateRegionEpoch(request->context().region_epoch(), region); + if (!status.ok()) { + return status; + } + + if (request->context().region_id() == 0) { + return butil::Status(pb::error::EILLEGAL_PARAMTETERS, "Param region_id is error"); + } + if (request->stream_meta().limit() <= 0) { + return butil::Status(pb::error::EILLEGAL_PARAMTETERS, "param limit is invalid"); + } + if (request->stream_meta().limit() > FLAGS_stream_message_max_limit_size) { + return butil::Status(pb::error::EILLEGAL_PARAMTETERS, "param limit beyond max limit"); + } + + status = storage->ValidateLeader(region); + if (!status.ok()) { + return status; + } + + if (!region->DocumentIndexWrapper()->IsReady()) { + if (region->DocumentIndexWrapper()->IsBuildError()) { + return butil::Status(pb::error::EDOCUMENT_INDEX_BUILD_ERROR, + fmt::format("Document index {} build error, please wait for recover.", region->Id())); + } + return butil::Status(pb::error::EDOCUMENT_INDEX_NOT_READY, + fmt::format("Document index {} not ready, please retry.", region->Id())); + } + + return ServiceHelper::ValidateRegionState(region); +} + +void DoDocumentSearchAll(StoragePtr storage, google::protobuf::RpcController* controller, + const pb::document::DocumentSearchAllRequest* request, + pb::document::DocumentSearchAllResponse* response, TrackClosure* done) { + brpc::Controller* cntl = (brpc::Controller*)controller; + brpc::ClosureGuard done_guard(done); + auto tracker = done->Tracker(); + tracker->SetServiceQueueWaitTime(); + + auto region = done->GetRegion(); + int64_t region_id = request->context().region_id(); + + butil::Status status = ValidateDocumentSearchAllRequest(storage, request, region); + if (!status.ok()) { + ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); + ServiceHelper::GetStoreRegionInfo(region, response->mutable_error()); + return; + } + + auto* mut_request = const_cast(request); + auto ctx = std::make_shared(); + ctx->partition_id = region->PartitionId(); + ctx->region_id = region->Id(); + ctx->document_index = region->DocumentIndexWrapper(); + ctx->region_range = region->Range(false); + ctx->parameter.Swap(mut_request->mutable_parameter()); + ctx->raw_engine_type = region->GetRawEngineType(); + ctx->store_engine_type = region->GetStoreEngineType(); + + std::vector document_results; + bool has_more = false; + status = storage->DocumentSearchAll(ctx, mut_request->stream_meta(), has_more, document_results); + if (!status.ok()) { + ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); + return; + } + + for (auto& document_with_score : document_results) { + *(response->add_document_with_scores()) = document_with_score; + } + auto stream = ctx->Stream(); + CHECK(stream != nullptr) << fmt::format("[region({})] stream is nullptr.", region_id); + + auto* mut_stream_meta = response->mutable_stream_meta(); + mut_stream_meta->set_stream_id(stream->StreamId()); + mut_stream_meta->set_has_more(has_more); +} + +void DocumentServiceImpl::DocumentSearchAll(google::protobuf::RpcController* controller, + const pb::document::DocumentSearchAllRequest* request, + pb::document::DocumentSearchAllResponse* response, + google::protobuf::Closure* done) { + auto* svr_done = new ServiceClosure(__func__, done, request, response); + + if (BAIDU_UNLIKELY(svr_done->GetRegion() == nullptr)) { + brpc::ClosureGuard done_guard(svr_done); + return; + } + + if (!FLAGS_enable_async_document_search) { + return DoDocumentSearchAll(storage_, controller, request, response, svr_done); + } + + // Run in queue. + auto task = std::make_shared([this, controller, request, response, svr_done]() { + DoDocumentSearchAll(storage_, controller, request, response, svr_done); + }); + bool ret = read_worker_set_->ExecuteLeastQueue(task); + if (BAIDU_UNLIKELY(!ret)) { + brpc::ClosureGuard done_guard(svr_done); + ServiceHelper::SetError(response->mutable_error(), pb::error::EREQUEST_FULL, + "WorkerSet queue is full, please wait and retry"); + } +} + static butil::Status ValidateDocumentAddRequest(StoragePtr storage, const pb::document::DocumentAddRequest* request, store::RegionPtr region) { auto status = ServiceHelper::ValidateRegionEpoch(request->context().region_epoch(), region); @@ -1587,10 +1702,10 @@ void DoTxnPrewriteDocument(StoragePtr storage, google::protobuf::RpcController* } std::vector kvs; - status = - storage->TxnPrewrite(ctx, region, mutations, request->primary_lock(), request->start_ts(), request->lock_ttl(), - request->txn_size(), request->try_one_pc(), request->min_commit_ts(), - request->max_commit_ts(), pessimistic_checks, for_update_ts_checks, lock_extra_datas, secondaries); + status = storage->TxnPrewrite(ctx, region, mutations, request->primary_lock(), request->start_ts(), + request->lock_ttl(), request->txn_size(), request->try_one_pc(), + request->min_commit_ts(), request->max_commit_ts(), pessimistic_checks, + for_update_ts_checks, lock_extra_datas, secondaries); if (!status.ok()) { ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); diff --git a/src/server/document_service.h b/src/server/document_service.h index 6f8a737be..6278a4af7 100644 --- a/src/server/document_service.h +++ b/src/server/document_service.h @@ -36,6 +36,11 @@ class DocumentServiceImpl : public pb::document::DocumentService { pb::document::DocumentBatchQueryResponse* response, google::protobuf::Closure* done) override; void DocumentSearch(google::protobuf::RpcController* controller, const pb::document::DocumentSearchRequest* request, pb::document::DocumentSearchResponse* response, google::protobuf::Closure* done) override; + + void DocumentSearchAll(google::protobuf::RpcController* controller, + const pb::document::DocumentSearchAllRequest* request, + pb::document::DocumentSearchAllResponse* response, google::protobuf::Closure* done) override; + void DocumentGetBorderId(google::protobuf::RpcController* controller, const pb::document::DocumentGetBorderIdRequest* request, pb::document::DocumentGetBorderIdResponse* response, diff --git a/test/unit_test/document/test_document_index.cc b/test/unit_test/document/test_document_index.cc index ab8261cc2..074b516c0 100644 --- a/test/unit_test/document/test_document_index.cc +++ b/test/unit_test/document/test_document_index.cc @@ -142,7 +142,7 @@ TEST(DingoDocumentIndexTest, test_default_create) { { std::vector results; - ret = document_index->Search(5, "discover", false, 0, INT64_MAX, false, {}, {}, results); + ret = document_index->Search(5, "discover", false, 0, INT64_MAX, false, false, {}, {}, results); std::cout << "status: " << ret.error_code() << ", " << ret.error_str() << '\n'; EXPECT_EQ(ret.ok(), true); EXPECT_EQ(results.size(), 1); @@ -150,7 +150,7 @@ TEST(DingoDocumentIndexTest, test_default_create) { { std::vector results; - ret = document_index->Search(10, "of", false, 0, INT64_MAX, false, {}, {}, results); + ret = document_index->Search(10, "of", false, 0, INT64_MAX, false, false, {}, {}, results); std::cout << "status: " << ret.error_code() << ", " << ret.error_str() << '\n'; EXPECT_EQ(ret.ok(), true); EXPECT_EQ(results.size(), 5); @@ -248,7 +248,7 @@ TEST(DingoDocumentIndexTest, test_load_or_create) { { std::vector results; - ret = document_index->Search(5, "discover", false, 0, INT64_MAX, false, {}, {}, results); + ret = document_index->Search(5, "discover", false, 0, INT64_MAX, false, false, {}, {}, results); std::cout << "status: " << ret.error_code() << ", " << ret.error_str() << '\n'; EXPECT_EQ(ret.ok(), true); EXPECT_EQ(results.size(), 1); @@ -256,7 +256,7 @@ TEST(DingoDocumentIndexTest, test_load_or_create) { { std::vector results; - ret = document_index->Search(10, "of", false, 0, INT64_MAX, false, {}, {}, results); + ret = document_index->Search(10, "of", false, 0, INT64_MAX, false, false, {}, {}, results); std::cout << "status: " << ret.error_code() << ", " << ret.error_str() << '\n'; EXPECT_EQ(ret.ok(), true); EXPECT_EQ(results.size(), 5); @@ -264,7 +264,7 @@ TEST(DingoDocumentIndexTest, test_load_or_create) { { std::vector results; - ret = document_index->Search(10, R"(text:"of")", true, 5, 8, false, {}, {}, results); + ret = document_index->Search(10, R"(text:"of")", true, 5, 8, false, false, {}, {}, results); std::cout << "status: " << ret.error_code() << ", " << ret.error_str() << '\n'; EXPECT_EQ(ret.ok(), true); EXPECT_EQ(results.size(), 3); @@ -276,7 +276,7 @@ TEST(DingoDocumentIndexTest, test_load_or_create) { alive_ids.push_back(6); alive_ids.push_back(7); alive_ids.push_back(8); - ret = document_index->Search(10, R"(text:"of")", true, 5, 8, true, alive_ids, {}, results); + ret = document_index->Search(10, R"(text:"of")", true, 5, 8, true, false, alive_ids, {}, results); std::cout << "status: " << ret.error_code() << ", " << ret.error_str() << '\n'; EXPECT_EQ(ret.ok(), true); EXPECT_EQ(results.size(), 2); @@ -288,7 +288,7 @@ TEST(DingoDocumentIndexTest, test_load_or_create) { alive_ids.push_back(6); alive_ids.push_back(7); alive_ids.push_back(8); - ret = document_index->Search(10, R"(text:"of")", false, 5, 8, true, alive_ids, {}, results); + ret = document_index->Search(10, R"(text:"of")", false, 5, 8, true, false, alive_ids, {}, results); std::cout << "status: " << ret.error_code() << ", " << ret.error_str() << '\n'; EXPECT_EQ(ret.ok(), true); EXPECT_EQ(results.size(), 3); @@ -300,7 +300,7 @@ TEST(DingoDocumentIndexTest, test_load_or_create) { alive_ids.push_back(6); alive_ids.push_back(7); alive_ids.push_back(8); - ret = document_index->Search(10, R"(text:"of" AND row_id:IN [6])", true, 5, 8, true, alive_ids, {}, results); + ret = document_index->Search(10, R"(text:"of" AND row_id:IN [6])", true, 5, 8, true, false, alive_ids, {}, results); std::cout << "status: " << ret.error_code() << ", " << ret.error_str() << '\n'; EXPECT_EQ(ret.ok(), true); EXPECT_EQ(results.size(), 1); @@ -312,8 +312,8 @@ TEST(DingoDocumentIndexTest, test_load_or_create) { alive_ids.push_back(6); alive_ids.push_back(7); alive_ids.push_back(8); - ret = document_index->Search(10, R"(text:"of" AND row_id:IN [6 7] AND i64: >= 1006)", true, 5, 8, true, alive_ids, - {}, results); + ret = document_index->Search(10, R"(text:"of" AND row_id:IN [6 7] AND i64: >= 1006)", true, 5, 8, true, false, + alive_ids, {}, results); std::cout << "status: " << ret.error_code() << ", " << ret.error_str() << '\n'; EXPECT_EQ(ret.ok(), true); EXPECT_EQ(results.size(), 1); @@ -444,7 +444,7 @@ TEST(DingoDocumentIndexTest, test_upsert) { { std::vector results; - ret = document_index->Search(5, "discover", false, 0, INT64_MAX, false, {}, {}, results); + ret = document_index->Search(5, "discover", false, 0, INT64_MAX, false, false, {}, {}, results); std::cout << "status: " << ret.error_code() << ", " << ret.error_str() << '\n'; EXPECT_EQ(ret.ok(), true); EXPECT_EQ(results.size(), 1); @@ -452,7 +452,7 @@ TEST(DingoDocumentIndexTest, test_upsert) { { std::vector results; - ret = document_index->Search(10, "of", false, 0, INT64_MAX, false, {}, {}, results); + ret = document_index->Search(10, "of", false, 0, INT64_MAX, false, false, {}, {}, results); std::cout << "status: " << ret.error_code() << ", " << ret.error_str() << '\n'; EXPECT_EQ(ret.ok(), true); EXPECT_EQ(results.size(), 4); @@ -460,7 +460,7 @@ TEST(DingoDocumentIndexTest, test_upsert) { { std::vector results; - ret = document_index->Search(10, R"(text:"of")", true, 5, 8, false, {}, {}, results); + ret = document_index->Search(10, R"(text:"of")", true, 5, 8, false, false, {}, {}, results); std::cout << "status: " << ret.error_code() << ", " << ret.error_str() << '\n'; EXPECT_EQ(ret.ok(), true); EXPECT_EQ(results.size(), 2); @@ -472,7 +472,7 @@ TEST(DingoDocumentIndexTest, test_upsert) { alive_ids.push_back(6); alive_ids.push_back(7); alive_ids.push_back(8); - ret = document_index->Search(10, R"(text:"of")", true, 5, 8, true, alive_ids, {}, results); + ret = document_index->Search(10, R"(text:"of")", true, 5, 8, true, false, alive_ids, {}, results); std::cout << "status: " << ret.error_code() << ", " << ret.error_str() << '\n'; EXPECT_EQ(ret.ok(), true); @@ -489,7 +489,7 @@ TEST(DingoDocumentIndexTest, test_upsert) { alive_ids.push_back(6); alive_ids.push_back(7); alive_ids.push_back(8); - ret = document_index->Search(10, R"(text:"of")", false, 5, 8, true, alive_ids, {}, results); + ret = document_index->Search(10, R"(text:"of")", false, 5, 8, true, false, alive_ids, {}, results); std::cout << "status: " << ret.error_code() << ", " << ret.error_str() << '\n'; EXPECT_EQ(ret.ok(), true); EXPECT_EQ(results.size(), 2); @@ -501,7 +501,7 @@ TEST(DingoDocumentIndexTest, test_upsert) { alive_ids.push_back(6); alive_ids.push_back(7); alive_ids.push_back(8); - ret = document_index->Search(10, R"(text:"of" AND row_id:IN [6])", true, 5, 8, true, alive_ids, {}, results); + ret = document_index->Search(10, R"(text:"of" AND row_id:IN [6])", true, 5, 8, true, false, alive_ids, {}, results); std::cout << "status: " << ret.error_code() << ", " << ret.error_str() << '\n'; EXPECT_EQ(ret.ok(), true); EXPECT_EQ(results.size(), 1); @@ -513,8 +513,8 @@ TEST(DingoDocumentIndexTest, test_upsert) { alive_ids.push_back(6); alive_ids.push_back(7); alive_ids.push_back(8); - ret = document_index->Search(10, R"(text:"of" AND row_id:IN [6 7] AND i64: >= 1006)", true, 5, 8, true, alive_ids, - {}, results); + ret = document_index->Search(10, R"(text:"of" AND row_id:IN [6 7] AND i64: >= 1006)", true, 5, 8, true, false, + alive_ids, {}, results); std::cout << "status: " << ret.error_code() << ", " << ret.error_str() << '\n'; EXPECT_EQ(ret.ok(), true); EXPECT_EQ(results.size(), 0); diff --git a/test/unit_test/document/test_tantivy_search.cc b/test/unit_test/document/test_tantivy_search.cc index 105fae64c..66f802d41 100644 --- a/test/unit_test/document/test_tantivy_search.cc +++ b/test/unit_test/document/test_tantivy_search.cc @@ -159,7 +159,7 @@ TEST(DingoTantivySearchTest, test_default_create) { ret = ffi_load_index_reader(index_path); EXPECT_EQ(ret.result, true); - auto result = ffi_bm25_search(index_path, "of", 10, {}, false).result; + auto result = ffi_bm25_search(index_path, "of", 10, {}, false, false).result; for (const auto& it : result) { std::cout << "rowid:" << it.row_id << " score:" << it.score << " doc_id:" << it.doc_id << " seg_id:" << it.seg_id @@ -287,7 +287,7 @@ TEST(DingoTantivySearchTest, test_tokenizer_create) { ret = ffi_load_index_reader(index_path); EXPECT_EQ(ret.result, true); - auto result = ffi_bm25_search(index_path, "影响深远", 10, {}, false).result; + auto result = ffi_bm25_search(index_path, "影响深远", 10, {}, false, false).result; for (const auto& it : result) { std::cout << "rowid:" << it.row_id << " score:" << it.score << " doc_id:" << it.doc_id << " seg_id:" << it.seg_id @@ -471,7 +471,7 @@ TEST(DingoTantivySearchTest, test_multi_type_column) { ret = ffi_load_index_reader(index_path); EXPECT_EQ(ret.result, true); - auto result = ffi_bm25_search(index_path, "社会", 10, {}, false).result; + auto result = ffi_bm25_search(index_path, "社会", 10, {}, false, false).result; std::cout << "ffi_bm25_search result size:" << result.size() << '\n'; for (const auto& it : result) { @@ -479,32 +479,35 @@ TEST(DingoTantivySearchTest, test_multi_type_column) { << " seg_id:" << it.seg_id << '\n'; } - result = ffi_bm25_search_with_column_names(index_path, "社会", 10, {}, false, false, 0, 0, {"col1"}).result; + result = ffi_bm25_search_with_column_names(index_path, "社会", 10, {}, false, false, 0, 0, {"col1"}, false).result; std::cout << "ffi_bm25_search_with_column_names col1 result size:" << result.size() << '\n'; for (const auto& it : result) { std::cout << "ffi_bm25_search_with_column_names rowid:" << it.row_id << " score:" << it.score << " doc_id:" << it.doc_id << " seg_id:" << it.seg_id << '\n'; } - result = ffi_bm25_search_with_column_names(index_path, "balance", 10, {}, false, false, 0, 0, {"col4"}).result; + result = ffi_bm25_search_with_column_names(index_path, "balance", 10, {}, false, false, 0, 0, {"col4"}, false).result; std::cout << "ffi_bm25_search_with_column_names col4 result size:" << result.size() << '\n'; for (const auto& it : result) { std::cout << "ffi_bm25_search_with_column_names rowid:" << it.row_id << " score:" << it.score << " doc_id:" << it.doc_id << " seg_id:" << it.seg_id << '\n'; } - result = ffi_bm25_search_with_column_names(index_path, "社会", 10, {}, false, false, 0, 0, {"col1", "col4"}).result; + result = + ffi_bm25_search_with_column_names(index_path, "社会", 10, {}, false, false, 0, 0, {"col1", "col4"}, false).result; std::cout << "ffi_bm25_search_with_column_names col1,col4 result size:" << result.size() << '\n'; for (const auto& it : result) { std::cout << "ffi_bm25_search_with_column_names rowid:" << it.row_id << " score:" << it.score << " doc_id:" << it.doc_id << " seg_id:" << it.seg_id << '\n'; } - result = ffi_bm25_search_with_column_names(index_path, "社会", 10, {}, false, false, 0, 0, {"col11", "col44"}).result; + result = ffi_bm25_search_with_column_names(index_path, "社会", 10, {}, false, false, 0, 0, {"col11", "col44"}, false) + .result; std::cout << "ffi_bm25_search_with_column_names col11,col44 result size:" << result.size() << '\n'; result = - ffi_bm25_search_with_column_names(index_path, "col2: IN [200 300 400]", 10, {}, false, false, 0, 0, {}).result; + ffi_bm25_search_with_column_names(index_path, "col2: IN [200 300 400]", 10, {}, false, false, 0, 0, {}, false) + .result; std::cout << "ffi_bm25_search_with_column_names-1 parser result size:" << result.size() << '\n'; for (const auto& it : result) { std::cout << "ffi_bm25_search_with_column_names rowid:" << it.row_id << " score:" << it.score @@ -512,7 +515,8 @@ TEST(DingoTantivySearchTest, test_multi_type_column) { } result = - ffi_bm25_search_with_column_names(index_path, "col222: IN [200 300 400]", 10, {}, false, false, 0, 0, {}).result; + ffi_bm25_search_with_column_names(index_path, "col222: IN [200 300 400]", 10, {}, false, false, 0, 0, {}, false) + .result; std::cout << "ffi_bm25_search_with_column_names-2 parser result size:" << result.size() << '\n'; for (const auto& it : result) { std::cout << "ffi_bm25_search_with_column_names rowid:" << it.row_id << " score:" << it.score @@ -520,7 +524,7 @@ TEST(DingoTantivySearchTest, test_multi_type_column) { } auto bm25_result = - ffi_bm25_search_with_column_names(index_path, "col2: IN [200 300 400]", 10, {}, false, false, 0, 0, {}); + ffi_bm25_search_with_column_names(index_path, "col2: IN [200 300 400]", 10, {}, false, false, 0, 0, {}, false); if (bm25_result.error_code != 0) { std::cout << "ffi_bm25_search_with_column_names2-1 error:" << bm25_result.error_msg.c_str() << '\n'; } else { @@ -532,7 +536,7 @@ TEST(DingoTantivySearchTest, test_multi_type_column) { } bm25_result = ffi_bm25_search_with_column_names(index_path, "col222: IN [200 300 400 500 600 700 800]", 10, {}, false, - false, 0, 0, {}); + false, 0, 0, {}, false); if (bm25_result.error_code != 0) { std::cout << "ffi_bm25_search_with_column_names2-2 error:" << bm25_result.error_msg.c_str() << '\n'; } else { @@ -557,7 +561,7 @@ TEST(DingoTantivySearchTest, test_multi_type_column) { alived_ids.push_back(6); alived_ids.push_back(7); bm25_result = ffi_bm25_search_with_column_names(index_path, "col2: IN [800 700 600 500 400 300 200]", 3, alived_ids, - true, false, 0, 0, {}); + true, false, 0, 0, {}, false); if (bm25_result.error_code != 0) { std::cout << "ffi_bm25_search_with_column_names2-3 filter_ids error:" << bm25_result.error_msg.c_str() << '\n'; } else { @@ -630,7 +634,8 @@ TEST(DingoTantivySearchTest, test_load_multi_type_column) { std::cout << "ffi_index_reader_reload success" << '\n'; } - auto bm25_result = ffi_bm25_search_with_column_names(index_path, "col2: IN [101]", 10, {}, false, false, 0, 0, {}); + auto bm25_result = + ffi_bm25_search_with_column_names(index_path, "col2: IN [101]", 10, {}, false, false, 0, 0, {}, false); if (bm25_result.error_code != 0) { std::cout << "ffi_bm25_search_with_column_names2-1 error:" << bm25_result.error_msg.c_str() << '\n'; EXPECT_EQ(bm25_result.error_code, 0); @@ -728,7 +733,7 @@ TEST(DingoTantivySearchTest, test_bytes_column) { alived_ids.push_back(1); alived_ids.push_back(2); auto bm25_result = - ffi_bm25_search_with_column_names(index_path, "col2: IN [100 200]", 3, alived_ids, true, false, 0, 0, {}); + ffi_bm25_search_with_column_names(index_path, "col2: IN [100 200]", 3, alived_ids, true, false, 0, 0, {}, false); if (bm25_result.error_code != 0) { std::cout << __func__ << "test-1 filter_ids error:" << bm25_result.error_msg.c_str() << '\n'; EXPECT_EQ(bm25_result.error_code, 0); @@ -740,7 +745,7 @@ TEST(DingoTantivySearchTest, test_bytes_column) { } } - bm25_result = ffi_bm25_search_with_column_names(index_path, "col3: > 101", 10, {}, false, false, 0, 0, {}); + bm25_result = ffi_bm25_search_with_column_names(index_path, "col3: > 101", 10, {}, false, false, 0, 0, {}, false); if (bm25_result.error_code != 0) { std::cout << __func__ << "test-2 parser error:" << bm25_result.error_msg.c_str() << '\n'; EXPECT_EQ(bm25_result.error_code, 0); @@ -753,7 +758,7 @@ TEST(DingoTantivySearchTest, test_bytes_column) { } bm25_result = - ffi_bm25_search_with_column_names(index_path, "col5: IN [dGVzdDExMQ==]", 10, {}, false, false, 0, 0, {}); + ffi_bm25_search_with_column_names(index_path, "col5: IN [dGVzdDExMQ==]", 10, {}, false, false, 0, 0, {}, false); if (bm25_result.error_code != 0) { std::cout << __func__ << "test-3 parser error:" << bm25_result.error_msg.c_str() << '\n'; EXPECT_EQ(bm25_result.error_code, 0); @@ -766,7 +771,8 @@ TEST(DingoTantivySearchTest, test_bytes_column) { } } - bm25_result = ffi_bm25_search_with_column_names(index_path, "col5: IN [dGVzdDExMQ==]", 10, {}, false, true, 1, 2, {}); + bm25_result = + ffi_bm25_search_with_column_names(index_path, "col5: IN [dGVzdDExMQ==]", 10, {}, false, true, 1, 2, {}, false); if (bm25_result.error_code != 0) { std::cout << __func__ << "test-4 parser error:" << bm25_result.error_msg.c_str() << '\n'; EXPECT_EQ(bm25_result.error_code, 0); @@ -1034,50 +1040,54 @@ TEST(DingoTantivySearchTest, test_parse_query_range) { ret = ffi_load_index_reader(index_path); EXPECT_EQ(ret.result, true); - auto bm25_result = ffi_bm25_search_with_column_names(index_path, "col2:{800 TO *}", 10, {}, false, false, 0, 0, {}); + auto bm25_result = + ffi_bm25_search_with_column_names(index_path, "col2:{800 TO *}", 10, {}, false, false, 0, 0, {}, false); EXPECT_EQ(bm25_result.error_code, 0); EXPECT_EQ(bm25_result.result.size(), 2); bm25_result = - ffi_bm25_search_with_column_names(index_path, "col2:{300 TO 1000}", 10, {}, false, false, 0, 0, {"col2"}); + ffi_bm25_search_with_column_names(index_path, "col2:{300 TO 1000}", 10, {}, false, false, 0, 0, {"col2"}, false); EXPECT_EQ(bm25_result.error_code, 0); EXPECT_EQ(bm25_result.result.size(), 6); // ignore column_name - bm25_result = ffi_bm25_search_with_column_names(index_path, "col2:{800 TO *}", 10, {}, false, false, 0, 0, {"col3"}); + bm25_result = + ffi_bm25_search_with_column_names(index_path, "col2:{800 TO *}", 10, {}, false, false, 0, 0, {"col3"}, false); EXPECT_EQ(bm25_result.error_code, 0); EXPECT_EQ(bm25_result.result.size(), 2); // 'Unsupported query: Range query need to target a specific field.' - bm25_result = ffi_bm25_search_with_column_names(index_path, "[300 TO *]", 10, {}, false, false, 0, 0, {""}); + bm25_result = ffi_bm25_search_with_column_names(index_path, "[300 TO *]", 10, {}, false, false, 0, 0, {""}, false); EXPECT_NE(bm25_result.error_code, 0); // 'Unsupported query: Range query need to target a specific field.' - bm25_result = ffi_bm25_search_with_column_names(index_path, "[300 TO *]", 10, {}, false, false, 0, 0, {"col2"}); + bm25_result = + ffi_bm25_search_with_column_names(index_path, "[300 TO *]", 10, {}, false, false, 0, 0, {"col2"}, false); EXPECT_NE(bm25_result.error_code, 0); // 'Unsupported query: Range query need to target a specific field.' - bm25_result = ffi_bm25_search_with_column_names(index_path, "[a TO c]", 10, {}, false, false, 0, 0, {}); + bm25_result = ffi_bm25_search_with_column_names(index_path, "[a TO c]", 10, {}, false, false, 0, 0, {}, false); EXPECT_NE(bm25_result.error_code, 0); // 'Unsupported query: Range query need to target a specific field.' - bm25_result = ffi_bm25_search_with_column_names(index_path, "[a TO c]", 10, {}, false, false, 0, 0, {"col4"}); + bm25_result = ffi_bm25_search_with_column_names(index_path, "[a TO c]", 10, {}, false, false, 0, 0, {"col4"}, false); EXPECT_NE(bm25_result.error_code, 0); - bm25_result = ffi_bm25_search_with_column_names(index_path, "col4:[a TO c]", 10, {}, false, false, 0, 0, {}); + bm25_result = ffi_bm25_search_with_column_names(index_path, "col4:[a TO c]", 10, {}, false, false, 0, 0, {}, false); EXPECT_EQ(bm25_result.error_code, 0); EXPECT_EQ(bm25_result.result.size(), 4); - bm25_result = ffi_bm25_search_with_column_names(index_path, "col4:[a TO *]", 10, {}, false, false, 0, 0, {}); + bm25_result = ffi_bm25_search_with_column_names(index_path, "col4:[a TO *]", 10, {}, false, false, 0, 0, {}, false); EXPECT_EQ(bm25_result.error_code, 0); EXPECT_EQ(bm25_result.result.size(), 10); - bm25_result = ffi_bm25_search_with_column_names(index_path, "col4:[a TO c]", 10, {}, false, false, 0, 0, {"col3"}); + bm25_result = + ffi_bm25_search_with_column_names(index_path, "col4:[a TO c]", 10, {}, false, false, 0, 0, {"col3"}, false); EXPECT_EQ(bm25_result.error_code, 0); EXPECT_EQ(bm25_result.result.size(), 4); bm25_result = ffi_bm25_search_with_column_names(index_path, "col2:[0 TO *] AND -col2:[300 TO *]", 10, {}, false, - false, 0, 0, {}); + false, 0, 0, {}, false); EXPECT_EQ(bm25_result.error_code, 0); EXPECT_EQ(bm25_result.result.size(), 2); @@ -1203,17 +1213,19 @@ TEST(DingoTantivySearchTest, test_query_datetime_type) { std::string query_string_1 = fmt::format("{}:\"{}\"", "col5", time_1); std::cout << "query_string_1:" << query_string_1 << std::endl; - auto bm25_result = ffi_bm25_search_with_column_names(index_path, query_string_1, 3, {}, false, false, 0, 0, {"col5"}); + auto bm25_result = + ffi_bm25_search_with_column_names(index_path, query_string_1, 3, {}, false, false, 0, 0, {"col5"}, false); EXPECT_EQ(bm25_result.error_code, 0); EXPECT_EQ(bm25_result.result.size(), 1); // ignore column name - bm25_result = ffi_bm25_search_with_column_names(index_path, query_string_1, 3, {}, false, false, 0, 0, {}); + bm25_result = ffi_bm25_search_with_column_names(index_path, query_string_1, 3, {}, false, false, 0, 0, {}, false); EXPECT_EQ(bm25_result.error_code, 0); EXPECT_EQ(bm25_result.result.size(), 1); // wrong column name - bm25_result = ffi_bm25_search_with_column_names(index_path, query_string_1, 3, {}, false, false, 0, 0, {"col1"}); + bm25_result = + ffi_bm25_search_with_column_names(index_path, query_string_1, 3, {}, false, false, 0, 0, {"col1"}, false); EXPECT_EQ(bm25_result.error_code, 0); EXPECT_EQ(bm25_result.result.size(), 1); @@ -1221,37 +1233,41 @@ TEST(DingoTantivySearchTest, test_query_datetime_type) { // query sentense ignore filed name with column name std::string query_string_2 = fmt::format("\"{}\"", time_3); std::cout << "query_string_2:" << query_string_2 << std::endl; - bm25_result = ffi_bm25_search_with_column_names(index_path, query_string_2, 10, {}, false, false, 0, 0, {"col5"}); + bm25_result = + ffi_bm25_search_with_column_names(index_path, query_string_2, 10, {}, false, false, 0, 0, {"col5"}, false); EXPECT_NE(bm25_result.error_code, 0); // query sentense ignore filed without column name std::cout << "query_string_2:" << query_string_2 << std::endl; - bm25_result = ffi_bm25_search_with_column_names(index_path, query_string_2, 10, {}, false, false, 0, 0, {}); + bm25_result = ffi_bm25_search_with_column_names(index_path, query_string_2, 10, {}, false, false, 0, 0, {}, false); EXPECT_EQ(bm25_result.error_code, 0); EXPECT_EQ(bm25_result.result.size(), 0); // query sentense ignore filed name with wrong column name std::cout << "query_string_2:" << query_string_2 << std::endl; - bm25_result = ffi_bm25_search_with_column_names(index_path, query_string_2, 10, {}, false, false, 0, 0, {"col1"}); + bm25_result = + ffi_bm25_search_with_column_names(index_path, query_string_2, 10, {}, false, false, 0, 0, {"col1"}, false); EXPECT_EQ(bm25_result.error_code, 0); EXPECT_EQ(bm25_result.result.size(), 0); // range query sentense with column name std::string query_string_3 = fmt::format("{}:[{} TO *]", "col5", time_1); std::cout << "query_string_3:" << query_string_3 << std::endl; - bm25_result = ffi_bm25_search_with_column_names(index_path, query_string_3, 10, {}, false, false, 0, 0, {"col5"}); + bm25_result = + ffi_bm25_search_with_column_names(index_path, query_string_3, 10, {}, false, false, 0, 0, {"col5"}, false); EXPECT_EQ(bm25_result.error_code, 0); EXPECT_EQ(bm25_result.result.size(), 6); // range query sentense without column name std::cout << "query_string_3:" << query_string_3 << std::endl; - bm25_result = ffi_bm25_search_with_column_names(index_path, query_string_3, 10, {}, false, false, 0, 0, {}); + bm25_result = ffi_bm25_search_with_column_names(index_path, query_string_3, 10, {}, false, false, 0, 0, {}, false); EXPECT_EQ(bm25_result.error_code, 0); EXPECT_EQ(bm25_result.result.size(), 6); // range query sentense with wrong column name std::cout << "query_string_3:" << query_string_3 << std::endl; - bm25_result = ffi_bm25_search_with_column_names(index_path, query_string_3, 10, {}, false, false, 0, 0, {"col1"}); + bm25_result = + ffi_bm25_search_with_column_names(index_path, query_string_3, 10, {}, false, false, 0, 0, {"col1"}, false); EXPECT_EQ(bm25_result.error_code, 0); EXPECT_EQ(bm25_result.result.size(), 6); @@ -1259,20 +1275,23 @@ TEST(DingoTantivySearchTest, test_query_datetime_type) { // range query sentense without filed name std::string query_string_4 = fmt::format("[{} TO {}]", time_2, time_5); std::cout << "query_string_4:" << query_string_4 << std::endl; - bm25_result = ffi_bm25_search_with_column_names(index_path, query_string_4, 10, {}, false, false, 0, 0, {"col5"}); + bm25_result = + ffi_bm25_search_with_column_names(index_path, query_string_4, 10, {}, false, false, 0, 0, {"col5"}, false); EXPECT_NE(bm25_result.error_code, 0); //'Unsupported query: Range query need to target a specific field.' // match all std::string query_string_5 = fmt::format("{}:*", "col5"); std::cout << "query_string_5:" << query_string_5 << std::endl; - bm25_result = ffi_bm25_search_with_column_names(index_path, query_string_5, 10, {}, false, false, 0, 0, {"col5"}); + bm25_result = + ffi_bm25_search_with_column_names(index_path, query_string_5, 10, {}, false, false, 0, 0, {"col5"}, false); EXPECT_NE(bm25_result.error_code, 0); // match all without filed name std::string query_string_6 = fmt::format("*"); std::cout << "query_string_6:" << query_string_6 << std::endl; - bm25_result = ffi_bm25_search_with_column_names(index_path, query_string_6, 10, {}, false, false, 0, 0, {"col5"}); + bm25_result = + ffi_bm25_search_with_column_names(index_path, query_string_6, 10, {}, false, false, 0, 0, {"col5"}, false); EXPECT_EQ(bm25_result.error_code, 0); EXPECT_EQ(bm25_result.result.size(), 6); @@ -1282,7 +1301,8 @@ TEST(DingoTantivySearchTest, test_query_datetime_type) { time_5 = fmt::format("\"{}\"", time_5); std::string query_string_7 = fmt::format("col5: IN [{} {} {}]", time_1, time_3, time_5); std::cout << "query_string_7:" << query_string_7 << std::endl; - bm25_result = ffi_bm25_search_with_column_names(index_path, query_string_7, 10, {}, false, false, 0, 0, {"col5"}); + bm25_result = + ffi_bm25_search_with_column_names(index_path, query_string_7, 10, {}, false, false, 0, 0, {"col5"}, false); EXPECT_EQ(bm25_result.error_code, 0); EXPECT_EQ(bm25_result.result.size(), 3); @@ -1387,7 +1407,7 @@ TEST(DingoTantivySearchTest, test_query_bool_type) { EXPECT_EQ(ret.result, true); auto bm25_result = - ffi_bm25_search_with_column_names(index_path, "col6:true AND col2:200", 10, {}, false, false, 0, 0, {}); + ffi_bm25_search_with_column_names(index_path, "col6:true AND col2:200", 10, {}, false, false, 0, 0, {}, false); if (bm25_result.error_code != 0) { std::cout << __func__ << "test-1 filter_ids error:" << bm25_result.error_msg.c_str() << '\n'; EXPECT_EQ(bm25_result.error_code, 0); @@ -1398,7 +1418,7 @@ TEST(DingoTantivySearchTest, test_query_bool_type) { } } - bm25_result = ffi_bm25_search_with_column_names(index_path, "col6:false", 10, {}, false, false, 0, 0, {}); + bm25_result = ffi_bm25_search_with_column_names(index_path, "col6:false", 10, {}, false, false, 0, 0, {}, false); if (bm25_result.error_code != 0) { std::cout << __func__ << "test-2 parser error:" << bm25_result.error_msg.c_str() << '\n'; EXPECT_EQ(bm25_result.error_code, 0); From 3f2a110ee7b3640f77cc7d3d504e9a8adcb851d9 Mon Sep 17 00:00:00 2001 From: yangjundong <1047934838@qq.com> Date: Wed, 25 Dec 2024 08:20:42 +0000 Subject: [PATCH 2/4] [feat][client] Add search all document command --- src/client_v2/document_index.cc | 87 +++++++++++++++++++++++++++++++++ src/client_v2/document_index.h | 5 ++ src/client_v2/pretty.cc | 24 +++++++++ src/client_v2/pretty.h | 1 + 4 files changed, 117 insertions(+) diff --git a/src/client_v2/document_index.cc b/src/client_v2/document_index.cc index 635c4f8c4..64e76b379 100644 --- a/src/client_v2/document_index.cc +++ b/src/client_v2/document_index.cc @@ -16,10 +16,13 @@ #include +#include "client_v2/coordinator.h" #include "client_v2/pretty.h" #include "common/helper.h" #include "fmt/format.h" +const int kBatchSize = 3; + namespace client_v2 { void SetUpDocumentIndexSubCommands(CLI::App& app) { @@ -28,6 +31,7 @@ void SetUpDocumentIndexSubCommands(CLI::App& app) { SetUpDocumentBatchAdd(app); SetUpDocumentDelete(app); SetUpDocumentSearch(app); + SetUpDocumentSearchAll(app); SetUpDocumentBatchQuery(app); SetUpDocumentScanQuery(app); SetUpDocumentGetMaxId(app); @@ -324,6 +328,69 @@ void SendDocumentSearch(DocumentSearchOptions const& opt) { Pretty::Show(response); } +void SendDocumentSearchAll(DocumentSearchOptions const& opt) { + // dingodb::pb::document::DocumentSearchRequest request; + // dingodb::pb::document::DocumentSearchResponse response; + + if (opt.query_string.empty()) { + std::cout << "query_string is empty" << std::endl; + return; + } + + auto response = SendSearchAllByStreamMode(opt); + std::cout << "search all documents response:" << response.DebugString() << std::endl; + Pretty::Show(response); +} + +dingodb::pb::document::DocumentSearchAllResponse SendSearchAllByStreamMode(DocumentSearchOptions const& opt) { + dingodb::pb::document::DocumentSearchAllRequest request; + dingodb::pb::document::DocumentSearchAllResponse response; + auto* parameter = request.mutable_parameter(); + parameter->set_query_string(opt.query_string); + parameter->set_without_scalar_data(opt.without_scalar); + parameter->set_query_unlimited(true); + request.mutable_stream_meta()->set_limit(kBatchSize); + if (opt.doc_id > 0) { + parameter->add_document_ids(opt.doc_id); + } + *(request.mutable_context()) = RegionRouter::GetInstance().GenConext(opt.region_id); + + for (;;) { + dingodb::pb::document::DocumentSearchAllResponse sub_response; + // maybe current store interaction is not store node, so need reset. + InteractionManager::GetInstance().ResetStoreInteraction(); + auto status = InteractionManager::GetInstance().SendRequestWithContext("DocumentService", "DocumentSearchAll", + request, sub_response); + std::cout << "search all request: " << request.DebugString() << ", sub_response: " << sub_response.DebugString() + << std::endl; + if (!status.ok()) { + response.mutable_error()->set_errcode(dingodb::pb::error::Errno(status.error_code())); + response.mutable_error()->set_errmsg(status.error_str()); + break; + } + + if (sub_response.error().errcode() != dingodb::pb::error::OK) { + *response.mutable_error() = sub_response.error(); + break; + } + + // set request stream id + if (!sub_response.stream_meta().stream_id().empty()) { + request.mutable_stream_meta()->set_stream_id(sub_response.stream_meta().stream_id()); + } + + // copy data + for (int i = 0; i < sub_response.document_with_scores_size(); ++i) { + response.add_document_with_scores()->Swap(&sub_response.mutable_document_with_scores()->at(i)); + } + if (!sub_response.stream_meta().has_more()) { + break; + } + } + + return response; +} + void SendDocumentBatchQuery(DocumentBatchQueryOptions const& opt) { dingodb::pb::document::DocumentBatchQueryRequest request; dingodb::pb::document::DocumentBatchQueryResponse response; @@ -645,6 +712,26 @@ void RunDocumentSearch(DocumentSearchOptions const& opt) { client_v2::SendDocumentSearch(opt); } +void SetUpDocumentSearchAll(CLI::App& app) { + auto opt = std::make_shared(); + auto* cmd = app.add_subcommand("DocumentSearchAll", "Document search all documents")->group("Document Commands"); + cmd->add_option("--coor_url", opt->coor_url, "Coordinator url, default:file://./coor_list"); + cmd->add_option("--region_id", opt->region_id, "Request parameter region id")->required(); + cmd->add_option("--query_string", opt->query_string, "Request parameter query_string")->required(); + cmd->add_option("--without_scalar", opt->without_scalar, "Request parameter without_scalar") + ->default_val(false) + ->default_str("false"); + cmd->add_option("--doc_id", opt->doc_id, "Request parameter alive id"); + cmd->callback([opt]() { RunDocumentSearchAll(*opt); }); +} + +void RunDocumentSearchAll(DocumentSearchOptions const& opt) { + if (!SetUpStore(opt.coor_url, {}, opt.region_id)) { + exit(-1); + } + client_v2::SendDocumentSearchAll(opt); +} + void SetUpDocumentBatchQuery(CLI::App& app) { auto opt = std::make_shared(); auto* cmd = app.add_subcommand("DocumentBatchQuery", "Document batch query")->group("Document Commands"); diff --git a/src/client_v2/document_index.h b/src/client_v2/document_index.h index 39324a4d7..800ca2235 100644 --- a/src/client_v2/document_index.h +++ b/src/client_v2/document_index.h @@ -97,6 +97,9 @@ struct DocumentSearchOptions { void SetUpDocumentSearch(CLI::App &app); void RunDocumentSearch(DocumentSearchOptions const &opt); +void SetUpDocumentSearchAll(CLI::App &app); +void RunDocumentSearchAll(DocumentSearchOptions const &opt); + struct DocumentBatchQueryOptions { std::string coor_url; int64_t region_id; @@ -156,6 +159,7 @@ void SendDocumentAdd(DocumentAddOptions const &opt); void SendDocumentBatchAdd(DocumentAddOptions const &opt); void SendDocumentDelete(DocumentDeleteOptions const &opt); void SendDocumentSearch(DocumentSearchOptions const &opt); +void SendDocumentSearchAll(DocumentSearchOptions const &opt); void SendDocumentBatchQuery(DocumentBatchQueryOptions const &opt); void SendDocumentGetMaxId(DocumentGetMaxIdOptions const &opt); void SendDocumentGetMinId(DocumentGetMinIdOptions const &opt); @@ -163,6 +167,7 @@ void SendDocumentScanQuery(DocumentScanQueryOptions const &opt); void SendDocumentCount(DocumentCountOptions const &opt); void SendDocumentGetRegionMetrics(DocumentGetRegionMetricsOptions const &opt); +dingodb::pb::document::DocumentSearchAllResponse SendSearchAllByStreamMode(DocumentSearchOptions const &opt); } // namespace client_v2 #endif // DINGODB_CLIENT_DOCUMENT_INDEX_H_ \ No newline at end of file diff --git a/src/client_v2/pretty.cc b/src/client_v2/pretty.cc index d93ab9b52..e06bc6d73 100644 --- a/src/client_v2/pretty.cc +++ b/src/client_v2/pretty.cc @@ -922,6 +922,30 @@ void Pretty::Show(dingodb::pb::document::DocumentSearchResponse& response) { PrintTable(rows); } + +void Pretty::Show(dingodb::pb::document::DocumentSearchAllResponse& response) { + if (ShowError(response.error())) { + return; + } + if (response.document_with_scores_size() == 0) { + std::cout << "Not search document ." << std::endl; + return; + } + std::vector> rows = {{ + ftxui::paragraph("DocumentId"), + ftxui::paragraph("Score"), + }}; + for (auto const& document_with_score : response.document_with_scores()) { + std::vector row = { + ftxui::paragraph(fmt::format("{}", document_with_score.document_with_id().id())), + ftxui::paragraph(fmt::format("{}", document_with_score.score())), + }; + rows.push_back(row); + } + + PrintTable(rows); +} + void Pretty::Show(dingodb::pb::document::DocumentBatchQueryResponse& response) { if (ShowError(response.error())) { return; diff --git a/src/client_v2/pretty.h b/src/client_v2/pretty.h index c49b86dc5..f3e11a795 100644 --- a/src/client_v2/pretty.h +++ b/src/client_v2/pretty.h @@ -56,6 +56,7 @@ class Pretty { static void Show(dingodb::pb::meta::CreateIndexResponse &response); static void Show(dingodb::pb::document::DocumentSearchResponse &response); + static void Show(dingodb::pb::document::DocumentSearchAllResponse &response); static void Show(dingodb::pb::document::DocumentBatchQueryResponse &response); static void Show(dingodb::pb::document::DocumentGetBorderIdResponse &response); static void Show(dingodb::pb::document::DocumentScanQueryResponse &response); From 9adaa46f4b969997e71e0d7295c6ec55ff733c54 Mon Sep 17 00:00:00 2001 From: yangjundong <1047934838@qq.com> Date: Wed, 25 Dec 2024 09:14:56 +0000 Subject: [PATCH 3/4] [chore][store] Update proto --- dingo-store-proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dingo-store-proto b/dingo-store-proto index 72a49566e..46b09ed3f 160000 --- a/dingo-store-proto +++ b/dingo-store-proto @@ -1 +1 @@ -Subproject commit 72a49566efab633634e0d4832c245beadd0da420 +Subproject commit 46b09ed3fc7dc29fe4cf5daf3c167d9eaba8f470 From 8d3aab4afcb18d398e939086778852727bf14d5a Mon Sep 17 00:00:00 2001 From: yangjundong <1047934838@qq.com> Date: Wed, 25 Dec 2024 10:13:07 +0000 Subject: [PATCH 4/4] [chore][store] Update for unlimited search --- contrib/tantivy-search | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/tantivy-search b/contrib/tantivy-search index 3a9ebda1d..c169502ef 160000 --- a/contrib/tantivy-search +++ b/contrib/tantivy-search @@ -1 +1 @@ -Subproject commit 3a9ebda1d6975348d01950a69c1aaaf685c4e256 +Subproject commit c169502efaf7e791573fa7ec4eff121c27622390