Skip to content

Commit b057a4f

Browse files
committed
[#21631] DocDB: Avoid including hidden tables in generate_snapshot as of time
Summary: Currently, Generate snapshot as of time is returning the set of tables and tablets that are in a `running` state as of `restore_time`. However, when we have a snapshot schedule, deleted tables are still marked as `RUNNING` but have a hidden flag set to true. These hidden tables should not be included in the output of generate snapshot. Otherwise, cloning will fail in case of a recreated table. The diff fixes the issue by skipping to include the tables that are marked as hidden (eventhough they are in running state). It also skips the tablets that are hidden due to tablet splitting. This solves cloning issues: GH-21602 and GH-21577 Jira: DB-10526 Test Plan: ./yb_build.sh --cxx-test minicluster-snapshot-test --gtest_filter Colocation/MasterExportSnapshotTest.ExportSnapshotAsOfTimeWithHiddenTables/0 ./yb_build.sh --cxx-test minicluster-snapshot-test --gtest_filter Colocation/MasterExportSnapshotTest.ExportSnapshotAsOfTimeWithHiddenTables/1 Reviewers: asrivastava Reviewed By: asrivastava Subscribers: ybase, bogdan, slingam Differential Revision: https://phorge.dev.yugabyte.com/D33536
1 parent 2d742eb commit b057a4f

File tree

2 files changed

+88
-19
lines changed

2 files changed

+88
-19
lines changed

src/yb/integration-tests/minicluster-snapshot-test.cc

+81-17
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353

5454
#include "yb/master/catalog_manager.h"
5555
#include "yb/master/leader_epoch.h"
56+
#include "yb/master/master_admin.proxy.h"
5657
#include "yb/master/master_backup.proxy.h"
5758
#include "yb/master/master_types.pb.h"
5859
#include "yb/master/mini_master.h"
@@ -83,7 +84,7 @@ DECLARE_string(TEST_mini_cluster_pg_host_port);
8384
namespace yb {
8485
namespace master {
8586

86-
constexpr auto kInterval = 6s;
87+
constexpr auto kInterval = 20s;
8788
constexpr auto kRetention = RegularBuildVsDebugVsSanitizers(10min, 18min, 10min);
8889

8990
YB_DEFINE_ENUM(YsqlColocationConfig, (kNotColocated)(kDBColocated));
@@ -396,7 +397,41 @@ class PostgresMiniClusterTest : public pgwrapper::PgMiniTestBase,
396397
}
397398
};
398399

399-
class MasterExportSnapshotTest : public PostgresMiniClusterTest {};
400+
class MasterExportSnapshotTest : public PostgresMiniClusterTest {
401+
public:
402+
void SetUp() override {
403+
PostgresMiniClusterTest::SetUp();
404+
messenger = ASSERT_RESULT(rpc::MessengerBuilder("test-msgr").set_num_reactors(1).Build());
405+
auto proxy_cache = rpc::ProxyCache(messenger.get());
406+
bakup_proxy = std::make_unique<master::MasterBackupProxy>(
407+
&proxy_cache, mini_cluster()->mini_master()->bound_rpc_addr());
408+
admin_proxy = std::make_unique<master::MasterAdminProxy>(
409+
&proxy_cache, mini_cluster()->mini_master()->bound_rpc_addr());
410+
client_ =
411+
ASSERT_RESULT(client::YBClientBuilder()
412+
.add_master_server_addr(cluster_->mini_master()->bound_rpc_addr_str())
413+
.Build());
414+
}
415+
416+
Status CreateDatabaseWithSnapshotSchedule(
417+
master::YsqlColocationConfig colocated = master::YsqlColocationConfig::kNotColocated) {
418+
RETURN_NOT_OK(CreateDatabase(kNamespaceName, colocated));
419+
LOG(INFO) << "Database created.";
420+
schedule_id = VERIFY_RESULT(CreateSnapshotSchedule(
421+
bakup_proxy.get(), YQL_DATABASE_PGSQL, kNamespaceName, kInterval, kRetention, timeout));
422+
RETURN_NOT_OK(WaitScheduleSnapshot(bakup_proxy.get(), schedule_id, timeout));
423+
return Status::OK();
424+
}
425+
426+
protected:
427+
const std::string kNamespaceName = "testdb";
428+
const MonoDelta timeout = MonoDelta::FromSeconds(30);
429+
SnapshotScheduleId schedule_id = SnapshotScheduleId::Nil();
430+
std::unique_ptr<rpc::Messenger> messenger;
431+
std::unique_ptr<master::MasterBackupProxy> bakup_proxy;
432+
std::unique_ptr<master::MasterAdminProxy> admin_proxy;
433+
std::unique_ptr<client::YBClient> client_;
434+
};
400435

401436
INSTANTIATE_TEST_CASE_P(
402437
Colocation, MasterExportSnapshotTest,
@@ -412,18 +447,7 @@ INSTANTIATE_TEST_CASE_P(
412447
// 5. Generate snapshotInfo from schedule using the time t.
413448
// 6. Assert the output of 5 and 3 are the same.
414449
TEST_P(MasterExportSnapshotTest, ExportSnapshotAsOfTime) {
415-
const auto kNamespaceName = "testdb";
416-
ASSERT_OK(CreateDatabase(kNamespaceName, GetParam()));
417-
LOG(INFO) << "Database created.";
418-
auto messenger = ASSERT_RESULT(rpc::MessengerBuilder("test-msgr").set_num_reactors(1).Build());
419-
auto proxy_cache = rpc::ProxyCache(messenger.get());
420-
auto proxy =
421-
master::MasterBackupProxy(&proxy_cache, mini_cluster()->mini_master()->bound_rpc_addr());
422-
423-
const auto timeout = MonoDelta::FromSeconds(30);
424-
SnapshotScheduleId schedule_id = ASSERT_RESULT(CreateSnapshotSchedule(
425-
&proxy, YQL_DATABASE_PGSQL, kNamespaceName, kInterval, kRetention, timeout));
426-
ASSERT_OK(WaitScheduleSnapshot(&proxy, schedule_id, timeout));
450+
ASSERT_OK(CreateDatabaseWithSnapshotSchedule(GetParam()));
427451
auto conn = ASSERT_RESULT(ConnectToDB(kNamespaceName));
428452
// 1.
429453
LOG(INFO) << Format("Create tables t1,t2");
@@ -434,9 +458,10 @@ TEST_P(MasterExportSnapshotTest, ExportSnapshotAsOfTime) {
434458
LOG(INFO) << Format("current timestamp is: {$0}", time);
435459

436460
// 3.
437-
auto decoded_snapshot_id = ASSERT_RESULT(WaitNewSnapshot(&proxy, schedule_id));
438-
ASSERT_OK(WaitForSnapshotComplete(&proxy, decoded_snapshot_id));
439-
master::SnapshotInfoPB ground_truth = ASSERT_RESULT(ExportSnapshot(&proxy, decoded_snapshot_id));
461+
auto decoded_snapshot_id = ASSERT_RESULT(WaitNewSnapshot(bakup_proxy.get(), schedule_id));
462+
ASSERT_OK(WaitForSnapshotComplete(bakup_proxy.get(), decoded_snapshot_id));
463+
master::SnapshotInfoPB ground_truth =
464+
ASSERT_RESULT(ExportSnapshot(bakup_proxy.get(), decoded_snapshot_id));
440465
// 4.
441466
ASSERT_OK(conn.Execute("CREATE TABLE t3 (key INT PRIMARY KEY, c1 INT, c2 TEXT, c3 TEXT)"));
442467
ASSERT_OK(conn.Execute("ALTER TABLE t2 ADD COLUMN new_col TEXT"));
@@ -456,6 +481,45 @@ TEST_P(MasterExportSnapshotTest, ExportSnapshotAsOfTime) {
456481
messenger->Shutdown();
457482
}
458483

484+
// Test that export_snapshot_from_schedule as of time doesn't include hidden tables in
485+
// SnapshotInfoPB.
486+
TEST_P(MasterExportSnapshotTest, ExportSnapshotAsOfTimeWithHiddenTables) {
487+
ASSERT_OK(CreateDatabaseWithSnapshotSchedule(GetParam()));
488+
auto conn = ASSERT_RESULT(ConnectToDB(kNamespaceName));
489+
// 1. Create table t1, then delete it to mark it as hidden and then recreate table t1.
490+
LOG(INFO) << Format("Create tables t1");
491+
ASSERT_OK(conn.Execute("CREATE TABLE t1 (key INT PRIMARY KEY, value INT)"));
492+
ASSERT_OK(conn.Execute("DROP TABLE t1"));
493+
ASSERT_OK(conn.Execute("CREATE TABLE t1 (key INT PRIMARY KEY, value INT)"));
494+
495+
// 2. Mark time t and wait for a new snapshot to be created as part of the snapshot schedule.
496+
Timestamp time = ASSERT_RESULT(GetCurrentTime());
497+
LOG(INFO) << Format("current timestamp is: {$0}", time);
498+
499+
// 3. export_snapshot to generate the SnapshotInfoPB as of current time. It is the traditional
500+
// export_snapshot command (not the new command) to serve as ground truth.
501+
auto decoded_snapshot_id = ASSERT_RESULT(WaitNewSnapshot(bakup_proxy.get(), schedule_id));
502+
ASSERT_OK(WaitForSnapshotComplete(bakup_proxy.get(), decoded_snapshot_id));
503+
master::SnapshotInfoPB ground_truth =
504+
ASSERT_RESULT(ExportSnapshot(bakup_proxy.get(), decoded_snapshot_id));
505+
// 4. Create another table that shouldn't be included in generate snapshot as of time
506+
ASSERT_OK(conn.Execute("CREATE TABLE t2 (key INT PRIMARY KEY, c1 TEXT, c2 TEXT)"));
507+
// 5. Generate snapshotInfo from schedule using the time t.
508+
LOG(INFO) << Format(
509+
"Exporting snapshot from snapshot schedule: $0, Hybrid time = $1", schedule_id, time);
510+
auto deadline = CoarseMonoClock::Now() + timeout;
511+
master::SnapshotInfoPB snapshot_info_as_of_time = ASSERT_RESULT(
512+
mini_cluster()->mini_master()->catalog_manager_impl().GenerateSnapshotInfoFromSchedule(
513+
schedule_id, HybridTime::FromMicros(static_cast<uint64>(time.ToInt64())), deadline));
514+
// 6. Assert the output of 5 and 3 are the same.
515+
LOG(INFO) << Format("SnapshotInfoPB ground_truth: $0", ground_truth.ShortDebugString());
516+
LOG(INFO) << Format(
517+
"SnapshotInfoPB as of time=$0 :$1", time, snapshot_info_as_of_time.ShortDebugString());
518+
ASSERT_TRUE(pb_util::ArePBsEqual(
519+
std::move(ground_truth), std::move(snapshot_info_as_of_time), /* diff_str */ nullptr));
520+
messenger->Shutdown();
521+
}
522+
459523
class PgCloneTest : public PostgresMiniClusterTest {
460524
protected:
461525
void SetUp() override {

src/yb/master/catalog_manager_ext.cc

+7-2
Original file line numberDiff line numberDiff line change
@@ -1426,7 +1426,8 @@ Result<SnapshotInfoPB> CatalogManager::GenerateSnapshotInfoPbAsOfTime(
14261426
}
14271427
return Status::OK();
14281428
}));
1429-
// Pass 2: Get all the SysTablesEntry of the database that are in running state as of read_time.
1429+
// Pass 2: Get all the SysTablesEntry of the database that are in running state and not Hidden as
1430+
// of read_time.
14301431
// Stores SysTablesEntry and its SysTabletsEntries to order the tablets of each table by
14311432
// partitions' start keys.
14321433
std::map<TableId, TableWithTabletsEntries> tables_to_tablets;
@@ -1440,6 +1441,7 @@ Result<SnapshotInfoPB> CatalogManager::GenerateSnapshotInfoPbAsOfTime(
14401441
const Slice& id, const Slice& data) -> Status {
14411442
auto pb = VERIFY_RESULT(pb_util::ParseFromSlice<SysTablesEntryPB>(data));
14421443
if (pb.namespace_id() == namespace_id && pb.state() == SysTablesEntryPB::RUNNING &&
1444+
pb.hide_state() == SysTablesEntryPB_HideState_VISIBLE &&
14431445
!pb.schema().table_properties().is_ysql_catalog_table()) {
14441446
VLOG_WITH_FUNC(1) << "Found SysTablesEntryPB: " << pb.ShortDebugString();
14451447
if (IsColocatedDbParentTableId(id.ToBuffer()) ||
@@ -1461,7 +1463,10 @@ Result<SnapshotInfoPB> CatalogManager::GenerateSnapshotInfoPbAsOfTime(
14611463
&tablets_iter, doc_read_cntxt.schema(), SysRowEntryType::TABLET,
14621464
[namespace_id, &tables_to_tablets](const Slice& id, const Slice& data) -> Status {
14631465
auto pb = VERIFY_RESULT(pb_util::ParseFromSlice<SysTabletsEntryPB>(data));
1464-
if (tables_to_tablets.contains(pb.table_id()) && pb.state() == SysTabletsEntryPB::RUNNING) {
1466+
// TODO(Yamen): handle tablet splitting cases by either keeping the parent or the children
1467+
// according to their state.
1468+
if (tables_to_tablets.contains(pb.table_id()) && pb.state() == SysTabletsEntryPB::RUNNING &&
1469+
pb.hide_hybrid_time() == 0) {
14651470
VLOG_WITH_FUNC(1) << "Found SysTabletsEntryPB: " << pb.ShortDebugString();
14661471
tables_to_tablets[pb.table_id()].tablets_entries.push_back(
14671472
std::make_pair(id.ToBuffer(), pb));

0 commit comments

Comments
 (0)