Skip to content

Commit 50f7f51

Browse files
committed
[#26366] YSQL, DocDB: Enable block-based sampling for non-colocated tables by default
Summary: Enables block-based sampling for non-colocated tables by default. Also fixed `QLRocksDBStorage::GetSampleBlocks/PutAdjustedSampleBlockBounds` empty blocks handling. Jira: DB-15719 Test Plan: Perf test on internal cluster Reviewers: amartsinchyk Reviewed By: amartsinchyk Subscribers: yql, ybase Tags: #jenkins-ready Differential Revision: https://phorge.dev.yugabyte.com/D42863
1 parent 0d7d18a commit 50f7f51

File tree

5 files changed

+16
-36
lines changed

5 files changed

+16
-36
lines changed

src/postgres/src/backend/utils/misc/guc.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -3235,7 +3235,7 @@ static struct config_bool ConfigureNamesBool[] =
32353235
GUC_NOT_IN_SAMPLE
32363236
},
32373237
&yb_allow_separate_requests_for_sampling_stages,
3238-
false,
3238+
true,
32393239
NULL, NULL, NULL
32403240
},
32413241

src/yb/docdb/ql_rocksdb_storage.cc

+2-1
Original file line numberDiff line numberDiff line change
@@ -645,7 +645,8 @@ bool PutAdjustedSampleBlockBounds(
645645
auto adjusted_sample_block_bounds = std::make_pair(
646646
KeyBuffer(sample_block.first.empty() ? partition_lower_bound_key : sample_block.first),
647647
KeyBuffer(sample_block.second.empty() ? partition_upper_bound_key : sample_block.second));
648-
if (adjusted_sample_block_bounds.first == adjusted_sample_block_bounds.second) {
648+
if (adjusted_sample_block_bounds.first == adjusted_sample_block_bounds.second &&
649+
!adjusted_sample_block_bounds.first.empty()) {
649650
// Don't put empty sample block
650651
return false;
651652
}

src/yb/yql/pggate/pg_sample.cc

+10-27
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,6 @@ class PgDocSampleOp : public PgDocReadOp {
7070
uint64_t num_blocks_collected;
7171
double num_rows_processed;
7272
int32 num_rows_collected;
73-
// TODO(analyze_sampling): https://github.com/yugabyte/yugabyte-db/issues/26366: Remove this:
74-
double DEPRECATED_estimated_total_rows;
7573
};
7674

7775
PgDocSampleOp(const PgSession::ScopedRefPtr& pg_session, PgTable* table, PgsqlReadOpPtr read_op)
@@ -163,10 +161,6 @@ class PgDocSampleOp : public PgDocReadOp {
163161
.num_blocks_collected = sampling_state->num_blocks_collected(),
164162
.num_rows_processed = sampling_state->samplerows(),
165163
.num_rows_collected = sampling_state->numrows(),
166-
// TODO(analyze_sampling): https://github.com/yugabyte/yugabyte-db/issues/26366: Remove this:
167-
.DEPRECATED_estimated_total_rows = sampling_state->has_deprecated_estimated_total_rows()
168-
? sampling_state->deprecated_estimated_total_rows()
169-
: sampling_state->samplerows()
170164
};
171165

172166
RETURN_NOT_OK(PgDocReadOp::CompleteProcessResponse());
@@ -611,9 +605,7 @@ class SampleRowsPicker : public SamplePickerBase, public SampleRowsPickerIf {
611605
}
612606

613607
double GetEstimatedRowCount() const override {
614-
// TODO(analyze_sampling): https://github.com/yugabyte/yugabyte-db/issues/26366: Simplify the
615-
// following code to return GetSampleOp().GetSamplingStats().num_rows_processed.
616-
return GetSampleOp().GetSamplingStats().DEPRECATED_estimated_total_rows;
608+
return GetSampleOp().GetSamplingStats().num_rows_processed;
617609
}
618610

619611
Status SetSampleBlocksBounds(std::vector<std::pair<KeyBuffer, KeyBuffer>>&& sample_blocks) {
@@ -686,6 +678,9 @@ class TwoStageSampleRowsPicker : public SampleRowsPickerIf {
686678

687679
double GetEstimatedRowCount() const override {
688680
const auto num_rows_processed = sample_rows_picker_->GetNumRowsProcessed();
681+
VLOG_WITH_FUNC(1) << "num_rows_processed: " << num_rows_processed
682+
<< " num_blocks_collected_: " << num_blocks_collected_
683+
<< " num_blocks_processed_: " << num_blocks_processed_;
689684
return num_blocks_collected_ >= num_blocks_processed_
690685
? num_rows_processed
691686
: 1.0 * num_rows_processed * num_blocks_processed_ / num_blocks_collected_;
@@ -743,34 +738,22 @@ Status PgSample::Prepare(
743738
target_ = PgTable(VERIFY_RESULT(pg_session_->LoadTable(table_id)));
744739
bind_ = PgTable(nullptr);
745740

746-
const auto allow_separate_requests_for_sampling_stages =
747-
yb_allow_separate_requests_for_sampling_stages;
748-
749-
// TODO(analyze_sampling): https://github.com/yugabyte/yugabyte-db/issues/26366:
750-
// Simplify the following code to fallback to YsqlSamplingAlgorithm::FULL_TABLE_SCAN when
751-
// yb_allow_separate_requests_for_sampling_stages is false.
752741
YsqlSamplingAlgorithm ysql_sampling_algorithm;
753-
if (yb_allow_block_based_sampling_algorithm &&
754-
(target_->IsColocated() || allow_separate_requests_for_sampling_stages)) {
742+
if (yb_allow_separate_requests_for_sampling_stages) {
755743
ysql_sampling_algorithm = YsqlSamplingAlgorithm(yb_sampling_algorithm);
756744
} else {
745+
// Older versions might not have ExecuteSampleBlockBased implementation which runs block-based
746+
// sampling stages separately. For both backward compatibility and simplicity, in this case we
747+
// fallback to full table scan for ANALYZE. This only affects performance of colocated tables
748+
// ANALYZE queries launched during upgrade process.
757749
ysql_sampling_algorithm = YsqlSamplingAlgorithm::FULL_TABLE_SCAN;
758750
}
759751

760-
if (ysql_sampling_algorithm == YsqlSamplingAlgorithm::BLOCK_BASED_SAMPLING &&
761-
allow_separate_requests_for_sampling_stages) {
752+
if (ysql_sampling_algorithm == YsqlSamplingAlgorithm::BLOCK_BASED_SAMPLING) {
762753
sample_rows_picker_ = VERIFY_RESULT(TwoStageSampleRowsPicker::Make(
763754
pg_session_, table_id, is_region_local, targrows, rand_state, read_time,
764755
ysql_sampling_algorithm));
765756
} else {
766-
// Old versions might not have ExecuteSampleBlockBased implementation which runs stages
767-
// separately. So for backward compatibility, if yb_allow_separate_requests_for_sampling_stages
768-
// is false, even for YsqlSamplingAlgorithm::BLOCK_BASED_SAMPLING we use SampleRowsPicker class
769-
// which expects tserver to run two-stages-in-single-run
770-
// DEPRECATED_ExecuteSampleBlockBasedColocated function.
771-
// New versions fall back to DEPRECATED_ExecuteSampleBlockBasedColocated when
772-
// PgsqlSamplingStatePB::is_blocks_sampling_stage is false and PgsqlReadRequestPB::sample_blocks
773-
// is empty.
774757
sample_rows_picker_ = VERIFY_RESULT(SampleRowsPicker::Make(
775758
pg_session_, table_id, is_region_local, targrows, rand_state, read_time,
776759
ysql_sampling_algorithm));

src/yb/yql/pggate/util/ybc_guc.cc

+1-3
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,7 @@ int yb_read_after_commit_visibility = 0;
9393

9494
bool yb_allow_block_based_sampling_algorithm = true;
9595

96-
// TODO(analyze_sampling): https://github.com/yugabyte/yugabyte-db/issues/26366:
97-
// Switch to true here and inside src/postgres/src/backend/utils/misc/guc.c.
98-
bool yb_allow_separate_requests_for_sampling_stages = false;
96+
bool yb_allow_separate_requests_for_sampling_stages = true;
9997

10098
// TODO(#24089): Once code duplication between yb_guc and ybc_util is removed, we should be able
10199
// to use YB_SAMPLING_ALGORITHM_BLOCK_BASED_SAMPLING instead of 1 and do it in one place.

src/yb/yql/pgwrapper/pg_wrapper.cc

+2-4
Original file line numberDiff line numberDiff line change
@@ -282,10 +282,8 @@ DEFINE_RUNTIME_AUTO_PG_FLAG(bool, yb_enable_replica_identity, kLocalPersisted, f
282282
DEFINE_RUNTIME_AUTO_PG_FLAG(bool, yb_allow_block_based_sampling_algorithm,
283283
kLocalVolatile, false, true, "Allow YsqlSamplingAlgorithm::BLOCK_BASED_SAMPLING");
284284

285-
// TODO(analyze_sampling): https://github.com/yugabyte/yugabyte-db/issues/26366:
286-
// Convert to auto flag with target value set to true after block-based ANALYZE with concurrent
287-
// dynamic tablet splitting is fully supported and tested.
288-
DEFINE_RUNTIME_PG_FLAG(bool, yb_allow_separate_requests_for_sampling_stages, false,
285+
DEFINE_RUNTIME_AUTO_PG_FLAG(
286+
bool, yb_allow_separate_requests_for_sampling_stages, kLocalVolatile, false, true,
289287
"Allow using separate requests for block-based sampling stages");
290288

291289
DEFINE_RUNTIME_PG_FLAG(

0 commit comments

Comments
 (0)