Skip to content

Commit b7a0458

Browse files
author
Tanuj Nayak
committed
[#26629] YSQL: Push down ef_search parameter to ybhnsw
Summary: The GUC `hnsw.ef_search` in pgvector controls the expansion factor for HNSW searches, where higher values improve accuracy at the cost of increased latency. This change introduces a corresponding GUC, `ybhnsw.ef_search`, and propagates it to DocDB. Additional work is required in DocDB to utilize the propagated value. **Upgrade/Rollback safety:** This change adds new fields to `PgVectorReadOptionsPB` in common.proto that are unused in DocDB code as of this change, so it does not introduce any upgrade/rollback issues. Test Plan: Jenkins; manual testing by observing the DocDB requests of ybhnsw queries to make sure the correct `ef_search` parameter is passed down. Reviewers: kramanathan. Reviewed By: kramanathan. Subscribers: yql. Differential Revision: https://phorge.dev.yugabyte.com/D42934
1 parent 1047ea0 commit b7a0458

File tree

10 files changed

+78
-7
lines changed

10 files changed

+78
-7
lines changed

src/postgres/third-party-extensions/pgvector/src/ybvector/ybhnsw.c

+41-7
Original file line numberDiff line numberDiff line change
@@ -42,18 +42,25 @@
4242
#define YBHNSW_MIN_EF_CONSTRUCTION 50
4343
#define YBHNSW_MAX_EF_CONSTRUCTION 1000
4444

45+
/* Imported from pgvector defaults as of v0.8.0. */
46+
#define YBHNSW_DEFAULT_EF_SEARCH 40
47+
#define YBHNSW_MIN_EF_SEARCH 1
48+
#define YBHNSW_MAX_EF_SEARCH 1000
49+
4550
static relopt_kind ybhnsw_relopt_kind;
4651

52+
int ybhnsw_ef_search;
53+
4754
/*
4855
* Copied from pgvector's HnswInit (as of pgvector v0.8.0).
4956
*/
50-
typedef struct YbHnswOptions
57+
typedef struct YbHnswCreateOptions
5158
{
5259
int32 vl_len_; /* varlena header (do not touch directly!) */
5360
int m; /* number of connections per node */
5461
int m0; /* number of connections per node in base level */
5562
int ef_construction; /* size of dynamic candidate list */
56-
} YbHnswOptions;
63+
} YbHnswCreateOptions;
5764

5865
void
5966
YbHnswInit(void)
@@ -71,6 +78,11 @@ YbHnswInit(void)
7178
YBHNSW_DEFAULT_EF_CONSTRUCTION,
7279
YBHNSW_MIN_EF_CONSTRUCTION, YBHNSW_MAX_EF_CONSTRUCTION,
7380
AccessExclusiveLock);
81+
82+
DefineCustomIntVariable("ybhnsw.ef_search", "Sets the size of the dynamic candidate list for search",
83+
"Valid range is 1..1000.", &ybhnsw_ef_search,
84+
YBHNSW_DEFAULT_EF_SEARCH, YBHNSW_MIN_EF_SEARCH, YBHNSW_MAX_EF_SEARCH, PGC_USERSET, 0, NULL, NULL, NULL);
85+
MarkGUCPrefixReserved("ybhnsw");
7486
}
7587

7688
/*
@@ -83,22 +95,22 @@ ybhnswoptions(Datum reloptions, bool validate)
8395
* Copied from pgvector's hnswoptions (as of pgvector v0.8.0).
8496
*/
8597
static const relopt_parse_elt tab[] = {
86-
{"m", RELOPT_TYPE_INT, offsetof(YbHnswOptions, m)},
87-
{"m0", RELOPT_TYPE_INT, offsetof(YbHnswOptions, m0)},
98+
{"m", RELOPT_TYPE_INT, offsetof(YbHnswCreateOptions, m)},
99+
{"m0", RELOPT_TYPE_INT, offsetof(YbHnswCreateOptions, m0)},
88100
{"ef_construction", RELOPT_TYPE_INT,
89-
offsetof(YbHnswOptions, ef_construction)},
101+
offsetof(YbHnswCreateOptions, ef_construction)},
90102
};
91103

92104
return (bytea *) build_reloptions(reloptions, validate,
93105
ybhnsw_relopt_kind,
94-
sizeof(YbHnswOptions),
106+
sizeof(YbHnswCreateOptions),
95107
tab, lengthof(tab));
96108
}
97109

98110
static void
99111
bindYbHnswIndexOptions(YbcPgStatement handle, Datum reloptions)
100112
{
101-
YbHnswOptions *hnsw_options = (YbHnswOptions *) ybhnswoptions(reloptions, false);
113+
YbHnswCreateOptions *hnsw_options = (YbHnswCreateOptions *) ybhnswoptions(reloptions, false);
102114
int m = YBHNSW_DEFAULT_M;
103115
int m0 = YBHNSW_DEFAULT_M;
104116
int ef_construction = YBHNSW_DEFAULT_EF_CONSTRUCTION;
@@ -149,6 +161,26 @@ ybhnswbindcolumnschema(YbcPgStatement handle,
149161
YBCBindCreateIndexColumns(handle, indexInfo, indexTupleDesc, coloptions, 0);
150162
}
151163

164+
static void
165+
ybBindHnswReadOptions(YbScanDesc yb_scan)
166+
{
167+
YBCPgDmlHnswSetReadOptions(yb_scan->handle, ybhnsw_ef_search);
168+
}
169+
170+
/*
171+
* ybhnswrescan
172+
* Reset temporary structures to prepare for rescan, then bind the HNSW read options (ef_search) to the scan.
173+
*/
174+
void
175+
ybhnswrescan(IndexScanDesc scan, ScanKey scankeys, int nscankeys,
176+
ScanKey orderbys, int norderbys)
177+
{
178+
ybvectorrescan(scan, scankeys, nscankeys, orderbys, norderbys);
179+
YbVectorScanOpaque so = (YbVectorScanOpaque) scan->opaque;
180+
YbScanDesc ybscan = so->yb_scan_desc;
181+
ybBindHnswReadOptions(ybscan);
182+
}
183+
152184

153185
/*
154186
* ybhnswhandler handler function: return
@@ -164,5 +196,7 @@ ybhnswhandler(PG_FUNCTION_ARGS)
164196
amroutine->yb_ambindschema = ybhnswbindcolumnschema;
165197
amroutine->amoptions = ybhnswoptions;
166198

199+
amroutine->amrescan = ybhnswrescan;
200+
167201
PG_RETURN_POINTER(amroutine);
168202
}

src/yb/common/common.proto

+7
Original file line numberDiff line numberDiff line change
@@ -150,9 +150,16 @@ message PgVectorIdxOptionsPB {
150150
optional PgHnswIndexOptionsPB hnsw = 7;
151151
}
152152

153+
message PgHnswReadOptionsPB {
154+
required uint32 ef_search = 1;
155+
}
156+
153157
message PgVectorReadOptionsPB {
154158
required QLValuePB vector = 1;
155159
required int32 prefetch_size = 2;
160+
oneof index_params {
161+
PgHnswReadOptionsPB hnsw_options = 3;
162+
}
156163
}
157164

158165
message TablePropertiesPB {

src/yb/yql/pggate/pg_dml.cc

+8
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,14 @@ Status PgDml::ANNSetPrefetchSize(int32_t prefetch_size) {
251251
return down_cast<PgDmlRead*>(this)->ANNSetPrefetchSize(prefetch_size);
252252
}
253253

254+
Status PgDml::HnswSetReadOptions(int ef_search) {
255+
if (auto* secondary_index = SecondaryIndexQuery(); secondary_index) {
256+
return secondary_index->HnswSetReadOptions(ef_search);
257+
}
258+
259+
return down_cast<PgDmlRead*>(this)->HnswSetReadOptions(ef_search);
260+
}
261+
254262
Status PgDml::BindTable() {
255263
bind_table_ = true;
256264
return Status::OK();

src/yb/yql/pggate/pg_dml.h

+3
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,9 @@ class PgDml : public PgStatement {
7373
// Bind prefetch size to the current vector index search.
7474
Status ANNSetPrefetchSize(int32_t prefetch_size);
7575

76+
// Set HNSW read options.
77+
Status HnswSetReadOptions(int ef_search);
78+
7679
// Bind the whole table.
7780
Status BindTable();
7881

src/yb/yql/pggate/pg_dml_read.cc

+5
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,11 @@ Status PgDmlRead::ANNSetPrefetchSize(int32_t prefetch_size) {
431431
return Status::OK();
432432
}
433433

434+
Status PgDmlRead::HnswSetReadOptions(int ef_search) {
435+
read_req_->mutable_vector_idx_options()->mutable_hnsw_options()->set_ef_search(ef_search);
436+
return Status::OK();
437+
}
438+
434439
Status PgDmlRead::Exec(const YbcPgExecParameters* exec_params) {
435440
RSTATUS_DCHECK(
436441
!pg_exec_params_ || pg_exec_params_ == exec_params,

src/yb/yql/pggate/pg_dml_read.h

+2
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,8 @@ class PgDmlRead : public PgDml {
113113
Status ANNBindVector(PgExpr* vector);
114114
Status ANNSetPrefetchSize(int32_t prefetch_size);
115115

116+
Status HnswSetReadOptions(int ef_search);
117+
116118
void SetCatalogCacheVersion(std::optional<PgOid> db_oid, uint64_t version) override {
117119
DoSetCatalogCacheVersion(read_req_.get(), db_oid, version);
118120
}

src/yb/yql/pggate/pggate.cc

+4
Original file line numberDiff line numberDiff line change
@@ -1565,6 +1565,10 @@ Status PgApiImpl::DmlANNSetPrefetchSize(PgStatement* handle, int prefetch_size)
15651565
return VERIFY_RESULT_REF(GetStatementAs<PgDml>(handle)).ANNSetPrefetchSize(prefetch_size);
15661566
}
15671567

1568+
Status PgApiImpl::DmlHnswSetReadOptions(PgStatement* handle, int ef_search) {
1569+
return VERIFY_RESULT_REF(GetStatementAs<PgDml>(handle)).HnswSetReadOptions(ef_search);
1570+
}
1571+
15681572
Status PgApiImpl::ExecSelect(PgStatement* handle, const YbcPgExecParameters* exec_params) {
15691573
auto& select = VERIFY_RESULT_REF(GetStatementAs<PgSelect>(handle));
15701574
if (pg_sys_table_prefetcher_ && select.IsReadFromYsqlCatalog() && select.read_req()) {

src/yb/yql/pggate/pggate.h

+2
Original file line numberDiff line numberDiff line change
@@ -607,6 +607,8 @@ class PgApiImpl {
607607

608608
Status DmlANNSetPrefetchSize(PgStatement *handle, int prefetch_size);
609609

610+
Status DmlHnswSetReadOptions(PgStatement *handle, int ef_search);
611+
610612

611613
//------------------------------------------------------------------------------------------------
612614
// Functions.

src/yb/yql/pggate/ybc_pggate.cc

+4
Original file line numberDiff line numberDiff line change
@@ -1514,6 +1514,10 @@ YbcStatus YBCPgDmlANNSetPrefetchSize(YbcPgStatement handle, int prefetch_size) {
15141514
return ToYBCStatus(pgapi->DmlANNSetPrefetchSize(handle, prefetch_size));
15151515
}
15161516

1517+
YbcStatus YBCPgDmlHnswSetReadOptions(YbcPgStatement handle, int ef_search) {
1518+
return ToYBCStatus(pgapi->DmlHnswSetReadOptions(handle, ef_search));
1519+
}
1520+
15171521
YbcStatus YBCPgDmlFetch(YbcPgStatement handle, int32_t natts, uint64_t *values, bool *isnulls,
15181522
YbcPgSysColumns *syscols, bool *has_data) {
15191523
return ToYBCStatus(pgapi->DmlFetch(handle, natts, values, isnulls, syscols, has_data));

src/yb/yql/pggate/ybc_pggate.h

+2
Original file line numberDiff line numberDiff line change
@@ -565,6 +565,8 @@ YbcStatus YBCPgDmlANNBindVector(YbcPgStatement handle, YbcPgExpr vector);
565565

566566
YbcStatus YBCPgDmlANNSetPrefetchSize(YbcPgStatement handle, int prefetch_size);
567567

568+
YbcStatus YBCPgDmlHnswSetReadOptions(YbcPgStatement handle, int ef_search);
569+
568570
// This function is to fetch the targets in YBCPgDmlAppendTarget() from the rows that were defined
569571
// by YBCPgDmlBindColumn().
570572
YbcStatus YBCPgDmlFetch(YbcPgStatement handle, int32_t natts, uint64_t *values, bool *isnulls,

0 commit comments

Comments
 (0)