Skip to content

Commit b29d578

Browse files
antekresicfabriziomellomkindahl
committed
Avoid unnecessary scheduler restarts
When a user creates a database based on a template a FATAL error will be generated, which causes the scheduler to restart. The same happens if a FATAL error is generate in any other way, e.g., if the database does not exist. If you use an actual template database (like template0) this can cause endless errors and retries wasting bgw slots. Hence, we avoid starting the bgw scheduler for databases that does not exist and databases we cannot connect to. In addition to when creating a new database, a re-starting scheduler might generate errors if the database has gone away or if permissions for the database changed. This will trigger an endlessly restarting scheduler. We fix this issue by overriding connection block when connecting to the database and perform the check ourselves to avoid a fatal error, and capturing any non-fatal error that can be generated in the scheduler startup phase and instead report the error ourselves and exiting with exit code 0 (which means the scheduler will not restart). If an error occurs inside the main scheduler process, this will still trigger a restart. Co-authored-by: Fabrízio de Royes Mello <[email protected]> Co-authored-by: Mats Kindahl <[email protected]>
1 parent a650909 commit b29d578

File tree

3 files changed

+117
-40
lines changed

3 files changed

+117
-40
lines changed

.unreleased/pr_7834

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fixes: #7834 Avoid unnecessary scheduler restarts

src/loader/bgw_launcher.c

+81-39
Original file line numberDiff line numberDiff line change
@@ -899,7 +899,7 @@ ts_bgw_cluster_launcher_main(PG_FUNCTION_ARGS)
899899
* Modelled on autovacuum.c -> do_autovacuum.
900900
*/
901901
static void
902-
database_is_template_check(void)
902+
database_checks(void)
903903
{
904904
Form_pg_database pgdb;
905905
HeapTuple tuple;
@@ -911,9 +911,17 @@ database_is_template_check(void)
911911
"syscache")));
912912

913913
pgdb = (Form_pg_database) GETSTRUCT(tuple);
914+
915+
if (!pgdb->datallowconn)
916+
ereport(ERROR,
917+
(errmsg("background worker \"%s\" trying to connect to database that does not "
918+
"allow connections, exiting",
919+
MyBgworkerEntry->bgw_name)));
920+
914921
if (pgdb->datistemplate)
915922
ereport(ERROR,
916-
(errmsg("TimescaleDB background worker connected to template database, exiting")));
923+
(errmsg("background worker \"%s\" trying to connect to template database, exiting",
924+
MyBgworkerEntry->bgw_name)));
917925

918926
ReleaseSysCache(tuple);
919927
}
@@ -951,6 +959,63 @@ process_settings(Oid databaseid)
951959
table_close(relsetting, AccessShareLock);
952960
}
953961

962+
/*
963+
* Get the versioned scheduler for the database.
964+
*
965+
* This captures any errors generated while fetching information and print
966+
* them out, but does not propagate the error further since that might trigger
967+
* a restart.
968+
*/
969+
static PGFunction
970+
get_versioned_scheduler()
971+
{
972+
PG_TRY();
973+
{
974+
bool ts_installed = false;
975+
char version[MAX_VERSION_LEN];
976+
977+
/*
978+
* now we can start our transaction and get the version currently
979+
* installed
980+
*/
981+
StartTransactionCommand();
982+
(void) GetTransactionSnapshot();
983+
984+
/*
985+
* Check whether a database is a template database and raise an error if
986+
* so, as we don't want to run in template dbs.
987+
*/
988+
database_checks();
989+
/* Process any config changes caused by an ALTER DATABASE */
990+
process_settings(MyDatabaseId);
991+
ts_installed = ts_loader_extension_exists();
992+
if (ts_installed)
993+
strlcpy(version, ts_loader_extension_version(), MAX_VERSION_LEN);
994+
995+
ts_loader_extension_check();
996+
CommitTransactionCommand();
997+
if (ts_installed)
998+
{
999+
char soname[MAX_SO_NAME_LEN];
1000+
snprintf(soname, MAX_SO_NAME_LEN, "%s-%s", EXTENSION_SO, version);
1001+
PGFunction versioned_scheduler_main =
1002+
load_external_function(soname, BGW_DB_SCHEDULER_FUNCNAME, false, NULL);
1003+
if (versioned_scheduler_main == NULL)
1004+
ereport(ERROR,
1005+
(errmsg("TimescaleDB version %s does not have a background worker, exiting",
1006+
soname)));
1007+
return versioned_scheduler_main;
1008+
}
1009+
}
1010+
PG_CATCH();
1011+
{
1012+
EmitErrorReport();
1013+
FlushErrorState();
1014+
}
1015+
PG_END_TRY();
1016+
return NULL;
1017+
}
1018+
9541019
/*
9551020
* This can be run either from the cluster launcher at db_startup time, or
9561021
* in the case of an install/uninstall/update of the extension, in the
@@ -964,14 +1029,18 @@ extern Datum
9641029
ts_bgw_db_scheduler_entrypoint(PG_FUNCTION_ARGS)
9651030
{
9661031
Oid db_id = DatumGetObjectId(MyBgworkerEntry->bgw_main_arg);
967-
bool ts_installed = false;
968-
char version[MAX_VERSION_LEN];
9691032
VirtualTransactionId vxid;
9701033

9711034
pqsignal(SIGINT, StatementCancelHandler);
9721035
pqsignal(SIGTERM, die);
9731036
BackgroundWorkerUnblockSignals();
974-
BackgroundWorkerInitializeConnectionByOid(db_id, InvalidOid, 0);
1037+
1038+
/*
1039+
* Connecting to a database that does not allow connections will generate
1040+
* a FATAL error, which might trigger restarts, so we override this check
1041+
* and do it ourselves.
1042+
*/
1043+
BackgroundWorkerInitializeConnectionByOid(db_id, InvalidOid, BGWORKER_BYPASS_ALLOWCONN);
9751044
pgstat_report_appname(MyBgworkerEntry->bgw_name);
9761045

9771046
/*
@@ -988,40 +1057,13 @@ ts_bgw_db_scheduler_entrypoint(PG_FUNCTION_ARGS)
9881057
CommitTransactionCommand();
9891058

9901059
/*
991-
* now we can start our transaction and get the version currently
992-
* installed
993-
*/
994-
StartTransactionCommand();
995-
(void) GetTransactionSnapshot();
996-
997-
/*
998-
* Check whether a database is a template database and raise an error if
999-
* so, as we don't want to run in template dbs.
1060+
* Essentially we morph into the versioned worker here, if there is one.
1061+
*
1062+
* If an error is generated here, we should trigger a restart (if the
1063+
* scheduler is configured for that).
10001064
*/
1001-
database_is_template_check();
1002-
/* Process any config changes caused by an ALTER DATABASE */
1003-
process_settings(MyDatabaseId);
1004-
ts_installed = ts_loader_extension_exists();
1005-
if (ts_installed)
1006-
strlcpy(version, ts_loader_extension_version(), MAX_VERSION_LEN);
1007-
1008-
ts_loader_extension_check();
1009-
CommitTransactionCommand();
1010-
if (ts_installed)
1011-
{
1012-
char soname[MAX_SO_NAME_LEN];
1013-
PGFunction versioned_scheduler_main;
1014-
1015-
snprintf(soname, MAX_SO_NAME_LEN, "%s-%s", EXTENSION_SO, version);
1016-
versioned_scheduler_main =
1017-
load_external_function(soname, BGW_DB_SCHEDULER_FUNCNAME, false, NULL);
1018-
if (versioned_scheduler_main == NULL)
1019-
ereport(LOG,
1020-
(errmsg("TimescaleDB version %s does not have a background worker, exiting",
1021-
soname)));
1022-
else /* essentially we morph into the versioned
1023-
* worker here */
1024-
DirectFunctionCall1(versioned_scheduler_main, ObjectIdGetDatum(InvalidOid));
1025-
}
1065+
PGFunction versioned_scheduler_main = get_versioned_scheduler();
1066+
if (versioned_scheduler_main)
1067+
DirectFunctionCall1(versioned_scheduler_main, ObjectIdGetDatum(InvalidOid));
10261068
PG_RETURN_VOID();
10271069
}

src/loader/loader.c

+35-1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <access/heapam.h>
99
#include <access/parallel.h>
1010
#include <access/xact.h>
11+
#include <catalog/pg_database.h>
1112
#include <commands/dbcommands.h>
1213
#include <commands/defrem.h>
1314
#include <commands/user.h>
@@ -390,6 +391,39 @@ stop_workers_on_db_drop(DropdbStmt *drop_db_statement)
390391
}
391392
}
392393

394+
static bool
395+
database_allowconn(const Oid db_oid)
396+
{
397+
Relation pg_database;
398+
ScanKeyData entry[1];
399+
SysScanDesc scan;
400+
HeapTuple dbtuple;
401+
bool allowconn = false;
402+
403+
pg_database = table_open(DatabaseRelationId, AccessShareLock);
404+
ScanKeyInit(&entry[0],
405+
Anum_pg_database_oid,
406+
BTEqualStrategyNumber,
407+
F_OIDEQ,
408+
ObjectIdGetDatum(db_oid));
409+
scan = systable_beginscan(pg_database, DatabaseOidIndexId, true, NULL, 1, entry);
410+
411+
dbtuple = systable_getnext(scan);
412+
413+
/* We assume that there can be at most one matching tuple */
414+
if (!HeapTupleIsValid(dbtuple))
415+
ereport(ERROR,
416+
(errcode(ERRCODE_UNDEFINED_DATABASE),
417+
errmsg("database with OID \"%u\" does not exist", db_oid)));
418+
419+
allowconn = ((Form_pg_database) GETSTRUCT(dbtuple))->datallowconn;
420+
421+
systable_endscan(scan);
422+
table_close(pg_database, AccessShareLock);
423+
424+
return allowconn;
425+
}
426+
393427
static void
394428
post_analyze_hook(ParseState *pstate, Query *query, JumbleState *jstate)
395429
{
@@ -440,7 +474,7 @@ post_analyze_hook(ParseState *pstate, Query *query, JumbleState *jstate)
440474
{
441475
Oid db_oid = get_database_oid(defGetString(option), false);
442476

443-
if (OidIsValid(db_oid))
477+
if (OidIsValid(db_oid) && database_allowconn(db_oid))
444478
ts_bgw_message_send_and_wait(RESTART, db_oid);
445479
}
446480
}

0 commit comments

Comments
 (0)