Skip to content

Commit 10f18ec

Browse files
committed
[#26648] YSQL: Recommend read committed isolation
Summary: ### Motivation Consider the following scenario. Setup: ``` CREATE TABLE tokens(token INT); INSERT INTO tokens SELECT i FROM GENERATE_SERIES(1, 100) i; ``` The following txn faces a read restart error when there is a concurrent insert: ``` BEGIN ISOLATION LEVEL READ COMMITTED; SELECT token FROM tokens LIMIT 1; SELECT token FROM tokens ORDER BY token; <--- read restart error here. ROLLBACK; ``` Concurrent insert: ``` INSERT INTO tokens SELECT i FROM GENERATE_SERIES(200, 300) i; ``` Error: ``` 2025-04-03 15:37:39.463 UTC [321440] ERROR: Restart read required at: { read: { days: 20181 time: 15:37:39.460582 } local_limit: { days: 20181 time: 15:37:39.460582 } global_limit: <min> in_txn_limit: <max> serial_no: 0 } (query layer retry isn't possible because data was already sent, if this is the read committed isolation (or) the first statement in repeatable read/ serializable isolation transaction, consider increasing the tserver gflag ysql_output_buffer_size) 2025-04-03 15:37:39.463 UTC [321440] STATEMENT: SELECT token FROM tokens ORDER BY token ``` The error recommends increasing ysql_output_buffer_size if this is the first statement of a repeatable read txn. This is not the first statement of the txn. However, that's difficult to read. Users often miss the condition and increase the buffer size. Moreover, the error message does not provide a good recommendation when the error is on a non-first statement of repeatable read. ### Fix Change the recommendation to consider read committed isolation level. ``` 2025-04-03 15:28:26.175 UTC [309395] ERROR: Restart read required at: { read: { days: 20181 time: 15:28:26.171916 } local_limit: { days: 20181 time: 15:28:26.171916 } global_limit: <min> in_txn_limit: <max> serial_no: 0 } (query layer retry isn't possible because this is not the first command in the transaction. Consider using READ COMMITTED isolation level. Please turn on yb_enable_read_committed_isolation to enable READ COMMITTED isolation level.) 
2025-04-03 15:28:26.175 UTC [309395] STATEMENT: SELECT token FROM tokens ORDER BY token ``` Fixes #26648 Jira: DB-16025 Test Plan: Jenkins ``` ./yb_build.sh --cxx-test pg_debug_read_restarts-test ./yb_build.sh --java-test TestPgRegressIsolationWithoutWaitQueues#testPgRegress ./yb_build.sh --java-test TestPgRegressWaitQueues#testPgRegress ./yb_build.sh --java-test TestPgRegressIsolation#testPgRegress ./yb_build.sh --java-test TestPgRegressIsolation#testPgRegressWithDelayedTxnApply ``` Backport-through: 2024.2 Reviewers: pjain, smishra Reviewed By: pjain Subscribers: yql Differential Revision: https://phorge.dev.yugabyte.com/D42974
1 parent d949e0a commit 10f18ec

9 files changed

+87
-28
lines changed

src/postgres/src/backend/access/transam/xact.c

-7
Original file line numberDiff line numberDiff line change
@@ -3217,13 +3217,6 @@ YBStartTransactionCommandInternal(bool yb_skip_read_committed_internal_savepoint
32173217
*/
32183218
if (YBTransactionsEnabled() && IsYBReadCommitted() && !yb_skip_read_committed_internal_savepoint)
32193219
{
3220-
/*
3221-
* Reset field ybDataSentForCurrQuery (indicates whether any data was sent as part of the
3222-
* current query). This helps track if automatic restart of a query is possible in
3223-
* READ COMMITTED isolation level.
3224-
*/
3225-
s->ybDataSentForCurrQuery = false;
3226-
32273220
/*
32283221
* Create a new internal sub txn before any execution. This aids in rolling back any changes
32293222
* before restarting the statement.

src/postgres/src/backend/tcop/postgres.c

+24-4
Original file line numberDiff line numberDiff line change
@@ -4825,6 +4825,27 @@ yb_is_retry_possible(ErrorData *edata, int attempt,
48254825
return false;
48264826
}
48274827

4828+
/*
4829+
* This check is not strictly necessary.
4830+
* However, recommend read committed isolation level when
4831+
* increasing ysql_output_buffer_size is ineffective.
4832+
*
4833+
* This scenario is retryable if isolation is read committed instead.
4834+
*/
4835+
if (!IsYBReadCommitted() && YBIsDataSent() && !YBIsDataSentForCurrQuery())
4836+
{
4837+
const char *retry_err = "";
4838+
4839+
retry_err = psprintf("query layer retry isn't possible because "
4840+
"this is not the first command in the "
4841+
"transaction. Consider using READ COMMITTED "
4842+
"isolation level.");
4843+
edata->message = psprintf("%s (%s)", edata->message, retry_err);
4844+
if (yb_debug_log_internal_restarts)
4845+
elog(LOG, "%s", retry_err);
4846+
return false;
4847+
}
4848+
48284849
/*
48294850
* In REPEATABLE READ and SERIALIZABLE isolation levels, retrying involves restarting the whole
48304851
* transaction. So, we can only retry if no data has been sent to the external client as part of
@@ -4838,10 +4859,7 @@ yb_is_retry_possible(ErrorData *edata, int attempt,
48384859
(IsYBReadCommitted() && YBIsDataSentForCurrQuery()))
48394860
{
48404861
const char *retry_err = ("query layer retry isn't possible because "
4841-
"data was already sent, if this is the read "
4842-
"committed isolation (or) the first "
4843-
"statement in repeatable read/ serializable "
4844-
"isolation transaction, consider increasing "
4862+
"data was already transferred, consider increasing "
48454863
"the tserver gflag ysql_output_buffer_size");
48464864

48474865
edata->message = psprintf("%s (%s)", edata->message, retry_err);
@@ -6254,6 +6272,8 @@ PostgresMain(const char *dbname, const char *username)
62546272
yb_catalog_version_type != CATALOG_VERSION_CATALOG_TABLE)
62556273
yb_catalog_version_type = CATALOG_VERSION_UNSET;
62566274
yb_is_multi_statement_query = false;
6275+
/* New query => no data has been sent for the current query yet. */
6276+
YBMarkDataNotSentForCurrQuery();
62576277
}
62586278

62596279
switch (firstchar)

src/postgres/src/test/isolation/expected/yb.orig.deadlock.out

+2-2
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ step s3_upd_all_k_except_2: UPDATE test SET v=3 where k!=2; <waiting ...>
3434
step s1c: COMMIT;
3535
step s2_upd_all_k_except_1: <... completed>
3636
step s3_upd_all_k_except_2: <... completed>
37-
ERROR: deadlock detected (query layer retry isn't possible because data was already sent, if this is the read committed isolation (or) the first statement in repeatable read/ serializable isolation transaction, consider increasing the tserver gflag ysql_output_buffer_size)
37+
ERROR: deadlock detected (query layer retry isn't possible because this is not the first command in the transaction. Consider using READ COMMITTED isolation level.)
3838
step s2c: COMMIT;
3939
step s3c: COMMIT;
4040
step s1_select: SELECT * FROM test ORDER BY k;
@@ -70,7 +70,7 @@ step s2_upd_k1: UPDATE test SET v=2 WHERE k=1;
7070
step s3_upd_k2: UPDATE test SET v=3 WHERE k=2;
7171
step s2_upd_all_k_except_1: UPDATE test SET v=2 where k!=1; <waiting ...>
7272
step s3_upd_all_k_except_2: UPDATE test SET v=3 where k!=2;
73-
ERROR: deadlock detected (query layer retry isn't possible because data was already sent, if this is the read committed isolation (or) the first statement in repeatable read/ serializable isolation transaction, consider increasing the tserver gflag ysql_output_buffer_size)
73+
ERROR: deadlock detected (query layer retry isn't possible because this is not the first command in the transaction. Consider using READ COMMITTED isolation level.)
7474
step s2_upd_all_k_except_1: <... completed>
7575
step s2c: COMMIT;
7676
step s3c: COMMIT;

src/postgres/src/test/isolation/expected/yb.orig.fk-relationship.out

+2-2
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ step s1_serializable_txn: BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE;
9696
step s1_update_fk1: UPDATE tb SET fk1 = 10 WHERE k = 1;
9797
step s2_update_fk2: UPDATE tb SET fk2 = 10 WHERE k = 1;
9898
step s1_commit: COMMIT;
99-
ERROR: could not serialize access due to concurrent update (query layer retry isn't possible because data was already sent, if this is the read committed isolation (or) the first statement in repeatable read/ serializable isolation transaction, consider increasing the tserver gflag ysql_output_buffer_size)
99+
ERROR: could not serialize access due to concurrent update (query layer retry isn't possible because this is not the first command in the transaction. Consider using READ COMMITTED isolation level.)
100100
step s1_select_tb: SELECT * FROM tb;
101101
k|fk1|fk2
102102
-+---+---
@@ -139,7 +139,7 @@ step s1_repeatable_read_txn: BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
139139
step s1_update_fk1: UPDATE tb SET fk1 = 10 WHERE k = 1;
140140
step s2_update_fk2: UPDATE tb SET fk2 = 10 WHERE k = 1;
141141
step s1_commit: COMMIT;
142-
ERROR: could not serialize access due to concurrent update (query layer retry isn't possible because data was already sent, if this is the read committed isolation (or) the first statement in repeatable read/ serializable isolation transaction, consider increasing the tserver gflag ysql_output_buffer_size)
142+
ERROR: could not serialize access due to concurrent update (query layer retry isn't possible because this is not the first command in the transaction. Consider using READ COMMITTED isolation level.)
143143
step s1_select_tb: SELECT * FROM tb;
144144
k|fk1|fk2
145145
-+---+---

src/postgres/src/test/isolation/expected/yb.orig.modify-transaction-characteristics.out

+8-8
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ step s1_update: UPDATE test SET v=v+1 WHERE k=1;
2121
step s2_update: UPDATE test SET v=v*2 WHERE k=1; <waiting ...>
2222
step s1_commit: COMMIT;
2323
step s2_update: <... completed>
24-
ERROR: could not serialize access due to concurrent update (query layer retry isn't possible because data was already sent, if this is the read committed isolation (or) the first statement in repeatable read/ serializable isolation transaction, consider increasing the tserver gflag ysql_output_buffer_size)
24+
ERROR: could not serialize access due to concurrent update (query layer retry isn't possible because this is not the first command in the transaction. Consider using READ COMMITTED isolation level.)
2525
step s2_rollback: ROLLBACK;
2626
step s2_select: SELECT * FROM test;
2727
k|v
@@ -51,7 +51,7 @@ step s1_update: UPDATE test SET v=v+1 WHERE k=1;
5151
step s2_update: UPDATE test SET v=v*2 WHERE k=1; <waiting ...>
5252
step s1_commit: COMMIT;
5353
step s2_update: <... completed>
54-
ERROR: could not serialize access due to concurrent update (query layer retry isn't possible because data was already sent, if this is the read committed isolation (or) the first statement in repeatable read/ serializable isolation transaction, consider increasing the tserver gflag ysql_output_buffer_size)
54+
ERROR: could not serialize access due to concurrent update (query layer retry isn't possible because this is not the first command in the transaction. Consider using READ COMMITTED isolation level.)
5555
step s2_rollback: ROLLBACK;
5656
step s2_select: SELECT * FROM test;
5757
k|v
@@ -83,7 +83,7 @@ step s1_update: UPDATE test SET v=v+1 WHERE k=1;
8383
step s2_update: UPDATE test SET v=v*2 WHERE k=1; <waiting ...>
8484
step s1_commit: COMMIT;
8585
step s2_update: <... completed>
86-
ERROR: could not serialize access due to concurrent update (query layer retry isn't possible because data was already sent, if this is the read committed isolation (or) the first statement in repeatable read/ serializable isolation transaction, consider increasing the tserver gflag ysql_output_buffer_size)
86+
ERROR: could not serialize access due to concurrent update (query layer retry isn't possible because this is not the first command in the transaction. Consider using READ COMMITTED isolation level.)
8787
step s2_rollback: ROLLBACK;
8888
step s2_select: SELECT * FROM test;
8989
k|v
@@ -113,7 +113,7 @@ step s1_update: UPDATE test SET v=v+1 WHERE k=1;
113113
step s2_update: UPDATE test SET v=v*2 WHERE k=1; <waiting ...>
114114
step s1_commit: COMMIT;
115115
step s2_update: <... completed>
116-
ERROR: could not serialize access due to concurrent update (query layer retry isn't possible because data was already sent, if this is the read committed isolation (or) the first statement in repeatable read/ serializable isolation transaction, consider increasing the tserver gflag ysql_output_buffer_size)
116+
ERROR: could not serialize access due to concurrent update (query layer retry isn't possible because this is not the first command in the transaction. Consider using READ COMMITTED isolation level.)
117117
step s2_rollback: ROLLBACK;
118118
step s2_select: SELECT * FROM test;
119119
k|v
@@ -259,7 +259,7 @@ k|v
259259

260260
step s1_update: UPDATE test SET v=v+1 WHERE k=1; <waiting ...>
261261
step s2_update: UPDATE test SET v=v*2 WHERE k=1;
262-
ERROR: deadlock detected (query layer retry isn't possible because data was already sent, if this is the read committed isolation (or) the first statement in repeatable read/ serializable isolation transaction, consider increasing the tserver gflag ysql_output_buffer_size)
262+
ERROR: deadlock detected (query layer retry isn't possible because this is not the first command in the transaction. Consider using READ COMMITTED isolation level.)
263263
step s1_update: <... completed>
264264
step s1_commit: COMMIT;
265265
step s2_rollback: ROLLBACK;
@@ -289,7 +289,7 @@ k|v
289289

290290
step s1_update: UPDATE test SET v=v+1 WHERE k=1; <waiting ...>
291291
step s2_update: UPDATE test SET v=v*2 WHERE k=1;
292-
ERROR: deadlock detected (query layer retry isn't possible because data was already sent, if this is the read committed isolation (or) the first statement in repeatable read/ serializable isolation transaction, consider increasing the tserver gflag ysql_output_buffer_size)
292+
ERROR: deadlock detected (query layer retry isn't possible because this is not the first command in the transaction. Consider using READ COMMITTED isolation level.)
293293
step s1_update: <... completed>
294294
step s1_commit: COMMIT;
295295
step s2_rollback: ROLLBACK;
@@ -321,7 +321,7 @@ k|v
321321

322322
step s1_update: UPDATE test SET v=v+1 WHERE k=1; <waiting ...>
323323
step s2_update: UPDATE test SET v=v*2 WHERE k=1;
324-
ERROR: deadlock detected (query layer retry isn't possible because data was already sent, if this is the read committed isolation (or) the first statement in repeatable read/ serializable isolation transaction, consider increasing the tserver gflag ysql_output_buffer_size)
324+
ERROR: deadlock detected (query layer retry isn't possible because this is not the first command in the transaction. Consider using READ COMMITTED isolation level.)
325325
step s1_update: <... completed>
326326
step s1_commit: COMMIT;
327327
step s2_rollback: ROLLBACK;
@@ -351,7 +351,7 @@ k|v
351351

352352
step s1_update: UPDATE test SET v=v+1 WHERE k=1; <waiting ...>
353353
step s2_update: UPDATE test SET v=v*2 WHERE k=1;
354-
ERROR: deadlock detected (query layer retry isn't possible because data was already sent, if this is the read committed isolation (or) the first statement in repeatable read/ serializable isolation transaction, consider increasing the tserver gflag ysql_output_buffer_size)
354+
ERROR: deadlock detected (query layer retry isn't possible because this is not the first command in the transaction. Consider using READ COMMITTED isolation level.)
355355
step s1_update: <... completed>
356356
step s1_commit: COMMIT;
357357
step s2_rollback: ROLLBACK;

src/postgres/src/test/isolation/expected/yb.orig.only-abort-sub-txn.out

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ step s1_pick_read_time: SELECT * FROM tbl;
1919
step s2_update_conflicting_key: UPDATE tbl SET v = 2 WHERE k = 1;
2020
step s1_savepoint_a: SAVEPOINT a;
2121
step s1_update_conflicting_key: UPDATE tbl SET v = 3 WHERE k = 1;
22-
ERROR: could not serialize access due to concurrent update (query layer retry isn't possible because data was already sent, if this is the read committed isolation (or) the first statement in repeatable read/ serializable isolation transaction, consider increasing the tserver gflag ysql_output_buffer_size)
22+
ERROR: could not serialize access due to concurrent update (query layer retry isn't possible because this is not the first command in the transaction. Consider using READ COMMITTED isolation level.)
2323
step s1_rollback_to_a: ROLLBACK TO a;
2424
step s1_update_non_conflicting_key: UPDATE tbl SET v = 3 WHERE k = 2;
2525
step s1_commit: COMMIT;

src/postgres/src/test/isolation/expected/yb.orig.user-managed-constraint.out

+4-4
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ step s1_serializable_txn: BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE;
6767
step s1_update: UPDATE t SET v1 = add_with_limit(v1, v2, 35) WHERE k = 1;
6868
step s2_update: UPDATE t SET v2 = add_with_limit(v2, v1, 35) WHERE k = 1;
6969
step s1_commit: COMMIT;
70-
ERROR: could not serialize access due to concurrent update (query layer retry isn't possible because data was already sent, if this is the read committed isolation (or) the first statement in repeatable read/ serializable isolation transaction, consider increasing the tserver gflag ysql_output_buffer_size)
70+
ERROR: could not serialize access due to concurrent update (query layer retry isn't possible because this is not the first command in the transaction. Consider using READ COMMITTED isolation level.)
7171
step s1_select: select *, v1 + v2 from t;
7272
k|v1|v2|?column?
7373
-+--+--+--------
@@ -84,7 +84,7 @@ step s1_repeatable_read_txn: BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
8484
step s1_update: UPDATE t SET v1 = add_with_limit(v1, v2, 35) WHERE k = 1;
8585
step s2_update: UPDATE t SET v2 = add_with_limit(v2, v1, 35) WHERE k = 1;
8686
step s1_commit: COMMIT;
87-
ERROR: could not serialize access due to concurrent update (query layer retry isn't possible because data was already sent, if this is the read committed isolation (or) the first statement in repeatable read/ serializable isolation transaction, consider increasing the tserver gflag ysql_output_buffer_size)
87+
ERROR: could not serialize access due to concurrent update (query layer retry isn't possible because this is not the first command in the transaction. Consider using READ COMMITTED isolation level.)
8888
step s1_select: select *, v1 + v2 from t;
8989
k|v1|v2|?column?
9090
-+--+--+--------
@@ -193,7 +193,7 @@ step s2_update_case:
193193
UPDATE t SET v2 = CASE WHEN v1 + v2 < 35 THEN v1 + v2 ELSE 0 END WHERE k = 1;
194194

195195
step s1_commit: COMMIT;
196-
ERROR: could not serialize access due to concurrent update (query layer retry isn't possible because data was already sent, if this is the read committed isolation (or) the first statement in repeatable read/ serializable isolation transaction, consider increasing the tserver gflag ysql_output_buffer_size)
196+
ERROR: could not serialize access due to concurrent update (query layer retry isn't possible because this is not the first command in the transaction. Consider using READ COMMITTED isolation level.)
197197
step s1_select: select *, v1 + v2 from t;
198198
k|v1|v2|?column?
199199
-+--+--+--------
@@ -214,7 +214,7 @@ step s2_update_case:
214214
UPDATE t SET v2 = CASE WHEN v1 + v2 < 35 THEN v1 + v2 ELSE 0 END WHERE k = 1;
215215

216216
step s1_commit: COMMIT;
217-
ERROR: could not serialize access due to concurrent update (query layer retry isn't possible because data was already sent, if this is the read committed isolation (or) the first statement in repeatable read/ serializable isolation transaction, consider increasing the tserver gflag ysql_output_buffer_size)
217+
ERROR: could not serialize access due to concurrent update (query layer retry isn't possible because this is not the first command in the transaction. Consider using READ COMMITTED isolation level.)
218218
step s1_select: select *, v1 + v2 from t;
219219
k|v1|v2|?column?
220220
-+--+--+--------

src/yb/yql/pgwrapper/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ ADD_YB_TEST(pg_create_database-test)
131131
ADD_YB_TEST(pg_ddl_atomicity-test)
132132
ADD_YB_TEST(pg_ddl_atomicity_stress-test)
133133
ADD_YB_TEST(pg_ddl_concurrency-test)
134+
ADD_YB_TEST(pg_debug_read_restarts-test)
134135
ADD_YB_TEST(pg_drop_column_test)
135136
ADD_YB_TEST(pg_export_snapshot-test)
136137
ADD_YB_TEST(pg_explicit_lock-test)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
// Copyright (c) YugabyteDB, Inc.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
4+
// in compliance with the License. You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software distributed under the License
9+
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
10+
// or implied. See the License for the specific language governing permissions and limitations
11+
// under the License.
12+
//
13+
14+
#include "yb/yql/pgwrapper/libpq_utils.h"
15+
#include "yb/yql/pgwrapper/pg_mini_test_base.h"
16+
17+
namespace yb::pgwrapper {
18+
19+
class PgDebugReadRestartsTest : public PgMiniTestBase {
20+
protected:
21+
size_t NumTabletServers() override {
22+
return 3;
23+
}
24+
};
25+
26+
TEST_F(PgDebugReadRestartsTest, RecommendReadCommitted) {
27+
auto setup_conn = ASSERT_RESULT(Connect());
28+
ASSERT_OK(setup_conn.Execute("DROP TABLE IF EXISTS tokens"));
29+
ASSERT_OK(setup_conn.Execute("CREATE TABLE tokens(token INT)"));
30+
ASSERT_OK(setup_conn.Execute("INSERT INTO tokens SELECT i FROM GENERATE_SERIES(1, 100) i"));
31+
32+
auto read_conn = ASSERT_RESULT(Connect());
33+
auto insert_conn = ASSERT_RESULT(Connect());
34+
ASSERT_OK(read_conn.StartTransaction(SNAPSHOT_ISOLATION));
35+
auto rows = ASSERT_RESULT(read_conn.FetchRows<int32_t>("SELECT token FROM tokens LIMIT 1"));
36+
ASSERT_OK(insert_conn.Execute("INSERT INTO tokens SELECT i FROM GENERATE_SERIES(200, 300) i"));
37+
auto result = read_conn.FetchRows<int32_t>("SELECT token FROM tokens ORDER BY token");
38+
ASSERT_NOK(result);
39+
auto error_string = result.status().ToString();
40+
// Recommend read committed isolation level
41+
ASSERT_STR_CONTAINS(error_string, "Consider using READ COMMITTED");
42+
ASSERT_OK(read_conn.RollbackTransaction());
43+
}
44+
45+
} // namespace yb::pgwrapper

0 commit comments

Comments
 (0)