Commit 1d20981
Add spin-lock during recompression on unique constraints
Recompression tries to take an ExclusiveLock at the end of compression in order to update the chunk status from partial to fully compressed. Updating the status is mandatory when unique constraints are present on the table; previously, if the lock could not be acquired in that case, recompression was aborted outright. This change adds a spin lock that retries acquiring the ExclusiveLock, applied only when unique constraints are present on the table.
1 parent 7ea1677
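
In outline, the change replaces the unconditional abort with a bounded retry loop: a non-blocking lock attempt, a 50 ms sleep after each failure, and a 5000 ms budget, i.e. at most 100 attempts, before raising the same serialization error as before. The retry parameters mirror the heuristics PostgreSQL's lazy vacuum uses when retrying its truncation lock. Condensed from the recompress.c diff below:

    int lock_retry = 0;
    while (true)
    {
        /* Non-blocking attempt: returns false instead of queueing behind other lockers */
        if (ConditionalLockRelation(uncompressed_chunk_rel, ExclusiveLock))
        {
            try_updating_chunk_status(uncompressed_chunk, uncompressed_chunk_rel);
            break;
        }

        /* Keep query cancellation and statement timeout working while we wait */
        CHECK_FOR_INTERRUPTS();

        /* 5000 ms / 50 ms = 100 attempts before giving up */
        if (++lock_retry >
            (RECOMPRESS_EXCLUSIVE_LOCK_TIMEOUT / RECOMPRESS_EXCLUSIVE_LOCK_WAIT_INTERVAL))
            ereport(ERROR,
                    (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
                     errmsg("aborting recompression due to concurrent DML on uncompressed "
                            "data, retrying with next policy run")));

        /* Sleep 50 ms on the process latch rather than pg_usleep(), so the
         * backend reacts to latch wakeups and exits on postmaster death */
        (void) WaitLatch(MyLatch,
                         WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
                         RECOMPRESS_EXCLUSIVE_LOCK_WAIT_INTERVAL,
                         WAIT_EVENT_VACUUM_TRUNCATE);
        ResetLatch(MyLatch);
    }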

File tree

.unreleased/pr_8018
tsl/src/compression/recompress.c
tsl/test/t/005_recompression_spin_lock_test.pl
tsl/test/t/CMakeLists.txt

4 files changed: +257 −39 lines

.unreleased/pr_8018 (new file, +1)

Implements: #8018 Add spin-lock during recompression on unique constraints

tsl/src/compression/recompress.c (+91 −38)

@@ -28,6 +28,14 @@
 #include "ts_catalog/chunk_column_stats.h"
 #include "ts_catalog/compression_settings.h"
 
+/*
+ * Timing parameters for spin locking heuristics.
+ * These are the same as used by Postgres for truncate locking during lazy vacuum.
+ * https://github.com/postgres/postgres/blob/4a0650d359c5981270039eeb634c3b7427aa0af5/src/backend/access/heap/vacuumlazy.c#L82
+ */
+#define RECOMPRESS_EXCLUSIVE_LOCK_WAIT_INTERVAL 50 /* ms */
+#define RECOMPRESS_EXCLUSIVE_LOCK_TIMEOUT 5000 /* ms */
+
 static bool fetch_uncompressed_chunk_into_tuplesort(Tuplesortstate *tuplesortstate,
                                                     Relation uncompressed_chunk_rel,
                                                     Snapshot snapshot);
@@ -50,6 +58,8 @@ static bool check_changed_group(CompressedSegmentInfo *current_segment, TupleTab
                                 int nsegmentby_cols);
 static void recompress_segment(Tuplesortstate *tuplesortstate, Relation compressed_chunk_rel,
                                RowCompressor *row_compressor);
+static void try_updating_chunk_status(Chunk *uncompressed_chunk, Relation uncompressed_chunk_rel);
+
 /*
  * Recompress an existing chunk by decompressing the batches
  * that are affected by the addition of newer data. The existing
@@ -533,38 +543,7 @@ recompress_chunk_segmentwise_impl(Chunk *uncompressed_chunk)
 	 */
 	if (ConditionalLockRelation(uncompressed_chunk_rel, ExclusiveLock))
 	{
-		TableScanDesc scan = table_beginscan(uncompressed_chunk_rel, GetLatestSnapshot(), 0, 0);
-		hypercore_scan_set_skip_compressed(scan, true);
-		ScanDirection scan_dir = uncompressed_chunk_rel->rd_tableam == hypercore_routine() ?
-									 ForwardScanDirection :
-									 BackwardScanDirection;
-		TupleTableSlot *slot = table_slot_create(uncompressed_chunk_rel, NULL);
-
-		/* Doing a backwards scan with assumption that newly inserted tuples
-		 * are most likely at the end of the heap.
-		 */
-		bool has_tuples = false;
-		if (table_scan_getnextslot(scan, scan_dir, slot))
-		{
-			has_tuples = true;
-		}
-
-		ExecDropSingleTupleTableSlot(slot);
-		table_endscan(scan);
-
-		if (!has_tuples)
-		{
-			if (ts_chunk_clear_status(uncompressed_chunk,
-									  CHUNK_STATUS_COMPRESSED_UNORDERED |
-										  CHUNK_STATUS_COMPRESSED_PARTIAL))
-				ereport(DEBUG1,
-						(errmsg("cleared chunk status for recompression: \"%s.%s\"",
-								NameStr(uncompressed_chunk->fd.schema_name),
-								NameStr(uncompressed_chunk->fd.table_name))));
-
-			/* changed chunk status, so invalidate any plans involving this chunk */
-			CacheInvalidateRelcacheByRelid(uncompressed_chunk_id);
-		}
+		try_updating_chunk_status(uncompressed_chunk, uncompressed_chunk_rel);
 	}
 	else if (has_unique_constraints)
 	{
@@ -575,13 +554,44 @@ recompress_chunk_segmentwise_impl(Chunk *uncompressed_chunk)
 		 * and speculative insertion could potentially cause false negatives during
 		 * constraint checking. For now, our best option here is to bail.
 		 *
-		 * This can be improved by using a spin lock to wait for the ExclusiveLock
-		 * or bail out if we can't get it in time.
+		 * We use a spin lock to wait for the ExclusiveLock, or bail out if we can't get it in time.
 		 */
-		ereport(ERROR,
-				(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-				 errmsg("aborting recompression due to concurrent DML on uncompressed "
-						"data, retrying with next policy run")));
+
+		int lock_retry = 0;
+		while (true)
+		{
+			if (ConditionalLockRelation(uncompressed_chunk_rel, ExclusiveLock))
+			{
+				try_updating_chunk_status(uncompressed_chunk, uncompressed_chunk_rel);
+				break;
+			}
+
+			/*
+			 * Check for interrupts while trying to (re-)acquire the exclusive
+			 * lock.
+			 */
+			CHECK_FOR_INTERRUPTS();
+
+			if (++lock_retry >
+				(RECOMPRESS_EXCLUSIVE_LOCK_TIMEOUT / RECOMPRESS_EXCLUSIVE_LOCK_WAIT_INTERVAL))
+			{
+				/*
+				 * We failed to establish the lock in the specified number of
+				 * retries, so we give up on the exclusive lock and abort the
+				 * recompression operation.
+				 */
+				ereport(ERROR,
+						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+						 errmsg("aborting recompression due to concurrent DML on uncompressed "
+								"data, retrying with next policy run")));
+				break;
+			}
+
+			(void) WaitLatch(MyLatch,
+							 WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
+							 RECOMPRESS_EXCLUSIVE_LOCK_WAIT_INTERVAL,
+							 WAIT_EVENT_VACUUM_TRUNCATE);
+			ResetLatch(MyLatch);
+		}
 	}
 
 	table_close(uncompressed_chunk_rel, NoLock);
@@ -866,3 +876,46 @@ delete_tuple_for_recompression(Relation rel, ItemPointer tid, Snapshot snapshot)
 
 	return result == TM_Ok;
 }
+
+/*
+ * Check if we can update the chunk status to fully compressed after segmentwise
+ * recompression. We can only do this if there were no concurrent DML operations,
+ * so we check whether any uncompressed tuples remain in the chunk after
+ * compression. If there are none, we can update the chunk status.
+ *
+ * Note: the caller is expected to hold an ExclusiveLock on the uncompressed chunk.
+ */
+static void
+try_updating_chunk_status(Chunk *uncompressed_chunk, Relation uncompressed_chunk_rel)
+{
+	TableScanDesc scan = table_beginscan(uncompressed_chunk_rel, GetLatestSnapshot(), 0, 0);
+	hypercore_scan_set_skip_compressed(scan, true);
+	ScanDirection scan_dir = uncompressed_chunk_rel->rd_tableam == hypercore_routine() ?
+								 ForwardScanDirection :
+								 BackwardScanDirection;
+	TupleTableSlot *slot = table_slot_create(uncompressed_chunk_rel, NULL);
+
+	/* Doing a backwards scan with the assumption that newly inserted tuples
+	 * are most likely at the end of the heap.
+	 */
+	bool has_tuples = false;
+	if (table_scan_getnextslot(scan, scan_dir, slot))
+	{
+		has_tuples = true;
+	}
+
+	ExecDropSingleTupleTableSlot(slot);
+	table_endscan(scan);
+
+	if (!has_tuples)
+	{
+		if (ts_chunk_clear_status(uncompressed_chunk,
+								  CHUNK_STATUS_COMPRESSED_UNORDERED |
+									  CHUNK_STATUS_COMPRESSED_PARTIAL))
+			ereport(DEBUG1,
+					(errmsg("cleared chunk status for recompression: \"%s.%s\"",
+							NameStr(uncompressed_chunk->fd.schema_name),
+							NameStr(uncompressed_chunk->fd.table_name))));
+
+		/* changed chunk status, so invalidate any plans involving this chunk */
+		CacheInvalidateRelcacheByRelid(uncompressed_chunk->table_id);
+	}
+}
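
A note on the waiting primitive: the loop sleeps via WaitLatch() rather than pg_usleep(), so the backend wakes early on latch sets and, through WL_EXIT_ON_PM_DEATH, exits promptly if the postmaster dies, while CHECK_FOR_INTERRUPTS() keeps query cancellation working between attempts. Since the loop reuses WAIT_EVENT_VACUUM_TRUNCATE, a recompression job spinning here is reported in pg_stat_activity under the VacuumTruncate wait event rather than a recompression-specific one.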
tsl/test/t/005_recompression_spin_lock_test.pl (new file, +164)
# This file and its contents are licensed under the Timescale License.
# Please see the included NOTICE for copyright information and
# LICENSE-TIMESCALE for a copy of the license.

# This TAP test verifies the behavior when recompression is attempted on a chunk
# with unique constraints while concurrent DML is happening

use strict;
use warnings;
use TimescaleNode;
use Data::Dumper;
use Test::More;

# Test setup
my $node = TimescaleNode->create('node');

# Create table with a unique constraint
my $result = $node->safe_psql(
	'postgres', q{
	CREATE TABLE sensor_data (
		time timestamptz not null,
		sensor_id integer not null,
		cpu double precision null,
		temperature double precision null,
		UNIQUE(time, sensor_id)
	);
	}
);
is($result, '', 'create table with unique constraint');

# Create hypertable
$result = $node->safe_psql(
	'postgres', q{
	SELECT FROM create_hypertable('sensor_data', 'time', chunk_time_interval => INTERVAL '1 month');
	}
);
is($result, '', 'create hypertable');

# Insert data
$result = $node->safe_psql(
	'postgres', q{
	INSERT INTO sensor_data
	SELECT
		time + (INTERVAL '1 minute' * random()) AS time,
		sensor_id,
		random() AS cpu,
		random() * 100 AS temperature
	FROM
		generate_series('2022-01-01', '2022-01-07', INTERVAL '1 hour') AS g1(time),
		generate_series(1, 50, 1) AS g2(sensor_id)
	ORDER BY time;
	}
);
is($result, '', 'insert data');

# Define count query
my $count_query = "SELECT count(*) FROM sensor_data;";

# Count inserted rows
my $num_rows = $node->safe_psql('postgres', $count_query);
is($num_rows, 7250, 'validate inserted rows');

# Enable compression
$result = $node->safe_psql(
	'postgres', q{
	ALTER TABLE sensor_data SET (
		timescaledb.compress,
		timescaledb.compress_segmentby = 'sensor_id',
		timescaledb.compress_orderby = 'time'
	);
	}
);
is($result, '', 'enable compression');

# Compress the chunk
my $compress_query = "SELECT count(*) FROM (SELECT compress_chunk(show_chunks('sensor_data')));";
$result = $node->safe_psql('postgres', $compress_query);
is($result, '1', 'compress chunk');

# Insert more data to make the chunk partial
$result = $node->safe_psql(
	'postgres', q{
	INSERT INTO sensor_data
	SELECT
		time + (INTERVAL '1 minute' * random()) AS time,
		sensor_id,
		random() AS cpu,
		random() * 100 AS temperature
	FROM
		generate_series('2022-01-01', '2022-01-07', INTERVAL '1 hour') AS g1(time),
		generate_series(51, 55, 1) AS g2(sensor_id)
	ORDER BY time;
	}
);
is($result, '', 'insert more data to make the chunk partial');

# Create psql sessions
my $s1 = $node->background_psql('postgres');
my $s2 = $node->background_psql('postgres');

# Enable segmentwise recompression
$s1->query_safe("SET timescaledb.enable_segmentwise_recompression TO on;");

# Enable waiting to acquire an exclusive lock
$s1->query_safe("SET timescaledb.enable_recompress_waiting TO on;");

# TEST 1:
# Session 1 tries to acquire an exclusive lock at the end of recompression but is
# blocked due to the inserts by Session 2. However, since waiting is enabled, it
# spins until Session 2 releases its locks instead of aborting.

# Begin txns in both sessions
$result = $s1->query_safe("BEGIN;");
isnt($result, '', "session 1: begin");

$result = $s2->query_safe("BEGIN;");
isnt($result, '', "session 2: begin");

# Get lock data
$result = $node->safe_psql(
	'postgres',
	"SELECT relation::regclass::text, mode FROM pg_locks WHERE relation::regclass::text LIKE '%hyper_1_%chunk' and granted;"
);
is($result, '', "verify no locks exist on the chunk");

# Session 2: insert rows into the chunk, taking a RowExclusiveLock
$s2->query_until('', q{
	INSERT INTO sensor_data VALUES
	('2022-01-05 12:00:00', 100, 0.5, 25.5),
	('2022-01-05 13:00:00', 101, 0.6, 26.5);
});

# Session 1: recompress the chunk. This blocks in the spin lock until session 2
# releases its locks.
# We have to use 'query_until('', ...)' so that the test immediately fires the next query.
# Otherwise s1 times out and throws an error.
$s1->query_until('', q{
	SELECT compress_chunk(show_chunks('sensor_data'));
});

# Session 2 immediately aborts, releasing the RowExclusiveLock on the table
$result = $s2->query_safe("ABORT");
isnt($result, '', "session 2: abort");

# Verify ExclusiveLock on uncompressed chunk
$result = $node->safe_psql(
	'postgres',
	"SELECT relation::regclass::text FROM pg_locks WHERE relation::regclass::text LIKE '%hyper_1_%chunk' AND granted AND mode = 'ExclusiveLock';"
);
is($result, '_timescaledb_internal._hyper_1_1_chunk', "verify ExclusiveLock on uncompressed chunk");

# Verify AccessShareLock on compressed chunk
$result = $node->safe_psql(
	'postgres',
	"SELECT relation::regclass::text FROM pg_locks WHERE relation::regclass::text LIKE '%hyper_1_%chunk' AND granted AND mode = 'AccessShareLock'"
);
is($result, "_timescaledb_internal._hyper_1_1_chunk", "verify AccessShareLock on internal compressed chunk");

# Clean up
$result = $s1->query_safe("ROLLBACK;");
isnt($result, '', "session 1: rollback");

$s2->quit();
$s1->quit();

done_testing();
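
Note that the test turns on timescaledb.enable_recompress_waiting before calling compress_chunk(), so the spin-lock wait is the code path being exercised; judging from that setup step, with the GUC left off the same lock conflict would presumably still produce the immediate serialization error.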

tsl/test/t/CMakeLists.txt (+1 −1)

@@ -7,7 +7,7 @@ set(PROVE_DEBUG_TEST_FILES 003_mvcc_cagg.pl)
 # versions.
 
 if((${PG_VERSION_MAJOR} GREATER_EQUAL "16"))
-  list(APPEND PROVE_TEST_FILES 004_truncate_or_delete_spin_lock.pl)
+  list(APPEND PROVE_TEST_FILES 004_truncate_or_delete_spin_lock.pl 005_recompression_spin_lock_test.pl)
 endif()
 
 if(CMAKE_BUILD_TYPE MATCHES Debug)
