Skip to content

Commit 6c38c60

Browse files
Markos Fountoulakismfundul
Markos Fountoulakis
authored and committed
Repair numeric partial state on the fly
The numeric format changed between PG13 and PG14 to include infinities. As a result the serialized partial state of numeric aggregates also changed format. If a user that has stored partials (e.g. by using Continuous Aggregates) upgrades to PG14 then the partial state deserialization will lead to errors due to the mismatch with the PG14 code. Repair the deserialization process on the fly by appending zeroed plus-infinity and minus-infinity counts for the numeric aggregate state to use. Fixes #4427
1 parent 93e9d42 commit 6c38c60

File tree

4 files changed

+228
-48
lines changed

4 files changed

+228
-48
lines changed

tsl/src/partialize_finalize.c

+123-2
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include <parser/parse_coerce.h>
1515
#include <utils/builtins.h>
1616
#include <utils/datum.h>
17+
#include <utils/fmgroids.h>
1718
#include <utils/syscache.h>
1819

1920
#include "partialize_finalize.h"
@@ -147,6 +148,99 @@ collation_oid_from_name(char *schema_name, char *collation_name)
147148
namel = lappend(namel, makeString(collation_name));
148149
return get_collation_oid(namel, false);
149150
}
151+
152+
#if PG14_GE
153+
/*
154+
* =======================================
155+
* Record serialized partials errata here:
156+
* =======================================
157+
*
158+
* ================================================================================================
159+
* The numeric format changed between PG13 and PG14 to include infinities. Consequently the
160+
* internal aggregate for the combine functions changed as well, which led to the serialized
161+
* partial state of numeric aggregates also changing format.
162+
*
163+
* If a user that has stored partials (by using Continuous Aggregates or calling
164+
* _timescaledb_internal.finalize_agg()) upgrades to PG14 then the partial state deserialization
165+
* will lead to errors due to the mismatch with the PG14 code.
166+
*
167+
* For F_NUMERIC_AVG_DESERIALIZE and F_NUMERIC_DESERIALIZE the length of the serialized aggregate
168+
* state is 16 bytes longer than the previous versions. It suffices to zero out the extra bytes of
169+
* pInfcount and nInfcount in the numeric aggregate combine state. Those fields are only there to
170+
* support infinity and -infinity in the numeric type, which wasn't supported in older PostgreSQL
171+
* versions.
172+
*
173+
* PostgreSQL versions < 14
174+
* F_NUMERIC_AVG_DESERIALIZE serialized partial length = X
175+
* F_NUMERIC_DESERIALIZE serialized partial length = X + 10
176+
*
177+
* PostgreSQL versions >= 14
178+
* F_NUMERIC_AVG_DESERIALIZE serialized partial length = X + 16
179+
* F_NUMERIC_DESERIALIZE serialized partial length = X + 10 + 16
180+
*
181+
* For more information see: https://www.postgresql.org/message-id/[email protected]
182+
*
183+
* Non-numeric aggregate functions affected:
184+
* 1. var_pop(int8)
185+
* 2. var_samp(int8)
186+
* 3. variance(int8)
187+
* 4. stddev_pop(int8)
188+
* 5. stddev_samp(int8)
189+
* 6. stddev(int8)
190+
* ================================================================================================
191+
* F_NUMERIC_POLY_DESERIALIZE depends on compiler support for HAVE_INT128. As a result, recompiling
192+
* the same versions of PostgreSQL and TimescaleDB with a different compiler can lead to corruption
193+
* if the same database is reused.
194+
*
195+
* Non-numeric aggregate functions affected:
196+
* 1. var_pop(int4)
197+
* 2. var_pop(int2)
198+
* 3. var_samp(int4)
199+
* 4. var_samp(int2)
200+
* 5. variance(int4)
201+
* 6. variance(int2)
202+
* 7. stddev_pop(int4)
203+
* 8. stddev_pop(int2)
204+
* 9. stddev_samp(int4)
205+
* 10. stddev_samp(int2)
206+
* 11. stddev(int4)
207+
* 12. stddev(int2)
208+
*/
209+
#define NUMERIC_PARTIAL_MISSING_LENGTH (16)
210+
211+
static bytea *
212+
zero_fill_bytearray(bytea *serialized_partial, size_t missing_length)
213+
{
214+
size_t original_length = VARSIZE_ANY_EXHDR(serialized_partial);
215+
size_t desired_length = original_length + missing_length;
216+
217+
bytea *new_bytea = repalloc(serialized_partial, desired_length + VARHDRSZ);
218+
219+
SET_VARSIZE(new_bytea, desired_length + VARHDRSZ);
220+
void *end_of_serialized_partial = VARDATA(new_bytea) + original_length;
221+
memset(end_of_serialized_partial, 0, missing_length);
222+
223+
return new_bytea;
224+
}
225+
#endif
226+
227+
/* Only call this function if the partial is known to be problematic. */
228+
static bytea *
229+
sanitize_serialized_partial(Oid deserialfnoid, bytea *serialized_partial)
230+
{
231+
#if PG14_GE
232+
if ((deserialfnoid == F_NUMERIC_DESERIALIZE) || (deserialfnoid == F_NUMERIC_AVG_DESERIALIZE))
233+
/*
234+
* Always add NUMERIC_PARTIAL_MISSING_LENGTH extra bytes because the length is not fixed.
235+
* This is only safe to do when the partial state is known to be short, otherwise an
236+
* exception is thrown if the serialized_partial is not fully consumed by deserialfn().
237+
*/
238+
return zero_fill_bytearray(serialized_partial, NUMERIC_PARTIAL_MISSING_LENGTH);
239+
#endif
240+
241+
return serialized_partial;
242+
}
243+
150244
/*
151245
* deserialize from the internal format in which data is stored in bytea
152246
* parameter. Callers need to check deserialized_isnull . Only if this is set to false,
@@ -169,8 +263,35 @@ inner_agg_deserialize(FACombineFnMeta *combine_meta, bytea *serialized_partial,
169263

170264
FC_ARG(deser_fcinfo, 0) = PointerGetDatum(serialized_partial);
171265
FC_NULL(deser_fcinfo, 0) = serialized_isnull;
172-
combine_meta->deserialfn_fcinfo->isnull = false;
173-
deserialized = FunctionCallInvoke(deser_fcinfo);
266+
deser_fcinfo->isnull = false;
267+
268+
/*
269+
* When an exception is thrown and longjmp() is called, CurrentMemoryContext is potentially
270+
* different than what it was inside the PG_TRY() block below.
271+
*
272+
* Restore it to the old value so that the code in the subsequent PG_CATCH() block does not
273+
* corrupt the memory.
274+
*
275+
* No need for volatile variables since we don't modify any of this function's stack frame
276+
* inside the PG_TRY() block.
277+
*/
278+
MemoryContext oldcontext = CurrentMemoryContext;
279+
PG_TRY();
280+
{
281+
deserialized = FunctionCallInvoke(deser_fcinfo);
282+
}
283+
PG_CATCH();
284+
{
285+
CurrentMemoryContext = oldcontext;
286+
FlushErrorState();
287+
/* attempt to repair the serialized partial */
288+
serialized_partial =
289+
sanitize_serialized_partial(combine_meta->deserialfnoid, serialized_partial);
290+
FC_ARG(deser_fcinfo, 0) = PointerGetDatum(serialized_partial);
291+
deser_fcinfo->isnull = false;
292+
deserialized = FunctionCallInvoke(deser_fcinfo);
293+
}
294+
PG_END_TRY();
174295
*deserialized_isnull = deser_fcinfo->isnull;
175296
}
176297
else if (!serialized_isnull)

tsl/test/expected/partialize_finalize.out

+49-26
Original file line numberDiff line numberDiff line change
@@ -142,44 +142,66 @@ select a, _timescaledb_internal.finalize_agg( 'sum(double precision)', null, nul
142142
drop table t1;
143143
drop view v1;
144144
drop table foo;
145-
create table foo (a integer, b numeric , c text, d timestamptz);
146-
insert into foo values( 1 , 10 , 'hello', '2010-01-01 09:00:00-08');
147-
insert into foo values( 1 , 20 , 'abc', '2010-01-02 09:00:00-08');
148-
insert into foo values( 1 , 30 , 'abcd', '2010-01-03 09:00:00-08');
149-
insert into foo values( 1 , 40 , 'abcde', NULL );
150-
insert into foo values( 1 , 50 , NULL, '2010-01-01 09:00:00-08');
145+
create table foo (a integer, b numeric , c text, d timestamptz, e bigint);
146+
insert into foo values( 1 , 10 , 'hello', '2010-01-01 09:00:00-08', 10);
147+
insert into foo values( 1 , 20 , 'abc', '2010-01-02 09:00:00-08', 20);
148+
insert into foo values( 1 , 30 , 'abcd', '2010-01-03 09:00:00-08', 30);
149+
insert into foo values( 1 , 40 , 'abcde', NULL, 40);
150+
insert into foo values( 1 , 50 , NULL, '2010-01-01 09:00:00-08', 50);
151151
--group with all values for c and d same
152-
insert into foo values( 2 , 10 , 'hello', '2010-01-01 09:00:00-08');
153-
insert into foo values( 2 , 20 , 'hello', '2010-01-01 09:00:00-08');
154-
insert into foo values( 2 , 30 , 'hello', '2010-01-01 09:00:00-08');
152+
insert into foo values( 2 , 10 , 'hello', '2010-01-01 09:00:00-08', 10);
153+
insert into foo values( 2 , 20 , 'hello', '2010-01-01 09:00:00-08', 20);
154+
insert into foo values( 2 , 30 , 'hello', '2010-01-01 09:00:00-08', 30);
155155
--group with all values for c and d NULL
156-
insert into foo values( 3 , 40 , NULL, NULL);
157-
insert into foo values( 3 , 50 , NULL, NULL);
158-
insert into foo values(11, NULL, NULL, NULL);
159-
insert into foo values(11, NULL, 'hello', '2010-01-02 09:00:00-05');
156+
insert into foo values( 3 , 40 , NULL, NULL, 40);
157+
insert into foo values( 3 , 50 , NULL, NULL, 50);
158+
insert into foo values(11, NULL, NULL, NULL, NULL);
159+
insert into foo values(11, NULL, 'hello', '2010-01-02 09:00:00-05', NULL);
160160
--group with all values for c and d NULL and later add non-null.
161-
insert into foo values(12, NULL, NULL, NULL);
162-
create or replace view v1(a , b, partialb, partialc, partiald)
161+
insert into foo values(12, NULL, NULL, NULL, NULL);
162+
create or replace view v1(a , b, partialb, partialc, partiald, partiale, partialf)
163163
as
164-
SELECT a, b, _timescaledb_internal.partialize_agg( sum(b)) , _timescaledb_internal.partialize_agg( min(c)) , _timescaledb_internal.partialize_agg(max(d)) from foo group by a, b ;
164+
SELECT a, b, _timescaledb_internal.partialize_agg(sum(b))
165+
, _timescaledb_internal.partialize_agg(min(c))
166+
, _timescaledb_internal.partialize_agg(max(d))
167+
, _timescaledb_internal.partialize_agg(stddev(b))
168+
, _timescaledb_internal.partialize_agg(stddev(e)) from foo group by a, b ;
165169
create table t1 as select * from v1;
166170
--sum 2114, collid 0, min(text) 2145, collid 100, max(ts) 2127
167-
insert into foo values(12, 10, 'hello', '2010-01-02 09:00:00-05');
171+
insert into foo values(12, 10, 'hello', '2010-01-02 09:00:00-05', 10);
168172
insert into t1 select * from v1 where (a = 12 and b = 10) ;
169-
--select a, sum(b), min(c) , max(d) from foo group by a order by a;
173+
--select a, sum(b), min(c) , max(d), stddev(b), stddev(e) from foo group by a order by a;
170174
--results should match above query
175+
CREATE OR REPLACE VIEW vfinal(a , sumb, minc, maxd, stddevb, stddeve)
176+
AS
171177
select a, _timescaledb_internal.finalize_agg( 'sum(numeric)', null, null, null, partialb, null::numeric ) sumb
172178
, _timescaledb_internal.finalize_agg( 'min(text)', 'pg_catalog', 'default', null, partialc, null::text ) minc
173-
, _timescaledb_internal.finalize_agg( 'max(timestamp with time zone)', null, null, null, partiald, null::timestamptz ) maxd from t1 group by a order by a ;
174-
a | sumb | minc | maxd
175-
----+------+-------+------------------------------
176-
1 | 150 | abc | Sun Jan 03 09:00:00 2010 PST
177-
2 | 60 | hello | Fri Jan 01 09:00:00 2010 PST
178-
3 | 90 | |
179-
11 | | hello | Sat Jan 02 06:00:00 2010 PST
180-
12 | 10 | hello | Sat Jan 02 06:00:00 2010 PST
179+
, _timescaledb_internal.finalize_agg( 'max(timestamp with time zone)', null, null, null, partiald, null::timestamptz ) maxd
180+
, _timescaledb_internal.finalize_agg( 'stddev(numeric)', null, null, null, partiale, null::numeric ) stddevb
181+
, _timescaledb_internal.finalize_agg( 'stddev(int8)', null, null, null, partialf, null::numeric ) stddeve
182+
from t1 group by a order by a ;
183+
SELECT * FROM vfinal;
184+
a | sumb | minc | maxd | stddevb | stddeve
185+
----+------+-------+------------------------------+---------------------+---------------------
186+
1 | 150 | abc | Sun Jan 03 09:00:00 2010 PST | 15.8113883008418967 | 15.8113883008418967
187+
2 | 60 | hello | Fri Jan 01 09:00:00 2010 PST | 10.0000000000000000 | 10.0000000000000000
188+
3 | 90 | | | 7.0710678118654752 | 7.0710678118654752
189+
11 | | hello | Sat Jan 02 06:00:00 2010 PST | |
190+
12 | 10 | hello | Sat Jan 02 06:00:00 2010 PST | |
181191
(5 rows)
182192

193+
CREATE TABLE vfinal_res AS SELECT * FROM vfinal;
194+
-- overwrite partials with dumped binary values from PostrgeSQL 13 --
195+
TRUNCATE TABLE t1;
196+
\COPY t1 FROM data/partialize_finalize_data.csv WITH CSV HEADER
197+
--repeat query to verify partial serialization sanitization works for versions PG >= 14
198+
CREATE TABLE vfinal_dump_res AS SELECT * FROM vfinal;
199+
-- compare results to verify there is no difference
200+
(SELECT * FROM vfinal_res) EXCEPT (SELECT * FROM vfinal_dump_res);
201+
a | sumb | minc | maxd | stddevb | stddeve
202+
---+------+------+------+---------+---------
203+
(0 rows)
204+
183205
--with having clause --
184206
select a, b , _timescaledb_internal.finalize_agg( 'min(text)', 'pg_catalog', 'default', null, partialc, null::text ) minc, _timescaledb_internal.finalize_agg( 'max(timestamp with time zone)', null, null, null, partiald, null::timestamptz ) maxd from t1 where b is not null group by a, b having _timescaledb_internal.finalize_agg( 'max(timestamp with time zone)', null, null, null, partiald, null::timestamptz ) is not null order by a, b;
185207
a | b | minc | maxd
@@ -195,6 +217,7 @@ select a, b , _timescaledb_internal.finalize_agg( 'min(text)', 'pg_catalog', 'd
195217
(8 rows)
196218

197219
--TEST5 test with TOAST data
220+
drop view vfinal;
198221
drop table t1;
199222
drop view v1;
200223
drop table foo;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
a,b,partialb,partialc,partiald,partiale,partialf
2+
1,10,\x00000000000000010001000000000000000a0000000000000000000000010000000000000000,\x68656c6c6f,\x00011f1c121ac400,\x00000000000000010001000000000000000a000100000000000000640000000000000000000000010000000000000000,\x00000000000000010001000000000000000a000100000000000000640000000000000000000000010000000000000000
3+
2,10,\x00000000000000010001000000000000000a0000000000000000000000010000000000000000,\x68656c6c6f,\x00011f1c121ac400,\x00000000000000010001000000000000000a000100000000000000640000000000000000000000010000000000000000,\x00000000000000010001000000000000000a000100000000000000640000000000000000000000010000000000000000
4+
1,30,\x00000000000000010001000000000000001e0000000000000000000000010000000000000000,\x61626364,\x00011f444dc98400,\x00000000000000010001000000000000001e000100000000000003840000000000000000000000010000000000000000,\x00000000000000010001000000000000001e000100000000000003840000000000000000000000010000000000000000
5+
12,,\x000000000000000000000000000000000000000000000000000000000000000000000000,,,\x0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000,\x0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
6+
1,40,\x0000000000000001000100000000000000280000000000000000000000010000000000000000,\x6162636465,,\x000000000000000100010000000000000028000100000000000006400000000000000000000000010000000000000000,\x000000000000000100010000000000000028000100000000000006400000000000000000000000010000000000000000
7+
1,50,\x0000000000000001000100000000000000320000000000000000000000010000000000000000,,\x00011f1c121ac400,\x000000000000000100010000000000000032000100000000000009c40000000000000000000000010000000000000000,\x000000000000000100010000000000000032000100000000000009c40000000000000000000000010000000000000000
8+
11,,\x000000000000000000000000000000000000000000000000000000000000000000000000,\x68656c6c6f,\x00011f2dac373800,\x0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000,\x0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
9+
2,20,\x0000000000000001000100000000000000140000000000000000000000010000000000000000,\x68656c6c6f,\x00011f1c121ac400,\x000000000000000100010000000000000014000100000000000001900000000000000000000000010000000000000000,\x000000000000000100010000000000000014000100000000000001900000000000000000000000010000000000000000
10+
3,40,\x0000000000000001000100000000000000280000000000000000000000010000000000000000,,,\x000000000000000100010000000000000028000100000000000006400000000000000000000000010000000000000000,\x000000000000000100010000000000000028000100000000000006400000000000000000000000010000000000000000
11+
3,50,\x0000000000000001000100000000000000320000000000000000000000010000000000000000,,,\x000000000000000100010000000000000032000100000000000009c40000000000000000000000010000000000000000,\x000000000000000100010000000000000032000100000000000009c40000000000000000000000010000000000000000
12+
2,30,\x00000000000000010001000000000000001e0000000000000000000000010000000000000000,\x68656c6c6f,\x00011f1c121ac400,\x00000000000000010001000000000000001e000100000000000003840000000000000000000000010000000000000000,\x00000000000000010001000000000000001e000100000000000003840000000000000000000000010000000000000000
13+
1,20,\x0000000000000001000100000000000000140000000000000000000000010000000000000000,\x616263,\x00011f302ff22400,\x000000000000000100010000000000000014000100000000000001900000000000000000000000010000000000000000,\x000000000000000100010000000000000014000100000000000001900000000000000000000000010000000000000000
14+
12,10,\x00000000000000010001000000000000000a0000000000000000000000010000000000000000,\x68656c6c6f,\x00011f2dac373800,\x00000000000000010001000000000000000a000100000000000000640000000000000000000000010000000000000000,\x00000000000000010001000000000000000a000100000000000000640000000000000000000000010000000000000000

0 commit comments

Comments
 (0)