Skip to content

Commit 4ba80c9

Browse files
feat(expr): add jsonb_populate_record(set) function (#13421)
Signed-off-by: Runji Wang <[email protected]>
1 parent 546773a commit 4ba80c9

File tree

9 files changed

+319
-33
lines changed

9 files changed

+319
-33
lines changed

proto/expr.proto

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,8 @@ message ExprNode {
274274
JSONB_PATH_MATCH = 621;
275275
JSONB_PATH_QUERY_ARRAY = 622;
276276
JSONB_PATH_QUERY_FIRST = 623;
277+
JSONB_POPULATE_RECORD = 629;
278+
JSONB_TO_RECORD = 630;
277279

278280
// Non-pure functions below (> 1000)
279281
// ------------------------
@@ -328,6 +330,8 @@ message TableFunction {
328330
JSONB_EACH_TEXT = 13;
329331
JSONB_OBJECT_KEYS = 14;
330332
JSONB_PATH_QUERY = 15;
333+
JSONB_POPULATE_RECORDSET = 16;
334+
JSONB_TO_RECORDSET = 17;
331335
// User defined table function
332336
UDTF = 100;
333337
}

src/common/src/types/jsonb.rs

Lines changed: 106 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@ use bytes::Buf;
1919
use jsonbb::{Value, ValueRef};
2020
use risingwave_common_estimate_size::EstimateSize;
2121

22-
use crate::types::{Scalar, ScalarRef};
22+
use super::{Datum, IntoOrdered, ListValue, ScalarImpl, StructRef, ToOwnedDatum, F64};
23+
use crate::types::{DataType, Scalar, ScalarRef, StructType, StructValue};
24+
use crate::util::iter_util::ZipEqDebug;
2325

2426
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
2527
pub struct JsonbVal(pub(crate) Value);
@@ -297,11 +299,12 @@ impl<'a> JsonbRef<'a> {
297299
///
298300
/// According to RFC 8259, only number within IEEE 754 binary64 (double precision) has good
299301
/// interoperability. We do not support arbitrary precision like PostgreSQL `numeric` right now.
300-
pub fn as_number(&self) -> Result<f64, String> {
302+
pub fn as_number(&self) -> Result<F64, String> {
301303
self.0
302304
.as_number()
303305
.ok_or_else(|| format!("cannot cast jsonb {} to type number", self.type_name()))?
304306
.as_f64()
307+
.map(|f| f.into_ordered())
305308
.ok_or_else(|| "jsonb number out of range".into())
306309
}
307310

@@ -380,6 +383,107 @@ impl<'a> JsonbRef<'a> {
380383
self.0.serialize(&mut ser).map_err(|_| std::fmt::Error)
381384
}
382385

386+
/// Convert the jsonb value to a datum.
387+
pub fn to_datum(self, ty: &DataType) -> Result<Datum, String> {
388+
if !matches!(
389+
ty,
390+
DataType::Jsonb
391+
| DataType::Boolean
392+
| DataType::Int16
393+
| DataType::Int32
394+
| DataType::Int64
395+
| DataType::Float32
396+
| DataType::Float64
397+
| DataType::Varchar
398+
| DataType::List(_)
399+
| DataType::Struct(_)
400+
) {
401+
return Err(format!("cannot cast jsonb to {ty}"));
402+
}
403+
if self.0.as_null().is_some() {
404+
return Ok(None);
405+
}
406+
Ok(Some(match ty {
407+
DataType::Jsonb => ScalarImpl::Jsonb(self.into()),
408+
DataType::Boolean => ScalarImpl::Bool(self.as_bool()?),
409+
DataType::Int16 => ScalarImpl::Int16(self.as_number()?.try_into()?),
410+
DataType::Int32 => ScalarImpl::Int32(self.as_number()?.try_into()?),
411+
DataType::Int64 => ScalarImpl::Int64(self.as_number()?.try_into()?),
412+
DataType::Float32 => ScalarImpl::Float32(self.as_number()?.try_into()?),
413+
DataType::Float64 => ScalarImpl::Float64(self.as_number()?),
414+
DataType::Varchar => ScalarImpl::Utf8(self.force_string().into()),
415+
DataType::List(t) => ScalarImpl::List(self.to_list(t)?),
416+
DataType::Struct(s) => ScalarImpl::Struct(self.to_struct(s)?),
417+
_ => unreachable!(),
418+
}))
419+
}
420+
421+
/// Convert the jsonb value to a list value.
422+
pub fn to_list(self, elem_type: &DataType) -> Result<ListValue, String> {
423+
let array = self
424+
.0
425+
.as_array()
426+
.ok_or_else(|| format!("expected JSON array, but found {self}"))?;
427+
let mut builder = elem_type.create_array_builder(array.len());
428+
for v in array.iter() {
429+
builder.append(Self(v).to_datum(elem_type)?);
430+
}
431+
Ok(ListValue::new(builder.finish()))
432+
}
433+
434+
/// Convert the jsonb value to a struct value.
435+
pub fn to_struct(self, ty: &StructType) -> Result<StructValue, String> {
436+
let object = self.0.as_object().ok_or_else(|| {
437+
format!(
438+
"cannot call populate_composite on a jsonb {}",
439+
self.type_name()
440+
)
441+
})?;
442+
let mut fields = Vec::with_capacity(ty.len());
443+
for (name, ty) in ty.iter() {
444+
let datum = match object.get(name) {
445+
Some(v) => Self(v).to_datum(ty)?,
446+
None => None,
447+
};
448+
fields.push(datum);
449+
}
450+
Ok(StructValue::new(fields))
451+
}
452+
453+
/// Expands the top-level JSON object to a row having the struct type of the `base` argument.
454+
pub fn populate_struct(
455+
self,
456+
ty: &StructType,
457+
base: Option<StructRef<'_>>,
458+
) -> Result<StructValue, String> {
459+
let Some(base) = base else {
460+
return self.to_struct(ty);
461+
};
462+
let object = self.0.as_object().ok_or_else(|| {
463+
format!(
464+
"cannot call populate_composite on a jsonb {}",
465+
self.type_name()
466+
)
467+
})?;
468+
let mut fields = Vec::with_capacity(ty.len());
469+
for ((name, ty), base_field) in ty.iter().zip_eq_debug(base.iter_fields_ref()) {
470+
let datum = match object.get(name) {
471+
Some(v) => match ty {
472+
// recursively populate the nested struct
473+
DataType::Struct(s) => Some(
474+
Self(v)
475+
.populate_struct(s, base_field.map(|s| s.into_struct()))?
476+
.into(),
477+
),
478+
_ => Self(v).to_datum(ty)?,
479+
},
480+
None => base_field.to_owned_datum(),
481+
};
482+
fields.push(datum);
483+
}
484+
Ok(StructValue::new(fields))
485+
}
486+
383487
/// Returns the capacity of the underlying buffer.
384488
pub fn capacity(self) -> usize {
385489
self.0.capacity()

src/expr/impl/src/scalar/cast.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use itertools::Itertools;
2121
use risingwave_common::array::{ArrayImpl, DataChunk, ListRef, ListValue, StructRef, StructValue};
2222
use risingwave_common::cast;
2323
use risingwave_common::row::OwnedRow;
24-
use risingwave_common::types::{Int256, IntoOrdered, JsonbRef, ToText, F64};
24+
use risingwave_common::types::{Int256, JsonbRef, ToText, F64};
2525
use risingwave_common::util::iter_util::ZipEqFast;
2626
use risingwave_expr::expr::{build_func, Context, ExpressionBoxExt, InputRefExpression};
2727
use risingwave_expr::{function, ExprError, Result};
@@ -79,7 +79,6 @@ pub fn jsonb_to_bool(v: JsonbRef<'_>) -> Result<bool> {
7979
pub fn jsonb_to_number<T: TryFrom<F64>>(v: JsonbRef<'_>) -> Result<T> {
8080
v.as_number()
8181
.map_err(|e| ExprError::Parse(e.into()))?
82-
.into_ordered()
8382
.try_into()
8483
.map_err(|_| ExprError::NumericOutOfRange)
8584
}
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
// Copyright 2024 RisingWave Labs
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
use risingwave_common::types::{JsonbRef, StructRef, StructValue};
16+
use risingwave_expr::expr::Context;
17+
use risingwave_expr::{function, ExprError, Result};
18+
19+
/// Expands the top-level JSON object to a row having the composite type of the base argument.
20+
/// The JSON object is scanned for fields whose names match column names of the output row type,
21+
/// and their values are inserted into those columns of the output. (Fields that do not correspond
22+
/// to any output column name are ignored.) In typical use, the value of base is just NULL, which
23+
/// means that any output columns that do not match any object field will be filled with nulls.
24+
/// However, if base isn't NULL then the values it contains will be used for unmatched columns.
25+
///
26+
/// # Examples
27+
///
28+
/// ```slt
29+
/// query ITT
30+
/// select (jsonb_populate_record(
31+
/// null::struct<a int, b text[], c struct<d int, e text>>,
32+
/// '{"a": 1, "b": ["2", "a b"], "c": {"d": 4, "e": "a b c"}, "x": "foo"}'
33+
/// )).*;
34+
/// ----
35+
/// 1 {2,"a b"} (4,"a b c")
36+
///
37+
/// query ITT
38+
/// select (jsonb_populate_record(
39+
/// row(1, null, row(4, '5'))::struct<a int, b text[], c struct<d int, e text>>,
40+
/// '{"b": ["2", "a b"], "c": {"e": "a b c"}, "x": "foo"}'
41+
/// )).*;
42+
/// ----
43+
/// 1 {2,"a b"} (4,"a b c")
44+
/// ```
45+
#[function("jsonb_populate_record(struct, jsonb) -> struct")]
46+
fn jsonb_populate_record(
47+
base: Option<StructRef<'_>>,
48+
jsonb: JsonbRef<'_>,
49+
ctx: &Context,
50+
) -> Result<StructValue> {
51+
let output_type = ctx.return_type.as_struct();
52+
jsonb.populate_struct(output_type, base).map_err(parse_err)
53+
}
54+
55+
/// Expands the top-level JSON array of objects to a set of rows having the composite type of the
56+
/// base argument. Each element of the JSON array is processed as described above for
57+
/// `jsonb_populate_record`.
58+
///
59+
/// # Examples
60+
///
61+
/// ```slt
62+
/// query II
63+
/// select * from jsonb_populate_recordset(
64+
/// null::struct<a int, b int>,
65+
/// '[{"a":1,"b":2}, {"a":3,"b":4}]'::jsonb
66+
/// );
67+
/// ----
68+
/// 1 2
69+
/// 3 4
70+
///
71+
/// query II
72+
/// select * from jsonb_populate_recordset(
73+
/// row(0, 0)::struct<a int, b int>,
74+
/// '[{}, {"a":1}, {"b":2}, {"a":1,"b":2}]'::jsonb
75+
/// );
76+
/// ----
77+
/// 0 0
78+
/// 1 0
79+
/// 0 2
80+
/// 1 2
81+
/// ```
82+
#[function("jsonb_populate_recordset(struct, jsonb) -> setof struct")]
83+
fn jsonb_populate_recordset<'a>(
84+
base: Option<StructRef<'a>>,
85+
jsonb: JsonbRef<'a>,
86+
ctx: &'a Context,
87+
) -> Result<impl Iterator<Item = Result<StructValue>> + 'a> {
88+
let output_type = ctx.return_type.as_struct();
89+
Ok(jsonb
90+
.array_elements()
91+
.map_err(parse_err)?
92+
.map(move |elem| elem.populate_struct(output_type, base).map_err(parse_err)))
93+
}
94+
95+
/// Expands the top-level JSON object to a row having the composite type defined by an AS clause.
96+
/// The output record is filled from fields of the JSON object, in the same way as described above
97+
/// for `jsonb_populate_record`. Since there is no input record value, unmatched columns are always
98+
/// filled with nulls.
99+
///
100+
/// # Examples
101+
///
102+
/// // FIXME(runji): this query is blocked by parser and frontend support.
103+
/// ```slt,ignore
104+
/// query T
105+
/// select * from jsonb_to_record('{"a":1,"b":[1,2,3],"c":[1,2,3],"e":"bar","r": {"a": 123, "b": "a b c"}}')
106+
/// as x(a int, b text, c int[], d text, r struct<a int, b text>);
107+
/// ----
108+
/// 1 [1,2,3] {1,2,3} NULL (123,"a b c")
109+
/// ```
110+
#[function("jsonb_to_record(jsonb) -> struct", type_infer = "panic")]
111+
fn jsonb_to_record(jsonb: JsonbRef<'_>, ctx: &Context) -> Result<StructValue> {
112+
let output_type = ctx.return_type.as_struct();
113+
jsonb.to_struct(output_type).map_err(parse_err)
114+
}
115+
116+
/// Expands the top-level JSON array of objects to a set of rows having the composite type defined
117+
/// by an AS clause. Each element of the JSON array is processed as described above for
118+
/// `jsonb_populate_record`.
119+
///
120+
/// # Examples
121+
///
122+
/// // FIXME(runji): this query is blocked by parser and frontend support.
123+
/// ```slt,ignore
124+
/// query IT
125+
/// select * from jsonb_to_recordset('[{"a":1,"b":"foo"}, {"a":"2","c":"bar"}]') as x(a int, b text);
126+
/// ----
127+
/// 1 foo
128+
/// 2 NULL
129+
/// ```
130+
#[function("jsonb_to_recordset(jsonb) -> setof struct", type_infer = "panic")]
131+
fn jsonb_to_recordset<'a>(
132+
jsonb: JsonbRef<'a>,
133+
ctx: &'a Context,
134+
) -> Result<impl Iterator<Item = Result<StructValue>> + 'a> {
135+
let output_type = ctx.return_type.as_struct();
136+
Ok(jsonb
137+
.array_elements()
138+
.map_err(parse_err)?
139+
.map(|elem| elem.to_struct(output_type).map_err(parse_err)))
140+
}
141+
142+
/// Construct a parse error from String.
143+
fn parse_err(s: String) -> ExprError {
144+
ExprError::Parse(s.into())
145+
}

src/expr/impl/src/scalar/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ mod jsonb_delete;
5757
mod jsonb_info;
5858
mod jsonb_object;
5959
mod jsonb_path;
60+
mod jsonb_record;
6061
mod length;
6162
mod lower;
6263
mod make_time;

0 commit comments

Comments
 (0)