Skip to content

Commit a03820c

Browse files
committed
feat: support ||, position and overlay bytea functions
1 parent 7c53f07 commit a03820c

File tree

7 files changed

+289
-8
lines changed

7 files changed

+289
-8
lines changed

proto/expr.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ message ExprNode {
108108
CHAR_LENGTH = 225;
109109
REPEAT = 226;
110110
CONCAT_OP = 227;
111+
BYTEA_CONCAT_OP = 290;
111112
CONCAT = 286;
112113
CONCAT_VARIADIC = 287;
113114
// BOOL_OUT is different from CAST-bool-to-varchar in PostgreSQL.

src/expr/impl/src/scalar/concat_op.rs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,34 @@ pub fn concat_op(left: &str, right: &str, writer: &mut impl Write) {
2222
writer.write_str(right).unwrap();
2323
}
2424

25+
/// Concatenates the two binary strings.
26+
///
27+
/// # Example
28+
///
29+
/// ```slt
30+
/// query I
31+
/// select '\x123456'::bytea || '\x789a00bcde'::bytea;
32+
/// ----
33+
/// \x123456789a00bcde
34+
///
35+
/// query I
36+
/// select '\x123456'::bytea || '\x789a00bcde';
37+
/// ----
38+
/// \x123456789a00bcde
39+
///
40+
/// query I
41+
/// select '\x123456'::bytea || ''::bytea;
42+
/// ----
43+
/// \x123456
44+
/// ```
45+
#[function("bytea_concat_op(bytea, bytea) -> bytea")]
46+
pub fn bytea_concat_op(left: &[u8], right: &[u8]) -> Box<[u8]> {
    // Join the two operands in order (`left` first, then `right`) into a fresh buffer and
    // hand it back as a boxed slice; either side may be empty.
    [left, right].concat().into_boxed_slice()
}
52+
2553
#[cfg(test)]
2654
mod tests {
2755
use super::*;
@@ -32,4 +60,12 @@ mod tests {
3260
concat_op("114", "514", &mut s);
3361
assert_eq!(s, "114514")
3462
}
63+
64+
#[test]
fn test_bytea_concat_op() {
    // The right operand must be appended directly after the left one.
    let joined = bytea_concat_op(b"\x01\x02\x03", b"\x04\x05");
    let expected: &[u8] = b"\x01\x02\x03\x04\x05";
    assert_eq!(&joined[..], expected);
}
3571
}

src/expr/impl/src/scalar/overlay.rs

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,115 @@ pub fn overlay_for(
123123
Ok(())
124124
}
125125

126+
/// Replaces a subsequence of the given bytea with a new bytea value.
127+
///
128+
/// # Example
129+
///
130+
/// ```slt
131+
/// query T
132+
/// select overlay('\x616263646566'::bytea placing '\x9999'::bytea from 3);
133+
/// ----
134+
/// \x616299996566
135+
/// ```
136+
#[function("overlay(bytea, bytea, int4) -> bytea")]
137+
pub fn overlay_bytea(s: &[u8], new_sub_str: &[u8], start: i32) -> Result<Box<[u8]>> {
138+
let count = new_sub_str
139+
.len()
140+
.try_into()
141+
.map_err(|_| ExprError::NumericOutOfRange)?;
142+
overlay_for_bytea(s, new_sub_str, start, count)
143+
}
144+
145+
/// Replaces a range of bytes in a bytea value with another bytea.
146+
///
147+
/// ```slt
148+
/// statement error not positive
149+
/// select overlay('\x616263'::bytea placing '\x313233'::bytea from 0);
150+
///
151+
/// query T
152+
/// select overlay('\x616263'::bytea placing '\x313233'::bytea from 10);
153+
/// ----
154+
/// \x616263313233
155+
///
156+
/// query T
157+
/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 4 for 2);
158+
/// ----
159+
/// \x61626331323366
160+
///
161+
/// query T
162+
/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 4);
163+
/// ----
164+
/// \x616263313233
165+
///
166+
/// query T
167+
/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 2 for 4);
168+
/// ----
169+
/// \x6131323366
170+
///
171+
/// query T
172+
/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 2 for 7);
173+
/// ----
174+
/// \x61313233
175+
///
176+
/// query T
177+
/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 4 for 0);
178+
/// ----
179+
/// \x616263313233646566
180+
///
181+
/// query T
182+
/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 4 for -2);
183+
/// ----
184+
/// \x6162633132336263646566
185+
///
186+
/// query T
187+
/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 4 for -1000);
188+
/// ----
189+
/// \x616263313233616263646566
190+
/// ```
191+
#[function("overlay(bytea, bytea, int4, int4) -> bytea")]
192+
pub fn overlay_for_bytea(
193+
s: &[u8],
194+
new_sub_str: &[u8],
195+
start: i32,
196+
count: i32,
197+
) -> Result<Box<[u8]>> {
198+
if start <= 0 {
199+
return Err(ExprError::InvalidParam {
200+
name: "start",
201+
reason: format!("{start} is not positive").into(),
202+
});
203+
}
204+
205+
// write the substring_bytea before the overlay.
206+
let start_idx = (start - 1) as usize;
207+
let mut result = Vec::with_capacity(s.len() + new_sub_str.len());
208+
if start_idx >= s.len() {
209+
result.extend_from_slice(s);
210+
} else {
211+
result.extend_from_slice(&s[..start_idx]);
212+
}
213+
214+
// write the new substring_bytea.
215+
result.extend_from_slice(new_sub_str);
216+
217+
if count < 0 {
218+
// For negative `count`, which is rare in practice, we hand over to `substr_bytea`
219+
let start_right = start
220+
.checked_add(count)
221+
.ok_or(ExprError::NumericOutOfRange)?;
222+
result.extend_from_slice(&super::substr::substr_start_bytea(s, start_right));
223+
return Ok(result.into_boxed_slice());
224+
};
225+
226+
// write the substring_bytea after the overlay.
227+
let count = count as usize;
228+
let skip_end = start_idx.saturating_add(count);
229+
if skip_end <= s.len() {
230+
result.extend_from_slice(&s[skip_end..]);
231+
}
232+
Ok(result.into_boxed_slice())
233+
}
234+
126235
#[cfg(test)]
127236
mod tests {
128237
use super::*;
@@ -158,4 +267,97 @@ mod tests {
158267
assert_eq!(writer, expected);
159268
}
160269
}
270+
271+
#[test]
fn test_overlay_bytea() {
    // Runs `overlay_bytea` when `count` is `None` and `overlay_for_bytea` otherwise, then
    // compares the produced bytes with `expected`.
    #[track_caller]
    fn case(s: &[u8], new_sub_str: &[u8], start: i32, count: Option<i32>, expected: &[u8]) {
        let outcome = match count {
            Some(count) => overlay_for_bytea(s, new_sub_str, start, count),
            None => overlay_bytea(s, new_sub_str, start),
        };
        let actual: Box<[u8]> = outcome.unwrap();
        let wanted: Box<[u8]> = expected.into();
        assert_eq!(actual, wanted);
    }

    // Arguments: (input, replace, start, count, expected)
    case(
        b"\x61\x61\x61\x5f\x5f\x61\x61\x61",
        b"\x58\x59",
        4,
        None,
        b"\x61\x61\x61\x58\x59\x61\x61\x61",
    );

    // Place at end
    case(
        b"\x61\x61\x61",
        b"\x58\x59",
        4,
        None,
        b"\x61\x61\x61\x58\x59",
    );

    // Place at start
    case(
        b"\x61\x61\x61",
        b"\x58\x59",
        1,
        Some(0),
        b"\x58\x59\x61\x61\x61",
    );

    // Replace shorter string
    case(
        b"\x61\x61\x61\x5f\x61\x61\x61",
        b"\x58\x59\x5A",
        4,
        Some(1),
        b"\x61\x61\x61\x58\x59\x5A\x61\x61\x61",
    );
    case(
        b"\x61\x61\x61\x61\x61\x61",
        b"\x58\x59\x5A",
        4,
        Some(0),
        b"\x61\x61\x61\x58\x59\x5A\x61\x61\x61",
    );

    // Replace longer string
    case(
        b"\x61\x61\x61\x5f\x5f\x5f\x61\x61\x61",
        b"\x58",
        4,
        Some(3),
        b"\x61\x61\x61\x58\x61\x61\x61",
    );

    // start too large
    case(
        b"\x61\x61\x61",
        b"\x58\x59",
        123,
        None,
        b"\x61\x61\x61\x58\x59",
    );

    // count too small or large
    case(
        b"\x61\x61\x61",
        b"\x58",
        4,
        Some(-123),
        b"\x61\x61\x61\x58\x61\x61\x61",
    );
    case(
        b"\x61\x61\x61\x5f",
        b"\x58",
        4,
        Some(123),
        b"\x61\x61\x61\x58",
    );

    // very large start and count
    case(
        b"\x61\x61\x61",
        b"\x58",
        i32::MAX,
        Some(i32::MAX),
        b"\x61\x61\x61\x58",
    );
}
161363
}

src/expr/impl/src/scalar/position.rs

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,42 @@ pub fn position(str: &str, sub_str: &str) -> i32 {
5454
}
5555
}
5656

57+
/// Returns the index of the first occurrence of the specified bytea substring in the input bytea,
58+
/// or zero if the substring is not present.
59+
///
60+
/// # Example
61+
///
62+
/// ```slt
63+
/// query I
64+
/// select position('\x6c6f'::bytea in '\x68656c6c6f2c20776f726c64'::bytea);
65+
/// ----
66+
/// 4
67+
///
68+
/// query I
69+
/// select position('\x6967'::bytea in '\x68696768'::bytea);
70+
/// ----
71+
/// 2
72+
///
73+
/// query I
74+
/// select position('\x64'::bytea in '\x616263'::bytea);
75+
/// ----
76+
/// 0
77+
/// ```
78+
#[function("position(bytea, bytea) -> int4")]
79+
pub fn bytea_position(bytea: &[u8], sub_bytea: &[u8]) -> i32 {
    // An empty needle is reported as found at position 1. This guard also keeps
    // `windows` from being called with a size of zero, which would panic.
    if sub_bytea.is_empty() {
        return 1;
    }

    // Slide a window of the needle's length over the haystack. When the needle is longer
    // than the haystack (including an empty haystack) `windows` yields nothing, so the
    // result is 0 ("not found") rather than an out-of-bounds slice panic.
    bytea
        .windows(sub_bytea.len())
        .position(|window| window == sub_bytea)
        // Convert the zero-based match offset into a 1-based position.
        // NOTE(review): assumes the offset fits in `i32` — confirm against the maximum
        // bytea size supported by the engine.
        .map_or(0, |idx| (idx + 1) as i32)
}
92+
5793
#[cfg(test)]
5894
mod tests {
5995

@@ -71,4 +107,13 @@ mod tests {
71107
assert_eq!(position(str, sub_str), expected)
72108
}
73109
}
110+
111+
#[test]
fn test_bytea_position() {
    let haystack: &[u8] = b"\x01\x02\x03";

    // A needle that is present yields its 1-based position.
    assert_eq!(bytea_position(haystack, b"\x03"), 3);
    // A needle that is absent yields 0.
    assert_eq!(bytea_position(haystack, b"\x04"), 0);
}
74119
}

src/frontend/src/binder/expr/binary_op.rs

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -180,14 +180,9 @@ impl Binder {
180180
| (None, Some(DataType::Jsonb)) => ExprType::JsonbConcat,
181181

182182
// bytea (and varbit, tsvector, tsquery)
183-
(Some(t @ DataType::Bytea), Some(DataType::Bytea))
184-
| (Some(t @ DataType::Bytea), None)
185-
| (None, Some(t @ DataType::Bytea)) => {
186-
return Err(ErrorCode::BindError(format!(
187-
"operator not implemented yet: {t} || {t}"
188-
))
189-
.into());
190-
}
183+
(Some(DataType::Bytea), Some(DataType::Bytea))
184+
| (Some(DataType::Bytea), None)
185+
| (None, Some(DataType::Bytea)) => ExprType::ByteaConcatOp,
191186

192187
// string concatenation
193188
(None, _) | (_, None) => ExprType::ConcatOp,

src/frontend/src/expr/pure.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ impl ExprVisitor for ImpureAnalyzer {
105105
| Type::CharLength
106106
| Type::Repeat
107107
| Type::ConcatOp
108+
| Type::ByteaConcatOp
108109
| Type::Concat
109110
| Type::ConcatVariadic
110111
| Type::BoolOut

src/frontend/src/optimizer/plan_expr_visitor/strong.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@ impl Strong {
162162
| ExprType::CharLength
163163
| ExprType::Repeat
164164
| ExprType::ConcatOp
165+
| ExprType::ByteaConcatOp
165166
| ExprType::BoolOut
166167
| ExprType::OctetLength
167168
| ExprType::BitLength

0 commit comments

Comments
 (0)