Skip to content

Commit 0069678

Browse files
feat: add more string functions (risingwavelabs#8767)
Signed-off-by: Runji Wang <[email protected]>
1 parent f4e2bdc commit 0069678

File tree

4 files changed

+354
-0
lines changed

4 files changed

+354
-0
lines changed

proto/expr.proto

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,15 @@ message ExprNode {
9595
REGEXP_MATCH = 232;
9696
POW = 233;
9797
EXP = 234;
98+
CHR = 235;
99+
STARTS_WITH = 236;
100+
INITCAP = 237;
101+
LPAD = 238;
102+
RPAD = 239;
103+
REVERSE = 240;
104+
STRPOS = 241;
105+
TO_ASCII = 242;
106+
TO_HEX = 243;
98107

99108
// Boolean comparison
100109
IS_TRUE = 301;

src/expr/src/vector_op/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ pub mod repeat;
3636
pub mod replace;
3737
pub mod round;
3838
pub mod split_part;
39+
pub mod string;
3940
pub mod substr;
4041
pub mod timestamptz;
4142
pub mod to_char;

src/expr/src/vector_op/string.rs

Lines changed: 335 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,335 @@
1+
// Copyright 2023 RisingWave Labs
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
//! String functions
16+
//!
17+
//! <https://www.postgresql.org/docs/current/functions-string.html>
18+
19+
use std::fmt::Write;
20+
21+
use risingwave_expr_macro::function;
22+
23+
/// Returns the character with the specified Unicode code point.
24+
///
25+
/// # Example
26+
///
27+
/// ```slt
28+
/// query T
29+
/// select chr(65);
30+
/// ----
31+
/// A
32+
/// ```
33+
#[function("chr(int32) -> varchar")]
34+
pub fn chr(code: i32, writer: &mut dyn Write) {
    // The cast reinterprets the bits, so negative inputs become values above U+10FFFF.
    // Those, like surrogate code points, have no `char`, and nothing is written for them.
    let decoded = char::from_u32(code as u32);
    if let Some(ch) = decoded {
        writer.write_char(ch).unwrap();
    }
}
39+
40+
/// Returns true if the given string starts with the specified prefix.
41+
///
42+
/// # Example
43+
///
44+
/// ```slt
45+
/// query T
46+
/// select starts_with('abcdef', 'abc');
47+
/// ----
48+
/// t
49+
/// ```
50+
#[function("starts_with(varchar, varchar) -> boolean")]
51+
pub fn starts_with(s: &str, prefix: &str) -> bool {
    // Stripping the prefix succeeds exactly when `s` begins with `prefix`
    // (an empty prefix always matches).
    s.strip_prefix(prefix).is_some()
}
54+
55+
/// Capitalizes the first letter of each word in the given string.
56+
///
57+
/// # Example
58+
///
59+
/// ```slt
60+
/// query T
61+
/// select initcap('the quick brown fox');
62+
/// ----
63+
/// The Quick Brown Fox
64+
/// ```
65+
#[function("initcap(varchar) -> varchar")]
66+
pub fn initcap(s: &str, writer: &mut dyn Write) {
    // Follows the PostgreSQL definition: a word is a run of alphanumeric characters, and
    // every non-alphanumeric character (not just whitespace) separates words.
    // The first character of each word is upper-cased; the rest are lower-cased.
    // Separators are written through unchanged, because case mapping leaves them as-is.
    let mut at_word_start = true;
    for c in s.chars() {
        if at_word_start {
            // `to_uppercase` may expand to several chars (e.g. 'ß'); Display writes them all.
            write!(writer, "{}", c.to_uppercase()).unwrap();
        } else {
            write!(writer, "{}", c.to_lowercase()).unwrap();
        }
        // The next character starts a new word iff this one is a separator.
        at_word_start = !c.is_alphanumeric();
    }
}
80+
81+
/// Pads the given string on the left with spaces until it is exactly the specified length.
82+
/// If the string is already longer than that, it is truncated on the right.
83+
///
84+
/// # Example
85+
///
86+
/// ```slt
87+
/// query T
88+
/// select lpad('abc', 5);
89+
/// ----
90+
///   abc
91+
///
92+
/// query T
93+
/// select lpad('abcdef', 3);
94+
/// ----
95+
/// abc
96+
/// ```
97+
#[function("lpad(varchar, int32) -> varchar")]
98+
pub fn lpad(s: &str, length: i32, writer: &mut dyn Write) {
    // Two-argument form: delegate to `lpad_fill`, using a single space as the fill string.
    lpad_fill(s, length, " ", writer);
}
101+
102+
/// Extends the string to the specified length by prepending the characters fill.
103+
/// If the string is already longer than the specified length, it is truncated on the right.
104+
///
105+
/// # Example
106+
///
107+
/// ```slt
108+
/// query T
109+
/// select lpad('hi', 5, 'xy');
110+
/// ----
111+
/// xyxhi
112+
/// ```
113+
#[function("lpad(varchar, int32, varchar) -> varchar")]
114+
pub fn lpad_fill(s: &str, length: i32, fill: &str, writer: &mut dyn Write) {
    // Writes `s` left-padded with repetitions of `fill` so that the output is exactly
    // `length` characters long (lengths are counted in chars, not bytes).
    //
    // * `length <= 0`          -> nothing is written.
    // * `s` longer than target -> `s` is truncated on the right to `length` chars.
    // * `fill` is empty        -> there is nothing to pad with, so `s` is written as-is.
    //   (The previous loop never terminated in this case.)
    if length <= 0 {
        return;
    }
    // `length` is strictly positive here, so the cast cannot wrap.
    let target = length as usize;
    let s_len = s.chars().count();

    if s_len >= target {
        for c in s.chars().take(target) {
            writer.write_char(c).unwrap();
        }
        return;
    }

    // `cycle` over an empty `fill` yields nothing, which gives the "no padding" behaviour
    // without a special case; otherwise it repeats `fill`, cut off mid-way if necessary.
    for c in fill.chars().cycle().take(target - s_len) {
        writer.write_char(c).unwrap();
    }
    writer.write_str(s).unwrap();
}
137+
138+
/// Pads the given string on the right with spaces until it is exactly the specified length.
139+
/// If the string is already longer than that, it is truncated on the right.
140+
///
141+
/// # Example
142+
///
143+
/// ```slt
144+
/// query T
145+
/// select rpad('abc', 5);
146+
/// ----
147+
/// abc
148+
///
149+
/// query T
150+
/// select rpad('abcdef', 3);
151+
/// ----
152+
/// abc
153+
/// ```
154+
#[function("rpad(varchar, int32) -> varchar")]
155+
pub fn rpad(s: &str, length: i32, writer: &mut dyn Write) {
    // Two-argument form: delegate to `rpad_fill`, using a single space as the fill string.
    rpad_fill(s, length, " ", writer);
}
158+
159+
/// Extends the given string on the right to exactly the specified length by appending
160+
/// (and repeating as needed) the specified fill string, truncating the string on the right
161+
/// if it is already longer than the specified length.
162+
///
163+
/// # Example
164+
///
165+
/// ```slt
166+
/// query T
167+
/// select rpad('hi', 5, 'xy');
168+
/// ----
169+
/// hixyx
170+
///
171+
/// query T
172+
/// select rpad('abc', 5, '😀');
173+
/// ----
174+
/// abc😀😀
175+
///
176+
/// query T
177+
/// select rpad('abcdef', 3, '0');
178+
/// ----
179+
/// abc
180+
#[function("rpad(varchar, int32, varchar) -> varchar")]
181+
pub fn rpad_fill(s: &str, length: i32, fill: &str, writer: &mut dyn Write) {
    // Writes `s` right-padded with repetitions of `fill` so that the output is exactly
    // `length` characters long (lengths are counted in chars, not bytes).
    //
    // * `length <= 0`          -> nothing is written.
    // * `s` longer than target -> `s` is truncated on the right to `length` chars.
    // * `fill` is empty        -> there is nothing to pad with, so `s` is written as-is.
    //   (The previous loop never terminated in this case.)
    if length <= 0 {
        return;
    }
    // `length` is strictly positive here, so the cast cannot wrap.
    let target = length as usize;

    // The output is the first `target` chars of `s` followed by `fill` repeated forever.
    // If `s` alone is long enough, this is a truncation. If `fill` is empty, the cycled
    // iterator is empty too, so the stream ends right after `s`.
    let padded = s.chars().chain(fill.chars().cycle()).take(target);
    for c in padded {
        writer.write_char(c).unwrap();
    }
}
205+
206+
/// Reverses the characters in the given string.
207+
///
208+
/// # Example
209+
///
210+
/// ```slt
211+
/// query T
212+
/// select reverse('abcdef');
213+
/// ----
214+
/// fedcba
215+
/// ```
216+
#[function("reverse(varchar) -> varchar")]
217+
pub fn reverse(s: &str, writer: &mut dyn Write) {
    // Emit the Unicode scalar values of `s` from last to first. Reversal is per `char`,
    // so multi-byte characters stay intact.
    s.chars()
        .rev()
        .try_for_each(|ch| writer.write_char(ch))
        .unwrap();
}
222+
223+
/// Returns the index of the first occurrence of the specified substring in the input string,
224+
/// or zero if the substring is not present.
225+
///
226+
/// # Example
227+
///
228+
/// ```slt
229+
/// query T
230+
/// select strpos('hello, world', 'lo');
231+
/// ----
232+
/// 4
233+
///
234+
/// query T
235+
/// select strpos('high', 'ig');
236+
/// ----
237+
/// 2
238+
///
239+
/// query T
240+
/// select strpos('abc', 'def');
241+
/// ----
242+
/// 0
243+
/// ```
244+
#[function("strpos(varchar, varchar) -> int32")]
245+
pub fn strpos(s: &str, substr: &str) -> i32 {
    // Returns the 1-based *character* position of the first occurrence of `substr` in `s`,
    // or 0 when `substr` does not occur. An empty `substr` matches at position 1.
    match s.find(substr) {
        // `find` reports a byte offset. Count the chars before it so that multi-byte
        // characters ahead of the match do not inflate the result. The slice is safe:
        // a match offset always lies on a char boundary.
        // The cast only truncates for strings with more than `i32::MAX` characters.
        Some(byte_pos) => s[..byte_pos].chars().count() as i32 + 1,
        None => 0,
    }
}
252+
253+
/// Converts the input string to ASCII by replacing a fixed set of accented Latin letters
254+
/// with their unaccented base letters; all other characters are passed through unchanged.
255+
///
256+
/// # Example
257+
///
258+
/// ```slt
259+
/// query T
260+
/// select to_ascii('Karél');
261+
/// ----
262+
/// Karel
263+
/// ```
264+
#[function("to_ascii(varchar) -> varchar")]
265+
pub fn to_ascii(s: &str, writer: &mut dyn Write) {
    // Maps one accented letter from the supported table to its plain base letter.
    // Characters that are not in the table are returned unchanged.
    fn fold_accent(ch: char) -> char {
        match ch {
            'Á' | 'À' | 'Â' | 'Ã' => 'A',
            'á' | 'à' | 'â' | 'ã' => 'a',
            'Č' | 'Ć' | 'Ç' => 'C',
            'č' | 'ć' | 'ç' => 'c',
            'Ď' => 'D',
            'ď' => 'd',
            'É' | 'È' | 'Ê' | 'Ẽ' => 'E',
            'é' | 'è' | 'ê' | 'ẽ' => 'e',
            'Í' | 'Ì' | 'Î' | 'Ĩ' => 'I',
            'í' | 'ì' | 'î' | 'ĩ' => 'i',
            'Ľ' => 'L',
            'ľ' => 'l',
            'Ň' => 'N',
            'ň' => 'n',
            'Ó' | 'Ò' | 'Ô' | 'Õ' => 'O',
            'ó' | 'ò' | 'ô' | 'õ' => 'o',
            'Ŕ' => 'R',
            'ŕ' => 'r',
            'Š' | 'Ś' => 'S',
            'š' | 'ś' => 's',
            'Ť' => 'T',
            'ť' => 't',
            'Ú' | 'Ù' | 'Û' | 'Ũ' => 'U',
            'ú' | 'ù' | 'û' | 'ũ' => 'u',
            'Ý' | 'Ỳ' => 'Y',
            'ý' | 'ỳ' => 'y',
            'Ž' | 'Ź' | 'Ż' => 'Z',
            'ž' | 'ź' | 'ż' => 'z',
            other => other,
        }
    }

    // Translate character by character and stream the result into `writer`.
    s.chars()
        .map(fold_accent)
        .try_for_each(|plain| writer.write_char(plain))
        .unwrap();
}
301+
302+
/// Converts the given integer to its equivalent hexadecimal representation.
303+
///
304+
/// # Example
305+
///
306+
/// ```slt
307+
/// query T
308+
/// select to_hex(2147483647);
309+
/// ----
310+
/// 7fffffff
311+
///
312+
/// query T
313+
/// select to_hex(-2147483648);
314+
/// ----
315+
/// 80000000
316+
///
317+
/// query T
318+
/// select to_hex(9223372036854775807);
319+
/// ----
320+
/// 7fffffffffffffff
321+
///
322+
/// query T
323+
/// select to_hex(-9223372036854775808);
324+
/// ----
325+
/// 8000000000000000
326+
/// ```
327+
#[function("to_hex(int32) -> varchar")]
328+
pub fn to_hex_i32(n: i32, writer: &mut dyn Write) {
    // Lower-case hex of the 32-bit two's-complement bit pattern, without a prefix or
    // leading zeros; negative values therefore print as 8 hex digits.
    writer.write_fmt(format_args!("{n:x}")).unwrap();
}
331+
332+
#[function("to_hex(int64) -> varchar")]
333+
pub fn to_hex_i64(n: i64, writer: &mut dyn Write) {
    // Lower-case hex of the 64-bit two's-complement bit pattern, without a prefix or
    // leading zeros; negative values therefore print as 16 hex digits.
    writer.write_fmt(format_args!("{n:x}")).unwrap();
}

src/frontend/src/binder/expr/function.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,15 @@ impl Binder {
380380
("octet_length", raw_call(ExprType::OctetLength)),
381381
("bit_length", raw_call(ExprType::BitLength)),
382382
("regexp_match", raw_call(ExprType::RegexpMatch)),
383+
("chr", raw_call(ExprType::Chr)),
384+
("starts_with", raw_call(ExprType::StartsWith)),
385+
("initcap", raw_call(ExprType::Initcap)),
386+
("lpad", raw_call(ExprType::Lpad)),
387+
("rpad", raw_call(ExprType::Rpad)),
388+
("reverse", raw_call(ExprType::Reverse)),
389+
("strpos", raw_call(ExprType::Strpos)),
390+
("to_ascii", raw_call(ExprType::ToAscii)),
391+
("to_hex", raw_call(ExprType::ToHex)),
383392
// array
384393
("array_cat", raw_call(ExprType::ArrayCat)),
385394
("array_append", raw_call(ExprType::ArrayAppend)),

0 commit comments

Comments
 (0)