|
| 1 | +// Copyright 2023 RisingWave Labs |
| 2 | +// |
| 3 | +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +// you may not use this file except in compliance with the License. |
| 5 | +// You may obtain a copy of the License at |
| 6 | +// |
| 7 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +// |
| 9 | +// Unless required by applicable law or agreed to in writing, software |
| 10 | +// distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +// See the License for the specific language governing permissions and |
| 13 | +// limitations under the License. |
| 14 | + |
| 15 | +//! String functions |
| 16 | +//! |
| 17 | +//! <https://www.postgresql.org/docs/current/functions-string.html> |
| 18 | +
|
| 19 | +use std::fmt::Write; |
| 20 | + |
| 21 | +use risingwave_expr_macro::function; |
| 22 | + |
| 23 | +/// Returns the character with the specified Unicode code point. |
| 24 | +/// |
| 25 | +/// # Example |
| 26 | +/// |
| 27 | +/// ```slt |
| 28 | +/// query T |
| 29 | +/// select chr(65); |
| 30 | +/// ---- |
| 31 | +/// A |
| 32 | +/// ``` |
| 33 | +#[function("chr(int32) -> varchar")] |
| 34 | +pub fn chr(code: i32, writer: &mut dyn Write) { |
| 35 | + if let Some(c) = std::char::from_u32(code as u32) { |
| 36 | + write!(writer, "{}", c).unwrap(); |
| 37 | + } |
| 38 | +} |
| 39 | + |
| 40 | +/// Returns true if the given string starts with the specified prefix. |
| 41 | +/// |
| 42 | +/// # Example |
| 43 | +/// |
| 44 | +/// ```slt |
| 45 | +/// query T |
| 46 | +/// select starts_with('abcdef', 'abc'); |
| 47 | +/// ---- |
| 48 | +/// t |
| 49 | +/// ``` |
| 50 | +#[function("starts_with(varchar, varchar) -> boolean")] |
| 51 | +pub fn starts_with(s: &str, prefix: &str) -> bool { |
| 52 | + s.starts_with(prefix) |
| 53 | +} |
| 54 | + |
| 55 | +/// Capitalizes the first letter of each word in the given string. |
| 56 | +/// |
| 57 | +/// # Example |
| 58 | +/// |
| 59 | +/// ```slt |
| 60 | +/// query T |
| 61 | +/// select initcap('the quick brown fox'); |
| 62 | +/// ---- |
| 63 | +/// The Quick Brown Fox |
| 64 | +/// ``` |
| 65 | +#[function("initcap(varchar) -> varchar")] |
| 66 | +pub fn initcap(s: &str, writer: &mut dyn Write) { |
| 67 | + let mut capitalize_next = true; |
| 68 | + for c in s.chars() { |
| 69 | + if capitalize_next { |
| 70 | + write!(writer, "{}", c.to_uppercase()).unwrap(); |
| 71 | + capitalize_next = false; |
| 72 | + } else { |
| 73 | + write!(writer, "{}", c.to_lowercase()).unwrap(); |
| 74 | + } |
| 75 | + if c.is_whitespace() { |
| 76 | + capitalize_next = true; |
| 77 | + } |
| 78 | + } |
| 79 | +} |
| 80 | + |
| 81 | +/// Extends the given string on the left until it is at least the specified length, |
| 82 | +/// using the specified fill character (or a space by default). |
| 83 | +/// |
| 84 | +/// # Example |
| 85 | +/// |
| 86 | +/// ```slt |
| 87 | +/// query T |
| 88 | +/// select lpad('abc', 5); |
| 89 | +/// ---- |
| 90 | +/// abc |
| 91 | +/// |
| 92 | +/// query T |
| 93 | +/// select lpad('abcdef', 3); |
| 94 | +/// ---- |
| 95 | +/// abc |
| 96 | +/// ``` |
| 97 | +#[function("lpad(varchar, int32) -> varchar")] |
| 98 | +pub fn lpad(s: &str, length: i32, writer: &mut dyn Write) { |
| 99 | + lpad_fill(s, length, " ", writer); |
| 100 | +} |
| 101 | + |
| 102 | +/// Extends the string to the specified length by prepending the characters fill. |
| 103 | +/// If the string is already longer than the specified length, it is truncated on the right. |
| 104 | +/// |
| 105 | +/// # Example |
| 106 | +/// |
| 107 | +/// ```slt |
| 108 | +/// query T |
| 109 | +/// select lpad('hi', 5, 'xy'); |
| 110 | +/// ---- |
| 111 | +/// xyxhi |
| 112 | +/// ``` |
| 113 | +#[function("lpad(varchar, int32, varchar) -> varchar")] |
| 114 | +pub fn lpad_fill(s: &str, length: i32, fill: &str, writer: &mut dyn Write) { |
| 115 | + let s_len = s.chars().count(); |
| 116 | + let fill_len = fill.chars().count(); |
| 117 | + |
| 118 | + if length <= 0 { |
| 119 | + return; |
| 120 | + } |
| 121 | + if s_len >= length as usize { |
| 122 | + for c in s.chars().take(length as usize) { |
| 123 | + write!(writer, "{c}").unwrap(); |
| 124 | + } |
| 125 | + } else { |
| 126 | + let mut remaining_length = length as usize - s_len; |
| 127 | + while remaining_length >= fill_len { |
| 128 | + write!(writer, "{fill}").unwrap(); |
| 129 | + remaining_length -= fill_len; |
| 130 | + } |
| 131 | + for c in fill.chars().take(remaining_length) { |
| 132 | + write!(writer, "{c}").unwrap(); |
| 133 | + } |
| 134 | + write!(writer, "{s}").unwrap(); |
| 135 | + } |
| 136 | +} |
| 137 | + |
| 138 | +/// Extends the given string on the right until it is at least the specified length, |
| 139 | +/// using the specified fill character (or a space by default). |
| 140 | +/// |
| 141 | +/// # Example |
| 142 | +/// |
| 143 | +/// ```slt |
| 144 | +/// query T |
| 145 | +/// select rpad('abc', 5); |
| 146 | +/// ---- |
| 147 | +/// abc |
| 148 | +/// |
| 149 | +/// query T |
| 150 | +/// select rpad('abcdef', 3); |
| 151 | +/// ---- |
| 152 | +/// abc |
| 153 | +/// ``` |
| 154 | +#[function("rpad(varchar, int32) -> varchar")] |
| 155 | +pub fn rpad(s: &str, length: i32, writer: &mut dyn Write) { |
| 156 | + rpad_fill(s, length, " ", writer); |
| 157 | +} |
| 158 | + |
| 159 | +/// Extends the given string on the right until it is at least the specified length, |
| 160 | +/// using the specified fill string, truncating the string if it is already longer |
| 161 | +/// than the specified length. |
| 162 | +/// |
| 163 | +/// # Example |
| 164 | +/// |
| 165 | +/// ```slt |
| 166 | +/// query T |
| 167 | +/// select rpad('hi', 5, 'xy'); |
| 168 | +/// ---- |
| 169 | +/// hixyx |
| 170 | +/// |
| 171 | +/// query T |
| 172 | +/// select rpad('abc', 5, '😀'); |
| 173 | +/// ---- |
| 174 | +/// abc😀😀 |
| 175 | +/// |
| 176 | +/// query T |
| 177 | +/// select rpad('abcdef', 3, '0'); |
| 178 | +/// ---- |
| 179 | +/// abc |
| 180 | +#[function("rpad(varchar, int32, varchar) -> varchar")] |
| 181 | +pub fn rpad_fill(s: &str, length: i32, fill: &str, writer: &mut dyn Write) { |
| 182 | + let s_len = s.chars().count(); |
| 183 | + let fill_len = fill.chars().count(); |
| 184 | + |
| 185 | + if length <= 0 { |
| 186 | + return; |
| 187 | + } |
| 188 | + |
| 189 | + if s_len >= length as usize { |
| 190 | + for c in s.chars().take(length as usize) { |
| 191 | + write!(writer, "{c}").unwrap(); |
| 192 | + } |
| 193 | + } else { |
| 194 | + write!(writer, "{s}").unwrap(); |
| 195 | + let mut remaining_length = length as usize - s_len; |
| 196 | + while remaining_length >= fill_len { |
| 197 | + write!(writer, "{fill}").unwrap(); |
| 198 | + remaining_length -= fill_len; |
| 199 | + } |
| 200 | + for c in fill.chars().take(remaining_length) { |
| 201 | + write!(writer, "{c}").unwrap(); |
| 202 | + } |
| 203 | + } |
| 204 | +} |
| 205 | + |
| 206 | +/// Reverses the characters in the given string. |
| 207 | +/// |
| 208 | +/// # Example |
| 209 | +/// |
| 210 | +/// ```slt |
| 211 | +/// query T |
| 212 | +/// select reverse('abcdef'); |
| 213 | +/// ---- |
| 214 | +/// fedcba |
| 215 | +/// ``` |
| 216 | +#[function("reverse(varchar) -> varchar")] |
| 217 | +pub fn reverse(s: &str, writer: &mut dyn Write) { |
| 218 | + for c in s.chars().rev() { |
| 219 | + write!(writer, "{}", c).unwrap(); |
| 220 | + } |
| 221 | +} |
| 222 | + |
| 223 | +/// Returns the index of the first occurrence of the specified substring in the input string, |
| 224 | +/// or zero if the substring is not present. |
| 225 | +/// |
| 226 | +/// # Example |
| 227 | +/// |
| 228 | +/// ```slt |
| 229 | +/// query T |
| 230 | +/// select strpos('hello, world', 'lo'); |
| 231 | +/// ---- |
| 232 | +/// 4 |
| 233 | +/// |
| 234 | +/// query T |
| 235 | +/// select strpos('high', 'ig'); |
| 236 | +/// ---- |
| 237 | +/// 2 |
| 238 | +/// |
| 239 | +/// query T |
| 240 | +/// select strpos('abc', 'def'); |
| 241 | +/// ---- |
| 242 | +/// 0 |
| 243 | +/// ``` |
| 244 | +#[function("strpos(varchar, varchar) -> int32")] |
| 245 | +pub fn strpos(s: &str, substr: &str) -> i32 { |
| 246 | + if let Some(pos) = s.find(substr) { |
| 247 | + pos as i32 + 1 |
| 248 | + } else { |
| 249 | + 0 |
| 250 | + } |
| 251 | +} |
| 252 | + |
| 253 | +/// Converts the input string to ASCII by dropping accents, assuming that the input string |
| 254 | +/// is encoded in one of the supported encodings (Latin1, Latin2, Latin9, or WIN1250). |
| 255 | +/// |
| 256 | +/// # Example |
| 257 | +/// |
| 258 | +/// ```slt |
| 259 | +/// query T |
| 260 | +/// select to_ascii('Karél'); |
| 261 | +/// ---- |
| 262 | +/// Karel |
| 263 | +/// ``` |
| 264 | +#[function("to_ascii(varchar) -> varchar")] |
| 265 | +pub fn to_ascii(s: &str, writer: &mut dyn Write) { |
| 266 | + for c in s.chars() { |
| 267 | + let ascii = match c { |
| 268 | + 'Á' | 'À' | 'Â' | 'Ã' => 'A', |
| 269 | + 'á' | 'à' | 'â' | 'ã' => 'a', |
| 270 | + 'Č' | 'Ć' | 'Ç' => 'C', |
| 271 | + 'č' | 'ć' | 'ç' => 'c', |
| 272 | + 'Ď' => 'D', |
| 273 | + 'ď' => 'd', |
| 274 | + 'É' | 'È' | 'Ê' | 'Ẽ' => 'E', |
| 275 | + 'é' | 'è' | 'ê' | 'ẽ' => 'e', |
| 276 | + 'Í' | 'Ì' | 'Î' | 'Ĩ' => 'I', |
| 277 | + 'í' | 'ì' | 'î' | 'ĩ' => 'i', |
| 278 | + 'Ľ' => 'L', |
| 279 | + 'ľ' => 'l', |
| 280 | + 'Ň' => 'N', |
| 281 | + 'ň' => 'n', |
| 282 | + 'Ó' | 'Ò' | 'Ô' | 'Õ' => 'O', |
| 283 | + 'ó' | 'ò' | 'ô' | 'õ' => 'o', |
| 284 | + 'Ŕ' => 'R', |
| 285 | + 'ŕ' => 'r', |
| 286 | + 'Š' | 'Ś' => 'S', |
| 287 | + 'š' | 'ś' => 's', |
| 288 | + 'Ť' => 'T', |
| 289 | + 'ť' => 't', |
| 290 | + 'Ú' | 'Ù' | 'Û' | 'Ũ' => 'U', |
| 291 | + 'ú' | 'ù' | 'û' | 'ũ' => 'u', |
| 292 | + 'Ý' | 'Ỳ' => 'Y', |
| 293 | + 'ý' | 'ỳ' => 'y', |
| 294 | + 'Ž' | 'Ź' | 'Ż' => 'Z', |
| 295 | + 'ž' | 'ź' | 'ż' => 'z', |
| 296 | + _ => c, |
| 297 | + }; |
| 298 | + write!(writer, "{}", ascii).unwrap(); |
| 299 | + } |
| 300 | +} |
| 301 | + |
| 302 | +/// Converts the given integer to its equivalent hexadecimal representation. |
| 303 | +/// |
| 304 | +/// # Example |
| 305 | +/// |
| 306 | +/// ```slt |
| 307 | +/// query T |
| 308 | +/// select to_hex(2147483647); |
| 309 | +/// ---- |
| 310 | +/// 7fffffff |
| 311 | +/// |
| 312 | +/// query T |
| 313 | +/// select to_hex(-2147483648); |
| 314 | +/// ---- |
| 315 | +/// 80000000 |
| 316 | +/// |
| 317 | +/// query T |
| 318 | +/// select to_hex(9223372036854775807); |
| 319 | +/// ---- |
| 320 | +/// 7fffffffffffffff |
| 321 | +/// |
| 322 | +/// query T |
| 323 | +/// select to_hex(-9223372036854775808); |
| 324 | +/// ---- |
| 325 | +/// 8000000000000000 |
| 326 | +/// ``` |
| 327 | +#[function("to_hex(int32) -> varchar")] |
| 328 | +pub fn to_hex_i32(n: i32, writer: &mut dyn Write) { |
| 329 | + write!(writer, "{:x}", n).unwrap(); |
| 330 | +} |
| 331 | + |
| 332 | +#[function("to_hex(int64) -> varchar")] |
| 333 | +pub fn to_hex_i64(n: i64, writer: &mut dyn Write) { |
| 334 | + write!(writer, "{:x}", n).unwrap(); |
| 335 | +} |
0 commit comments