Skip to content

Commit b31fcc6

Browse files
authored
refactor(padding): follow python string padding conventions (#10096)
BREAKING CHANGE: String padding operations now follow Python semantics: a string that is already at least as long as the requested padding length is returned unchanged instead of being truncated.
1 parent e50642d commit b31fcc6

File tree

16 files changed

+206
-38
lines changed

16 files changed

+206
-38
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
SELECT
2-
LPAD(`t0`.`string_col`, 1, 'a') AS `LPad(string_col, 1, 'a')`
2+
LPAD(`t0`.`string_col`, GREATEST(LENGTH(`t0`.`string_col`), 1), 'a') AS `LPad(string_col, 1, 'a')`
33
FROM `functional_alltypes` AS `t0`
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
SELECT
2-
LPAD(`t0`.`string_col`, 25, ' ') AS `LPad(string_col, 25, ' ')`
2+
LPAD(`t0`.`string_col`, GREATEST(LENGTH(`t0`.`string_col`), 25), ' ') AS `LPad(string_col, 25, ' ')`
33
FROM `functional_alltypes` AS `t0`
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
SELECT
2-
RPAD(`t0`.`string_col`, 1, 'a') AS `RPad(string_col, 1, 'a')`
2+
RPAD(`t0`.`string_col`, GREATEST(LENGTH(`t0`.`string_col`), 1), 'a') AS `RPad(string_col, 1, 'a')`
33
FROM `functional_alltypes` AS `t0`
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
SELECT
2-
RPAD(`t0`.`string_col`, 25, ' ') AS `RPad(string_col, 25, ' ')`
2+
RPAD(`t0`.`string_col`, GREATEST(LENGTH(`t0`.`string_col`), 25), ' ') AS `RPad(string_col, 25, ' ')`
33
FROM `functional_alltypes` AS `t0`

ibis/backends/impala/tests/test_exprs.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -295,9 +295,9 @@ def test_decimal_builtins_2(con, func, expected):
295295
(L("0123").translate("012", "abc"), "abc3"),
296296
(L("abcd").find("a"), 0),
297297
(L("baaaab").find("b", 2), 5),
298-
(L("abcd").lpad(1, "-"), "a"),
298+
(L("abcd").lpad(1, "-"), "abcd"),
299299
(L("abcd").lpad(5), " abcd"),
300-
(L("abcd").rpad(1, "-"), "a"),
300+
(L("abcd").rpad(1, "-"), "abcd"),
301301
(L("abcd").rpad(5), "abcd "),
302302
(L("abcd").find_in_set(["a", "b", "abcd"]), 2),
303303
(L(", ").join(["a", "b"]), "a, b"),

ibis/backends/sql/compilers/base.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,6 @@ class SQLGlotCompiler(abc.ABC):
331331
ops.IsInf: "isinf",
332332
ops.IsNan: "isnan",
333333
ops.JSONGetItem: "json_extract",
334-
ops.LPad: "lpad",
335334
LastValue: "last_value",
336335
ops.Levenshtein: "levenshtein",
337336
ops.Ln: "ln",
@@ -347,7 +346,6 @@ class SQLGlotCompiler(abc.ABC):
347346
ops.PercentRank: "percent_rank",
348347
ops.Pi: "pi",
349348
ops.Power: "pow",
350-
ops.RPad: "rpad",
351349
ops.Radians: "radians",
352350
ops.RegexSearch: "regexp_like",
353351
ops.RegexSplit: "regexp_split",
@@ -985,6 +983,12 @@ def visit_RStrip(self, op, *, arg):
985983
def visit_LStrip(self, op, *, arg):
986984
return self.f.ltrim(arg, string.whitespace)
987985

986+
def visit_LPad(self, op, *, arg, length, pad):
987+
return self.f.lpad(arg, self.f.greatest(self.f.length(arg), length), pad)
988+
989+
def visit_RPad(self, op, *, arg, length, pad):
990+
return self.f.rpad(arg, self.f.greatest(self.f.length(arg), length), pad)
991+
988992
def visit_Substring(self, op, *, arg, start, length):
989993
if isinstance(op.length, ops.Literal) and (value := op.length.value) < 0:
990994
raise com.IbisInputError(

ibis/backends/sql/compilers/bigquery/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -183,8 +183,6 @@ class BigQueryCompiler(SQLGlotCompiler):
183183
ops.IsInf: "is_inf",
184184
ops.IsNan: "is_nan",
185185
ops.Log10: "log10",
186-
ops.LPad: "lpad",
187-
ops.RPad: "rpad",
188186
ops.Levenshtein: "edit_distance",
189187
ops.Modulus: "mod",
190188
ops.RegexReplace: "regexp_replace",

ibis/backends/sql/compilers/clickhouse.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,16 @@ def visit_Strip(self, op, *, arg):
480480
this=arg, position="BOTH", expression=sge.Literal.string(whitespace)
481481
)
482482

483+
def visit_LPad(self, op, *, arg, length, pad):
484+
return self.f.leftPadUTF8(
485+
arg, self.f.greatest(self.f.lengthUTF8(arg), length), pad
486+
)
487+
488+
def visit_RPad(self, op, *, arg, length, pad):
489+
return self.f.rightPadUTF8(
490+
arg, self.f.greatest(self.f.lengthUTF8(arg), length), pad
491+
)
492+
483493
def visit_DayOfWeekIndex(self, op, *, arg):
484494
weekdays = len(calendar.day_name)
485495
return (((self.f.toDayOfWeek(arg) - 1) % weekdays) + weekdays) % weekdays

ibis/backends/sql/compilers/datafusion.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,20 @@ def visit_RegexSearch(self, op, *, arg, pattern):
207207
def visit_StringContains(self, op, *, haystack, needle):
208208
return self.f.strpos(haystack, needle) > sg.exp.convert(0)
209209

210+
def visit_LPad(self, op, *, arg, length, pad):
211+
return self.if_(
212+
length <= self.f.length(arg),
213+
arg,
214+
self.f.concat(self.f.repeat(pad, length - self.f.length(arg)), arg),
215+
)
216+
217+
def visit_RPad(self, op, *, arg, length, pad):
218+
return self.if_(
219+
length <= self.f.length(arg),
220+
arg,
221+
self.f.concat(arg, self.f.repeat(pad, length - self.f.length(arg))),
222+
)
223+
210224
def visit_ExtractFragment(self, op, *, arg):
211225
return self.f.extract_url_field(arg, "fragment")
212226

ibis/backends/sql/compilers/duckdb.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -568,6 +568,20 @@ def visit_Hash(self, op, *, arg):
568568
def visit_StringConcat(self, op, *, arg):
569569
return reduce(lambda x, y: sge.DPipe(this=x, expression=y), arg)
570570

571+
def visit_LPad(self, op, *, arg, length, pad):
572+
return self.if_(
573+
length <= self.f.length(arg),
574+
arg,
575+
self.f.concat(self.f.repeat(pad, length - self.f.length(arg)), arg),
576+
)
577+
578+
def visit_RPad(self, op, *, arg, length, pad):
579+
return self.if_(
580+
length <= self.f.length(arg),
581+
arg,
582+
self.f.concat(arg, self.f.repeat(pad, length - self.f.length(arg))),
583+
)
584+
571585
def visit_StringSlice(self, op, *, arg, start, end):
572586
if start is not None:
573587
start += 1

ibis/backends/sql/compilers/flink.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -518,6 +518,20 @@ def visit_StringFind(self, op, *, arg, substr, start, end):
518518

519519
return self.f.instr(arg, substr)
520520

521+
def visit_LPad(self, op, *, arg, length, pad):
522+
return self.if_(
523+
length <= self.f.length(arg),
524+
arg,
525+
self.f.concat(self.f.repeat(pad, length - self.f.length(arg)), arg),
526+
)
527+
528+
def visit_RPad(self, op, *, arg, length, pad):
529+
return self.if_(
530+
length <= self.f.length(arg),
531+
arg,
532+
self.f.concat(arg, self.f.repeat(pad, length - self.f.length(arg))),
533+
)
534+
521535
def visit_StartsWith(self, op, *, arg, start):
522536
return self.f.left(arg, self.f.char_length(start)).eq(start)
523537

ibis/backends/sql/compilers/mssql.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -528,7 +528,7 @@ def visit_LPad(self, op, *, arg, length, pad):
528528
return self.if_(
529529
length <= self.f.length(arg),
530530
arg,
531-
self.f.left(
531+
self.f.right(
532532
self.f.concat(self.f.replicate(pad, length - self.f.length(arg)), arg),
533533
length,
534534
),

ibis/backends/sql/compilers/oracle.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,6 @@ class OracleCompiler(SQLGlotCompiler):
8282
ops.BitXor: "bit_xor_agg",
8383
ops.BitwiseAnd: "bitand",
8484
ops.Hash: "ora_hash",
85-
ops.LPad: "lpad",
86-
ops.RPad: "rpad",
8785
ops.StringAscii: "ascii",
8886
ops.Mode: "stats_mode",
8987
}
@@ -275,6 +273,12 @@ def visit_StringContains(self, op, *, haystack, needle):
275273
def visit_StringJoin(self, op, *, arg, sep):
276274
return self.f.concat(*toolz.interpose(sep, arg))
277275

276+
def visit_LPad(self, op, *, arg, length, pad):
277+
return self.f.lpad(arg, self.f.greatest(self.f.length(arg), length), pad)
278+
279+
def visit_RPad(self, op, *, arg, length, pad):
280+
return self.f.rpad(arg, self.f.greatest(self.f.length(arg), length), pad)
281+
278282
## Aggregate stuff
279283

280284
def visit_Correlation(self, op, *, left, right, where, how):

ibis/backends/sqlite/udf.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -258,12 +258,12 @@ def _ibis_string_ascii(string):
258258

259259
@udf
260260
def _ibis_rpad(string, width, pad):
261-
return string.ljust(width, pad)[:width]
261+
return string.ljust(width, pad)
262262

263263

264264
@udf
265265
def _ibis_lpad(string, width, pad):
266-
return string.rjust(width, pad)[:width]
266+
return string.rjust(width, pad)
267267

268268

269269
@udf

ibis/backends/tests/sql/snapshots/test_sql/test_no_cart_join/out.sql

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,22 @@ FROM (
1919
"t1"."ancestor_level_number",
2020
"t1"."ancestor_node_sort_order",
2121
"t1"."descendant_node_natural_key",
22-
LPAD('-', (
23-
"t1"."ancestor_level_number" - 1
24-
) * 7, '-') || "t1"."ancestor_level_name" AS "product_level_name"
22+
CASE
23+
WHEN (
24+
(
25+
"t1"."ancestor_level_number" - 1
26+
) * 7
27+
) <= LENGTH('-')
28+
THEN '-'
29+
ELSE CONCAT(
30+
REPEAT('-', (
31+
(
32+
"t1"."ancestor_level_number" - 1
33+
) * 7
34+
) - LENGTH('-')),
35+
'-'
36+
)
37+
END || "t1"."ancestor_level_name" AS "product_level_name"
2538
FROM "products" AS "t1"
2639
) AS "t4"
2740
ON "t2"."product_id" = "t4"."descendant_node_natural_key"

0 commit comments

Comments
 (0)