Skip to content

Commit 64d2957

Browse files
cpcloudgforsyth
authored andcommitted
fix(mssql): compute the length of strings correctly
1 parent f6d9baf commit 64d2957

File tree

2 files changed

+33
-2
lines changed

2 files changed

+33
-2
lines changed

ibis/backends/mssql/registry.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,23 @@ def _not(t, op):
145145
return sa.case((arg == 0, True), else_=False)
146146

147147

148+
def _len(x):
149+
"""The MSSQL LEN function doesn't count trailing spaces.
150+
151+
Also, DATALENGTH (the suggested alternative) counts bytes and thus its
152+
result depends on the string's encoding.
153+
154+
https://learn.microsoft.com/en-us/sql/t-sql/functions/len-transact-sql?view=sql-server-ver16#remarks
155+
156+
The solution is to add a character to the beginning and end of the string
157+
that are guaranteed to have one character in length and are not spaces, and
158+
then subtract 2 from the result of `LEN` of that input.
159+
160+
Thanks to @arkanovicz for this glorious hack.
161+
"""
162+
return sa.func.len("A" + x + "Z") - 2
163+
164+
148165
operation_registry = sqlalchemy_operation_registry.copy()
149166
operation_registry.update(sqlalchemy_window_functions_registry)
150167

@@ -162,7 +179,7 @@ def _not(t, op):
162179
ops.Capitalize: unary(
163180
lambda arg: sa.func.concat(
164181
sa.func.upper(sa.func.substring(arg, 1, 1)),
165-
sa.func.lower(sa.func.substring(arg, 2, sa.func.datalength(arg) - 1)),
182+
sa.func.lower(sa.func.substring(arg, 2, _len(arg) - 1)),
166183
)
167184
),
168185
ops.LStrip: unary(sa.func.ltrim),
@@ -171,7 +188,7 @@ def _not(t, op):
171188
ops.Repeat: fixed_arity(sa.func.replicate, 2),
172189
ops.Reverse: unary(sa.func.reverse),
173190
ops.StringFind: _string_find,
174-
ops.StringLength: unary(sa.func.datalength),
191+
ops.StringLength: unary(_len),
175192
ops.StringReplace: fixed_arity(sa.func.replace, 3),
176193
ops.Strip: unary(sa.func.trim),
177194
ops.Uppercase: unary(sa.func.upper),

ibis/backends/mssql/tests/test_client.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,3 +126,17 @@ def count_big(x, where: bool = True) -> int:
126126

127127
expr = count_big(ft.id, where=ft.id == 1)
128128
assert expr.execute() == ft[ft.id == 1].count().execute()
129+
130+
131+
@pytest.mark.parametrize("string", ["a", " ", "a ", " a", ""])
132+
def test_glorious_length_function_hack(con, string):
133+
"""Test that the length function works as expected.
134+
135+
Why wouldn't it, you ask?
136+
137+
https://learn.microsoft.com/en-us/sql/t-sql/functions/len-transact-sql?view=sql-server-ver16#remarks
138+
"""
139+
lit = ibis.literal(string)
140+
expr = lit.length()
141+
result = con.execute(expr)
142+
assert result == len(string)

0 commit comments

Comments
 (0)