Skip to content

Commit 74de349

Browse files
cpcloud authored and kszucs committed
feat(api): add array to string join operation
1 parent fd6ea5b commit 74de349

File tree

10 files changed

+132
-2
lines changed

10 files changed

+132
-2
lines changed

ibis/backends/bigquery/registry.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -723,6 +723,7 @@ def _interval_multiply(t, op):
723723
ops.RandomScalar: fixed_arity("RAND", 0),
724724
ops.NthValue: _nth_value,
725725
ops.JSONGetItem: lambda t, op: f"{t.translate(op.arg)}[{t.translate(op.index)}]",
726+
ops.ArrayStringJoin: lambda t, op: f"ARRAY_TO_STRING({t.translate(op.arg)}, {t.translate(op.sep)})",
726727
}
727728

728729
_invalid_operations = {

ibis/backends/clickhouse/compiler/values.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1323,3 +1323,10 @@ def _extract_query(op, **kw):
13231323
def _extract_fragment(op, **kw):
13241324
arg = translate_val(op.arg, **kw)
13251325
return f"nullIf(fragment({arg}), '')"
1326+
1327+
1328+
@translate_val.register(ops.ArrayStringJoin)
def _array_string_join(op, **kw):
    """Render ``ops.ArrayStringJoin`` as a call to ``arrayStringConcat``.

    Both operands are compiled with ``translate_val`` (forwarding ``kw``),
    the array first and the separator second, and the two compiled fragments
    are spliced into the returned string in that order.
    """
    array_sql, separator_sql = (
        translate_val(operand, **kw) for operand in (op.arg, op.sep)
    )
    return f"arrayStringConcat({array_sql}, {separator_sql})"

ibis/backends/duckdb/registry.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,9 @@ def _struct_column(t, op):
275275
ops.IntervalAdd: fixed_arity(operator.add, 2),
276276
ops.IntervalSubtract: fixed_arity(operator.sub, 2),
277277
ops.Capitalize: alchemy.sqlalchemy_operation_registry[ops.Capitalize],
278+
ops.ArrayStringJoin: fixed_arity(
279+
lambda sep, arr: sa.func.array_aggr(arr, sa.text("'string_agg'"), sep), 2
280+
),
278281
}
279282
)
280283

ibis/backends/postgres/registry.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -617,5 +617,8 @@ def translate(t, op: ops.ArgMin | ops.ArgMax) -> str:
617617
else_=sa.null(),
618618
)
619619
),
620+
ops.ArrayStringJoin: fixed_arity(
621+
lambda sep, arr: sa.func.array_to_string(arr, sep), 2
622+
),
620623
}
621624
)

ibis/backends/pyspark/compiler.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1917,3 +1917,10 @@ def compile_argmin(t, op, **kwargs):
19171917
@compiles(ops.ArgMax)
19181918
def compile_argmax(t, op, **kwargs):
19191919
return compile_aggregator(t, op, fn=F.max_by, **kwargs)
1920+
1921+
1922+
@compiles(ops.ArrayStringJoin)
def compile_array_string_join(t, op, **kwargs):
    """Compile ``ops.ArrayStringJoin`` into a call to ``F.concat_ws``.

    The array operand is translated with the forwarded ``kwargs``; the
    separator is translated with ``raw=True`` in addition. ``F.concat_ws``
    receives the separator first and the array second.
    """
    joined_input = t.translate(op.arg, **kwargs)
    # NOTE(review): ``raw=True`` presumably yields a plain Python value for the
    # separator rather than a column expression -- confirm in the translator.
    delimiter = t.translate(op.sep, raw=True, **kwargs)
    return F.concat_ws(delimiter, joined_input)

ibis/backends/tests/test_string.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -542,3 +542,15 @@ def test_capitalize(con):
542542
expected = "Abc"
543543
expr = s.capitalize()
544544
assert con.execute(expr) == expected
545+
546+
547+
@pytest.mark.notimpl(["dask", "datafusion", "pandas", "polars"])
@pytest.mark.notyet(["impala", "mssql", "mysql", "sqlite"], reason="no arrays")
def test_array_string_join(con):
    """Joining an array of strings gives the same string from either API."""
    letters = ibis.array(["a", "b", "c"])
    joined = "a,b,c"

    # Separator-first spelling: call ``join`` on the separator literal.
    via_separator = ibis.literal(",").join(letters)
    assert con.execute(via_separator) == joined

    # Array-first spelling: call ``join`` on the array expression.
    via_array = letters.join(",")
    assert con.execute(via_array) == joined

ibis/backends/trino/registry.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,9 @@ def _cot(t, op):
317317
),
318318
ops.TypeOf: unary(sa.func.typeof),
319319
ops.Unnest: _unnest,
320+
ops.ArrayStringJoin: fixed_arity(
321+
lambda sep, arr: sa.func.array_join(arr, sep), 2
322+
),
320323
}
321324
)
322325

ibis/expr/operations/strings.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,15 @@ def output_shape(self):
137137
return rlz.highest_precedence_shape(self.arg)
138138

139139

140+
@public
class ArrayStringJoin(Value):
    """Operation node joining the elements of a string array with a separator.

    The arguments are declared as ``sep`` followed by ``arg``; keep that
    order, since the node is built as ``ArrayStringJoin(sep, array)``.
    """

    # Separator operand, validated by ``rlz.string``.
    sep = rlz.string
    # Array operand, validated as a value of type ``dt.Array(dt.string)``.
    arg = rlz.value(dt.Array(dt.string))

    # Shape is derived via ``rlz.shape_like("args")``; the dtype is string.
    output_shape = rlz.shape_like("args")
    output_dtype = dt.string
147+
148+
140149
@public
141150
class StartsWith(Value):
142151
arg = rlz.string

ibis/expr/types/arrays.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,53 @@ def unnest(self) -> ir.Value:
341341
except com.ExpressionError:
342342
return expr
343343

344+
def join(self, sep: str | ir.StringValue) -> ir.StringValue:
    """Concatenate the elements of this array expression, separated by `sep`.

    Parameters
    ----------
    sep
        Separator placed between consecutive array elements

    Returns
    -------
    StringValue
        The elements of `self` joined into one string using `sep`

    Examples
    --------
    >>> import ibis
    >>> ibis.options.interactive = True
    >>> t = ibis.memtable({"arr": [["a", "b", "c"], None, [], ["b", None]]})
    >>> t
    ┏━━━━━━━━━━━━━━━━━━━━━━┓
    ┃ arr                  ┃
    ┡━━━━━━━━━━━━━━━━━━━━━━┩
    │ array<string>        │
    ├──────────────────────┤
    │ ['a', 'b', ... +1]   │
    │ ∅                    │
    │ []                   │
    │ ['b', None]          │
    └──────────────────────┘
    >>> t.arr.join("|")
    ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
    ┃ ArrayStringJoin('|', arr) ┃
    ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
    │ string                    │
    ├───────────────────────────┤
    │ a|b|c                     │
    │ ∅                         │
    │ ∅                         │
    │ b                         │
    └───────────────────────────┘

    See Also
    --------
    [`StringValue.join`][ibis.expr.types.strings.StringValue.join]
    """
    # The operation takes the separator first and the array second.
    node = ops.ArrayStringJoin(sep, self)
    return node.to_expr()
390+
344391

345392
@public
346393
class ArrayScalar(Scalar, ArrayValue):

ibis/expr/types/strings.py

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -383,7 +383,7 @@ def find_in_set(self, str_list: Sequence[str]) -> ir.IntegerValue:
383383
"""
384384
return ops.FindInSet(self, str_list).to_expr()
385385

386-
def join(self, strings: Sequence[str | StringValue]) -> StringValue:
386+
def join(self, strings: Sequence[str | StringValue] | ir.ArrayValue) -> StringValue:
387387
"""Join a list of strings using `self` as the separator.
388388
389389
Parameters
@@ -401,8 +401,46 @@ def join(self, strings: Sequence[str | StringValue]) -> StringValue:
401401
-------
402402
StringValue
403403
Joined string
404+
405+
Examples
406+
--------
407+
>>> import ibis
408+
>>> ibis.options.interactive = True
409+
>>> t = ibis.memtable({"arr": [["a", "b", "c"], None, [], ["b", None]]})
410+
>>> t
411+
┏━━━━━━━━━━━━━━━━━━━━━━┓
412+
┃ arr ┃
413+
┡━━━━━━━━━━━━━━━━━━━━━━┩
414+
│ array<string> │
415+
├──────────────────────┤
416+
│ ['a', 'b', ... +1] │
417+
│ ∅ │
418+
│ [] │
419+
│ ['b', None] │
420+
└──────────────────────┘
421+
>>> ibis.literal("|").join(t.arr)
422+
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
423+
┃ ArrayStringJoin('|', arr) ┃
424+
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
425+
│ string │
426+
├───────────────────────────┤
427+
│ a|b|c │
428+
│ ∅ │
429+
│ ∅ │
430+
│ b │
431+
└───────────────────────────┘
432+
433+
See Also
434+
--------
435+
[`ArrayValue.join`][ibis.expr.types.arrays.ArrayValue.join]
404436
"""
405-
return ops.StringJoin(self, strings).to_expr()
437+
import ibis.expr.types as ir
438+
439+
if isinstance(strings, ir.ArrayValue):
440+
cls = ops.ArrayStringJoin
441+
else:
442+
cls = ops.StringJoin
443+
return cls(self, strings).to_expr()
406444

407445
def startswith(self, start: str | StringValue) -> ir.BooleanValue:
408446
"""Determine whether `self` starts with `start`.

0 commit comments

Comments
 (0)