Skip to content

Commit e6e995c

Browse files
cpcloud and kszucs
authored and committed
feat(api): add ArrayValue.flatten method and operation
1 parent 537ddaf commit e6e995c

File tree

2 files changed

+112
-0
lines changed

2 files changed

+112
-0
lines changed

ibis/expr/operations/arrays.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,3 +186,19 @@ def dtype(self):
186186
}
187187
)
188188
)
189+
190+
191+
@public
class ArrayFlatten(Value):
    """Remove the outermost level of nesting from an array of arrays.

    `arg` has to be an array whose elements are themselves arrays; an
    array without any nested arrays has nothing to flatten.
    """

    arg: Value[dt.Array[dt.Array]]
    shape = rlz.shape_like("arg")

    @property
    def dtype(self):
        # The result type is the value type of the input array's type, i.e.
        # what remains once one level of nesting has been stripped.
        outer_type = self.arg.dtype
        return outer_type.value_type

ibis/expr/types/arrays.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -901,6 +901,102 @@ def zip(self, other: ArrayValue, *others: ArrayValue) -> ArrayValue:
901901

902902
return ops.ArrayZip((self, other, *others)).to_expr()
903903

904+
def flatten(self) -> ir.ArrayValue:
    """Collapse one level of nesting in an array-of-arrays expression.

    Returns
    -------
    ArrayValue
        Array expression with the outermost level of nesting removed

    Examples
    --------
    >>> import ibis
    >>> import ibis.selectors as s
    >>> from ibis import _
    >>> ibis.options.interactive = True
    >>> schema = {
    ...     "empty": "array<array<int>>",
    ...     "happy": "array<array<string>>",
    ...     "nulls_only": "array<array<struct<a: array<string>>>>",
    ...     "mixed_nulls": "array<array<string>>",
    ... }
    >>> data = {
    ...     "empty": [[], [], []],
    ...     "happy": [[["abc"]], [["bcd"]], [["def"]]],
    ...     "nulls_only": [None, None, None],
    ...     "mixed_nulls": [[], None, [None]],
    ... }
    >>> import pyarrow as pa
    >>> t = ibis.memtable(
    ...     pa.Table.from_pydict(
    ...         data,
    ...         schema=ibis.schema(schema).to_pyarrow(),
    ...     )
    ... )
    >>> t
    ┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━┓
    ┃ empty                ┃ happy                ┃ nulls_only ┃ … ┃
    ┡━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━┩
    │ array<array<int64>>  │ array<array<string>> │ array<arr… │ … │
    ├──────────────────────┼──────────────────────┼────────────┼───┤
    │ []                   │ [[...]]              │ NULL       │ … │
    │ []                   │ [[...]]              │ NULL       │ … │
    │ []                   │ [[...]]              │ NULL       │ … │
    └──────────────────────┴──────────────────────┴────────────┴───┘
    >>> t.empty.flatten()
    ┏━━━━━━━━━━━━━━━━━━━━━━┓
    ┃ ArrayFlatten(empty)  ┃
    ┡━━━━━━━━━━━━━━━━━━━━━━┩
    │ array<int64>         │
    ├──────────────────────┤
    │ []                   │
    │ []                   │
    │ []                   │
    └──────────────────────┘
    >>> t.happy.flatten()
    ┏━━━━━━━━━━━━━━━━━━━━━━┓
    ┃ ArrayFlatten(happy)  ┃
    ┡━━━━━━━━━━━━━━━━━━━━━━┩
    │ array<string>        │
    ├──────────────────────┤
    │ ['abc']              │
    │ ['bcd']              │
    │ ['def']              │
    └──────────────────────┘
    >>> t.nulls_only.flatten()
    ┏━━━━━━━━━━━━━━━━━━━━━━━━━━┓
    ┃ ArrayFlatten(nulls_only) ┃
    ┡━━━━━━━━━━━━━━━━━━━━━━━━━━┩
    │ array<struct<a: array<s… │
    ├──────────────────────────┤
    │ NULL                     │
    │ NULL                     │
    │ NULL                     │
    └──────────────────────────┘
    >>> t.mixed_nulls.flatten()
    ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
    ┃ ArrayFlatten(mixed_nulls) ┃
    ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
    │ array<string>             │
    ├───────────────────────────┤
    │ []                        │
    │ NULL                      │
    │ []                        │
    └───────────────────────────┘
    >>> t.select(s.across(s.all(), _.flatten()))
    ┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━┓
    ┃ empty                ┃ happy                ┃ nulls_only ┃ … ┃
    ┡━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━┩
    │ array<int64>         │ array<string>        │ array<str… │ … │
    ├──────────────────────┼──────────────────────┼────────────┼───┤
    │ []                   │ ['abc']              │ NULL       │ … │
    │ []                   │ ['bcd']              │ NULL       │ … │
    │ []                   │ ['def']              │ NULL       │ … │
    └──────────────────────┴──────────────────────┴────────────┴───┘
    """
    # Build the operation node first, then wrap it as an expression.
    flatten_op = ops.ArrayFlatten(self)
    return flatten_op.to_expr()
999+
9041000

9051001
@public
9061002
class ArrayScalar(Scalar, ArrayValue):

0 commit comments

Comments
 (0)