
Commit c5fdb68

Remove dead code for pyarrow < 15.0.0 (#7023)
* Remove dead code related to pa.concat_tables
* Remove dead code related to pa.FixedSizeListArray
1 parent 9ccc1f3 commit c5fdb68

2 files changed: +16 lines, -68 lines

src/datasets/packaged_modules/webdataset/webdataset.py

+1 line, -4 lines
@@ -82,10 +82,7 @@ def _split_generators(self, dl_manager):
                 pa.Table.from_pylist(cast_to_python_objects([example], only_1d_for_numpy=True))
                 for example in first_examples
             ]
-            if datasets.config.PYARROW_VERSION.major < 14:
-                inferred_arrow_schema = pa.concat_tables(pa_tables, promote=True).schema
-            else:
-                inferred_arrow_schema = pa.concat_tables(pa_tables, promote_options="default").schema
+            inferred_arrow_schema = pa.concat_tables(pa_tables, promote_options="default").schema
             features = datasets.Features.from_arrow_schema(inferred_arrow_schema)
 
             # Set Image types
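
Note (sketch, not part of the diff): the per-example tables built above may not all share the same columns, and pa.concat_tables(..., promote_options="default"), available on pyarrow >= 14, unifies their schemas by filling missing columns with nulls before the inferred schema is read off:

    import pyarrow as pa

    t1 = pa.Table.from_pylist([{"text": "hello"}])
    t2 = pa.Table.from_pylist([{"text": "world", "label": 1}])

    # Missing columns become nullable fields in the unified schema
    merged = pa.concat_tables([t1, t2], promote_options="default")
    print(merged.schema)                       # text: string, label: int64
    print(merged.column("label").to_pylist())  # [None, 1]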

src/datasets/table.py

+15 lines, -64 lines
@@ -9,7 +9,6 @@
 import pyarrow.compute as pc
 import pyarrow.types
 
-from . import config
 from .utils.logging import get_logger
 
 
@@ -1320,22 +1319,16 @@ def __setstate__(self, state):
         if schema is not None and table.schema != schema:
             # We fix the columns by concatenating with an empty table with the right columns
             empty_table = pa.Table.from_batches([], schema=schema)
-            # we set promote=True to fill missing columns with null values
-            if config.PYARROW_VERSION.major < 14:
-                table = pa.concat_tables([table, empty_table], promote=True)
-            else:
-                table = pa.concat_tables([table, empty_table], promote_options="default")
+            # We set promote_options="default" to fill missing columns with null values
+            table = pa.concat_tables([table, empty_table], promote_options="default")
         ConcatenationTable.__init__(self, table, blocks=blocks)
 
     @staticmethod
     def _concat_blocks(blocks: List[Union[TableBlock, pa.Table]], axis: int = 0) -> pa.Table:
         pa_tables = [table.table if hasattr(table, "table") else table for table in blocks]
         if axis == 0:
-            # we set promote=True to fill missing columns with null values
-            if config.PYARROW_VERSION.major < 14:
-                return pa.concat_tables(pa_tables, promote=True)
-            else:
-                return pa.concat_tables(pa_tables, promote_options="default")
+            # We set promote_options="default" to fill missing columns with null values
+            return pa.concat_tables(pa_tables, promote_options="default")
         elif axis == 1:
             for i, table in enumerate(pa_tables):
                 if i == 0:
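
Note (assumed example, not from the repo): the same option is what makes the concat-with-empty-table trick in __setstate__ work; a zero-row table carrying the target schema adds any missing columns, filled with nulls:

    import pyarrow as pa

    schema = pa.schema({"id": pa.int64(), "text": pa.string()})
    table = pa.table({"id": [1, 2]})  # lacks the "text" column

    # Concatenating with an empty table that has the right columns
    # fixes the schema and fills the missing column with nulls.
    empty_table = pa.Table.from_batches([], schema=schema)
    fixed = pa.concat_tables([table, empty_table], promote_options="default")
    print(fixed.schema)                      # id: int64, text: string
    print(fixed.column("text").to_pylist())  # [None, None]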
@@ -1906,17 +1899,9 @@ def array_cast(
             else:
                 array = pc.list_slice(array, 0, pa_type.list_size, return_fixed_size_list=True)
             array_values = array.values
-            if config.PYARROW_VERSION.major < 15:
-                return pa.Array.from_buffers(
-                    pa_type,
-                    len(array),
-                    [array.is_valid().buffers()[1]],
-                    children=[_c(array_values, pa_type.value_type)],
-                )
-            else:
-                return pa.FixedSizeListArray.from_arrays(
-                    _c(array_values, pa_type.value_type), pa_type.list_size, mask=array.is_null()
-                )
+            return pa.FixedSizeListArray.from_arrays(
+                _c(array_values, pa_type.value_type), pa_type.list_size, mask=array.is_null()
+            )
         else:
             array_values = array.values[
                 array.offset * pa_type.list_size : (array.offset + len(array)) * pa_type.list_size
@@ -1932,17 +1917,9 @@ def array_cast(
             array_values = array.values[
                 array.offset * array.type.list_size : (array.offset + len(array)) * array.type.list_size
             ]
-            if config.PYARROW_VERSION.major < 15:
-                return pa.Array.from_buffers(
-                    pa_type,
-                    len(array),
-                    [array.is_valid().buffers()[1]],
-                    children=[_c(array_values, pa_type.value_type)],
-                )
-            else:
-                return pa.FixedSizeListArray.from_arrays(
-                    _c(array_values, pa_type.value_type), pa_type.list_size, mask=array.is_null()
-                )
+            return pa.FixedSizeListArray.from_arrays(
+                _c(array_values, pa_type.value_type), pa_type.list_size, mask=array.is_null()
+            )
     elif pa.types.is_list(pa_type):
         array_offsets = (np.arange(len(array) + 1) + array.offset) * array.type.list_size
         return pa.ListArray.from_arrays(array_offsets, _c(array.values, pa_type.value_type), mask=array.is_null())
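
Note (assumed example): pa.FixedSizeListArray.from_arrays accepts a mask= argument marking which outer list entries are null, which the removed PYARROW_VERSION.major < 15 branches indicate requires pyarrow >= 15; this is what replaces the manual buffer handling above:

    import pyarrow as pa

    values = pa.array([1, 2, 3, 4, 5, 6], type=pa.int64())
    mask = pa.array([False, True, False])  # True marks a null list entry

    arr = pa.FixedSizeListArray.from_arrays(values, 2, mask=mask)
    print(arr.to_pylist())  # [[1, 2], None, [5, 6]]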
@@ -2055,17 +2032,9 @@ def cast_array_to_feature(
                 array = pc.list_slice(array, 0, feature.length, return_fixed_size_list=True)
             array_values = array.values
             casted_array_values = _c(array_values, feature.feature)
-            if config.PYARROW_VERSION.major < 15:
-                return pa.Array.from_buffers(
-                    pa.list_(casted_array_values.type, feature.length),
-                    len(array),
-                    [array.is_valid().buffers()[1]],
-                    children=[casted_array_values],
-                )
-            else:
-                return pa.FixedSizeListArray.from_arrays(
-                    casted_array_values, feature.length, mask=array.is_null()
-                )
+            return pa.FixedSizeListArray.from_arrays(
+                casted_array_values, feature.length, mask=array.is_null()
+            )
         else:
             array_values = array.values[
                 array.offset * feature.length : (array.offset + len(array)) * feature.length
@@ -2091,17 +2060,7 @@ def cast_array_to_feature(
                 array.offset * array.type.list_size : (array.offset + len(array)) * array.type.list_size
             ]
             casted_array_values = _c(array_values, feature.feature)
-            if config.PYARROW_VERSION.major < 15:
-                return pa.Array.from_buffers(
-                    pa.list_(casted_array_values.type, feature.length),
-                    len(array),
-                    [array.is_valid().buffers()[1]],
-                    children=[casted_array_values],
-                )
-            else:
-                return pa.FixedSizeListArray.from_arrays(
-                    casted_array_values, feature.length, mask=array.is_null()
-                )
+            return pa.FixedSizeListArray.from_arrays(casted_array_values, feature.length, mask=array.is_null())
         else:
             array_offsets = (np.arange(len(array) + 1) + array.offset) * array.type.list_size
             return pa.ListArray.from_arrays(array_offsets, _c(array.values, feature.feature), mask=array.is_null())
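
Note (assumed example): a quick check of why the buffer-based fallback could go; on pyarrow >= 15 the removed pa.Array.from_buffers construction and the kept pa.FixedSizeListArray.from_arrays(..., mask=...) call build the same fixed-size-list array:

    import pyarrow as pa

    values = pa.array([1.0, 2.0, 3.0, 4.0])
    source = pa.FixedSizeListArray.from_arrays(values, 2)  # [[1.0, 2.0], [3.0, 4.0]]

    # Old style: hand over the validity bitmap and the child values explicitly
    old_style = pa.Array.from_buffers(
        pa.list_(values.type, 2),
        len(source),
        [source.is_valid().buffers()[1]],
        children=[source.values],
    )
    # New style: let from_arrays handle nulls via mask=
    new_style = pa.FixedSizeListArray.from_arrays(source.values, 2, mask=source.is_null())
    assert old_style.to_pylist() == new_style.to_pylist()  # [[1.0, 2.0], [3.0, 4.0]]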
@@ -2176,15 +2135,7 @@ def embed_array_storage(array: pa.Array, feature: "FeatureType"):
                 array.offset * array.type.list_size : (array.offset + len(array)) * array.type.list_size
             ]
             embedded_array_values = _e(array_values, feature.feature)
-            if config.PYARROW_VERSION.major < 15:
-                return pa.Array.from_buffers(
-                    pa.list_(array_values.type, feature.length),
-                    len(array),
-                    [array.is_valid().buffers()[1]],
-                    children=[embedded_array_values],
-                )
-            else:
-                return pa.FixedSizeListArray.from_arrays(embedded_array_values, feature.length, mask=array.is_null())
+            return pa.FixedSizeListArray.from_arrays(embedded_array_values, feature.length, mask=array.is_null())
     if not isinstance(feature, (Sequence, dict, list, tuple)):
         return array
     raise TypeError(f"Couldn't embed array of type\n{_short_str(array.type)}\nwith\n{_short_str(feature)}")
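
Note (assumption drawn from the commit title, which calls the < 15.0.0 branches dead code): the library is expected to run against pyarrow >= 15.0.0 after this change, so a local sanity check before relying on the mask=-based calls above might look like:

    import pyarrow as pa

    major = int(pa.__version__.split(".")[0])
    if major < 15:
        raise RuntimeError(
            f"pyarrow {pa.__version__} predates FixedSizeListArray.from_arrays(..., mask=...)"
        )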
