9
9
import pyarrow .compute as pc
10
10
import pyarrow .types
11
11
12
- from . import config
13
12
from .utils .logging import get_logger
14
13
15
14
@@ -1320,22 +1319,16 @@ def __setstate__(self, state):
1320
1319
if schema is not None and table .schema != schema :
1321
1320
# We fix the columns by concatenating with an empty table with the right columns
1322
1321
empty_table = pa .Table .from_batches ([], schema = schema )
1323
- # we set promote=True to fill missing columns with null values
1324
- if config .PYARROW_VERSION .major < 14 :
1325
- table = pa .concat_tables ([table , empty_table ], promote = True )
1326
- else :
1327
- table = pa .concat_tables ([table , empty_table ], promote_options = "default" )
1322
+ # We set promote_options="default" to fill missing columns with null values
1323
+ table = pa .concat_tables ([table , empty_table ], promote_options = "default" )
1328
1324
ConcatenationTable .__init__ (self , table , blocks = blocks )
1329
1325
1330
1326
@staticmethod
1331
1327
def _concat_blocks (blocks : List [Union [TableBlock , pa .Table ]], axis : int = 0 ) -> pa .Table :
1332
1328
pa_tables = [table .table if hasattr (table , "table" ) else table for table in blocks ]
1333
1329
if axis == 0 :
1334
- # we set promote=True to fill missing columns with null values
1335
- if config .PYARROW_VERSION .major < 14 :
1336
- return pa .concat_tables (pa_tables , promote = True )
1337
- else :
1338
- return pa .concat_tables (pa_tables , promote_options = "default" )
1330
+ # We set promote_options="default" to fill missing columns with null values
1331
+ return pa .concat_tables (pa_tables , promote_options = "default" )
1339
1332
elif axis == 1 :
1340
1333
for i , table in enumerate (pa_tables ):
1341
1334
if i == 0 :
@@ -1906,17 +1899,9 @@ def array_cast(
1906
1899
else :
1907
1900
array = pc .list_slice (array , 0 , pa_type .list_size , return_fixed_size_list = True )
1908
1901
array_values = array .values
1909
- if config .PYARROW_VERSION .major < 15 :
1910
- return pa .Array .from_buffers (
1911
- pa_type ,
1912
- len (array ),
1913
- [array .is_valid ().buffers ()[1 ]],
1914
- children = [_c (array_values , pa_type .value_type )],
1915
- )
1916
- else :
1917
- return pa .FixedSizeListArray .from_arrays (
1918
- _c (array_values , pa_type .value_type ), pa_type .list_size , mask = array .is_null ()
1919
- )
1902
+ return pa .FixedSizeListArray .from_arrays (
1903
+ _c (array_values , pa_type .value_type ), pa_type .list_size , mask = array .is_null ()
1904
+ )
1920
1905
else :
1921
1906
array_values = array .values [
1922
1907
array .offset * pa_type .list_size : (array .offset + len (array )) * pa_type .list_size
@@ -1932,17 +1917,9 @@ def array_cast(
1932
1917
array_values = array .values [
1933
1918
array .offset * array .type .list_size : (array .offset + len (array )) * array .type .list_size
1934
1919
]
1935
- if config .PYARROW_VERSION .major < 15 :
1936
- return pa .Array .from_buffers (
1937
- pa_type ,
1938
- len (array ),
1939
- [array .is_valid ().buffers ()[1 ]],
1940
- children = [_c (array_values , pa_type .value_type )],
1941
- )
1942
- else :
1943
- return pa .FixedSizeListArray .from_arrays (
1944
- _c (array_values , pa_type .value_type ), pa_type .list_size , mask = array .is_null ()
1945
- )
1920
+ return pa .FixedSizeListArray .from_arrays (
1921
+ _c (array_values , pa_type .value_type ), pa_type .list_size , mask = array .is_null ()
1922
+ )
1946
1923
elif pa .types .is_list (pa_type ):
1947
1924
array_offsets = (np .arange (len (array ) + 1 ) + array .offset ) * array .type .list_size
1948
1925
return pa .ListArray .from_arrays (array_offsets , _c (array .values , pa_type .value_type ), mask = array .is_null ())
@@ -2055,17 +2032,9 @@ def cast_array_to_feature(
2055
2032
array = pc .list_slice (array , 0 , feature .length , return_fixed_size_list = True )
2056
2033
array_values = array .values
2057
2034
casted_array_values = _c (array_values , feature .feature )
2058
- if config .PYARROW_VERSION .major < 15 :
2059
- return pa .Array .from_buffers (
2060
- pa .list_ (casted_array_values .type , feature .length ),
2061
- len (array ),
2062
- [array .is_valid ().buffers ()[1 ]],
2063
- children = [casted_array_values ],
2064
- )
2065
- else :
2066
- return pa .FixedSizeListArray .from_arrays (
2067
- casted_array_values , feature .length , mask = array .is_null ()
2068
- )
2035
+ return pa .FixedSizeListArray .from_arrays (
2036
+ casted_array_values , feature .length , mask = array .is_null ()
2037
+ )
2069
2038
else :
2070
2039
array_values = array .values [
2071
2040
array .offset * feature .length : (array .offset + len (array )) * feature .length
@@ -2091,17 +2060,7 @@ def cast_array_to_feature(
2091
2060
array .offset * array .type .list_size : (array .offset + len (array )) * array .type .list_size
2092
2061
]
2093
2062
casted_array_values = _c (array_values , feature .feature )
2094
- if config .PYARROW_VERSION .major < 15 :
2095
- return pa .Array .from_buffers (
2096
- pa .list_ (casted_array_values .type , feature .length ),
2097
- len (array ),
2098
- [array .is_valid ().buffers ()[1 ]],
2099
- children = [casted_array_values ],
2100
- )
2101
- else :
2102
- return pa .FixedSizeListArray .from_arrays (
2103
- casted_array_values , feature .length , mask = array .is_null ()
2104
- )
2063
+ return pa .FixedSizeListArray .from_arrays (casted_array_values , feature .length , mask = array .is_null ())
2105
2064
else :
2106
2065
array_offsets = (np .arange (len (array ) + 1 ) + array .offset ) * array .type .list_size
2107
2066
return pa .ListArray .from_arrays (array_offsets , _c (array .values , feature .feature ), mask = array .is_null ())
@@ -2176,15 +2135,7 @@ def embed_array_storage(array: pa.Array, feature: "FeatureType"):
2176
2135
array .offset * array .type .list_size : (array .offset + len (array )) * array .type .list_size
2177
2136
]
2178
2137
embedded_array_values = _e (array_values , feature .feature )
2179
- if config .PYARROW_VERSION .major < 15 :
2180
- return pa .Array .from_buffers (
2181
- pa .list_ (array_values .type , feature .length ),
2182
- len (array ),
2183
- [array .is_valid ().buffers ()[1 ]],
2184
- children = [embedded_array_values ],
2185
- )
2186
- else :
2187
- return pa .FixedSizeListArray .from_arrays (embedded_array_values , feature .length , mask = array .is_null ())
2138
+ return pa .FixedSizeListArray .from_arrays (embedded_array_values , feature .length , mask = array .is_null ())
2188
2139
if not isinstance (feature , (Sequence , dict , list , tuple )):
2189
2140
return array
2190
2141
raise TypeError (f"Couldn't embed array of type\n { _short_str (array .type )} \n with\n { _short_str (feature )} " )
0 commit comments