Skip to content

Commit 8ee878c

Browse files
committed
readability
1 parent 6d249ad commit 8ee878c

File tree

2 files changed

+5
-17
lines changed

2 files changed

+5
-17
lines changed

python/pyspark/sql/pandas/serializers.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -211,20 +211,16 @@ class ArrowBatchUDFSerializer(ArrowStreamUDFSerializer):
211211

212212
def __init__(
213213
self,
214-
assign_cols_by_name,
215214
input_types,
216-
struct_in_pandas="row",
217-
ndarray_as_list=True,
218-
return_type=None,
219215
prefers_large_var_types=False,
220216
):
221217
super(ArrowBatchUDFSerializer, self).__init__()
222-
self._assign_cols_by_name = assign_cols_by_name
223218
self._input_types = input_types
224-
self._struct_in_pandas = struct_in_pandas
225-
self._ndarray_as_list = ndarray_as_list
226-
self._return_type = return_type
227219
self._prefers_large_var_types = prefers_large_var_types
220+
self._assign_cols_by_name = False
221+
self._struct_in_pandas = "row"
222+
self._ndarray_as_list = True
223+
self._return_type = None
228224

229225
def convert_arrow_to_rows(self, *args):
230226
"""

python/pyspark/worker.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2003,14 +2003,7 @@ def read_udfs(pickleSer, infile, eval_type):
20032003
ser = ArrowStreamArrowUDFSerializer(timezone, safecheck, _assign_cols_by_name, False)
20042004
elif eval_type == PythonEvalType.SQL_ARROW_BATCHED_UDF and not use_legacy_pandas_udf_conversion(runner_conf):
20052005
input_types = ([f.dataType for f in _parse_datatype_json_string(utf8_deserializer.loads(infile))])
2006-
ser = ArrowBatchUDFSerializer(
2007-
False,
2008-
input_types,
2009-
"row",
2010-
True,
2011-
None,
2012-
use_large_var_types(runner_conf)
2013-
)
2006+
ser = ArrowBatchUDFSerializer(input_types, use_large_var_types(runner_conf))
20142007
else:
20152008
# Scalar Pandas UDF handles struct type arguments as pandas DataFrames instead of
20162009
# pandas Series. See SPARK-27240.
@@ -2388,7 +2381,6 @@ def mapper(a):
23882381
df2_keys = [a[1][o] for o in parsed_offsets[1][0]]
23892382
df2_vals = [a[1][o] for o in parsed_offsets[1][1]]
23902383
return f(df1_keys, df1_vals, df2_keys, df2_vals)
2391-
23922384
elif eval_type == PythonEvalType.SQL_COGROUPED_MAP_ARROW_UDF:
23932385
import pyarrow as pa
23942386

0 commit comments

Comments (0)