Skip to content

Commit d26ace9

Browse files
committed
Fix some str accessor methods
1 parent 4bef69c commit d26ace9

File tree

9 files changed

+177
-43
lines changed

9 files changed

+177
-43
lines changed

pandas/_libs/lib.pyx

Lines changed: 12 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2786,17 +2786,21 @@ def maybe_convert_objects(ndarray[object] objects,
27862786
seen.object_ = True
27872787

27882788
elif seen.str_:
2789-
if convert_to_nullable_dtype and is_string_array(objects, skipna=True):
2790-
from pandas.core.arrays.string_ import StringDtype
2789+
if is_string_array(objects, skipna=True):
2790+
if convert_to_nullable_dtype:
2791+
from pandas.core.arrays.string_ import StringDtype
27912792

2792-
dtype = StringDtype()
2793-
return dtype.construct_array_type()._from_sequence(objects, dtype=dtype)
2793+
if using_string_dtype():
2794+
dtype = StringDtype(na_value=np.nan)
2795+
else:
2796+
dtype = StringDtype()
2797+
return dtype.construct_array_type()._from_sequence(objects, dtype=dtype)
27942798

2795-
elif using_string_dtype() and is_string_array(objects, skipna=True):
2796-
from pandas.core.arrays.string_ import StringDtype
2799+
elif using_string_dtype():
2800+
from pandas.core.arrays.string_ import StringDtype
27972801

2798-
dtype = StringDtype(na_value=np.nan)
2799-
return dtype.construct_array_type()._from_sequence(objects, dtype=dtype)
2802+
dtype = StringDtype(na_value=np.nan)
2803+
return dtype.construct_array_type()._from_sequence(objects, dtype=dtype)
28002804

28012805
seen.object_ = True
28022806
elif seen.interval_:

pandas/core/arrays/datetimelike.py

Lines changed: 17 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -973,7 +973,13 @@ def _cmp_method(self, other, op):
973973
try:
974974
other = self._validate_comparison_value(other)
975975
except InvalidComparison:
976-
return invalid_comparison(self, other, op)
976+
res = invalid_comparison(self, other, op)
977+
if get_option("mode.pdep16_data_types"):
978+
res = pd_array(res)
979+
o_mask = isna(other)
980+
mask = self._isnan | o_mask
981+
res[mask] = res.dtype.na_value
982+
return res
977983

978984
dtype = getattr(other, "dtype", None)
979985
if is_object_dtype(dtype):
@@ -982,12 +988,18 @@ def _cmp_method(self, other, op):
982988
result = ops.comp_method_OBJECT_ARRAY(
983989
op, np.asarray(self.astype(object)), other
984990
)
991+
if get_option("mode.pdep16_data_types"):
992+
result = pd_array(result)
993+
result[self.isna()] = result.dtype.na_value
985994
return result
986995
if other is NaT:
987996
if op is operator.ne:
988997
result = np.ones(self.shape, dtype=bool)
989998
else:
990999
result = np.zeros(self.shape, dtype=bool)
1000+
if get_option("mode.pdep16_data_types"):
1001+
result = pd_array(result)
1002+
result[self.isna()] = result.dtype.na_value
9911003
return result
9921004

9931005
if not isinstance(self.dtype, PeriodDtype):
@@ -1018,6 +1030,10 @@ def _cmp_method(self, other, op):
10181030
nat_result = op is operator.ne
10191031
np.putmask(result, mask, nat_result)
10201032

1033+
if get_option("mode.pdep16_data_types"):
1034+
result = pd_array(result)
1035+
if mask.any():
1036+
result[mask] = result.dtype.na_value
10211037
return result
10221038

10231039
# pow is invalid for all three subclasses; TimedeltaArray will override

pandas/core/arrays/string_.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -444,6 +444,9 @@ def _str_map(
444444
elif dtype == np.dtype("bool"):
445445
# GH#55736
446446
na_value = bool(na_value)
447+
448+
dtype = pandas_dtype(dtype)
449+
pass_dtype = dtype.numpy_dtype
447450
result = lib.map_infer_mask(
448451
arr,
449452
f,
@@ -453,7 +456,7 @@ def _str_map(
453456
# error: Argument 1 to "dtype" has incompatible type
454457
# "Union[ExtensionDtype, str, dtype[Any], Type[object]]"; expected
455458
# "Type[object]"
456-
dtype=np.dtype(cast(type, dtype)),
459+
dtype=np.dtype(cast(type, pass_dtype)),
457460
)
458461

459462
if not na_value_is_na:

pandas/core/arrays/timedeltas.py

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,8 @@
99

1010
import numpy as np
1111

12+
from pandas._config import get_option
13+
1214
from pandas._libs import (
1315
lib,
1416
tslibs,
@@ -59,6 +61,7 @@
5961
from pandas.core.arrays import datetimelike as dtl
6062
from pandas.core.arrays._ranges import generate_regular_range
6163
import pandas.core.common as com
64+
from pandas.core.construction import array as pd_array
6265
from pandas.core.ops.common import unpack_zerodim_and_defer
6366

6467
if TYPE_CHECKING:
@@ -528,10 +531,13 @@ def _scalar_divlike_op(self, other, op):
528531
# specifically timedelta64-NaT
529532
res = np.empty(self.shape, dtype=np.float64)
530533
res.fill(np.nan)
531-
return res
532534

533-
# otherwise, dispatch to Timedelta implementation
534-
return op(self._ndarray, other)
535+
else:
536+
# otherwise, dispatch to Timedelta implementation
537+
res = op(self._ndarray, other)
538+
if get_option("mode.pdep16_data_types"):
539+
res = pd_array(res)
540+
return res
535541

536542
else:
537543
# caller is responsible for checking lib.is_scalar(other)
@@ -585,6 +591,8 @@ def _vector_divlike_op(self, other, op) -> np.ndarray | Self:
585591
result = result.astype(np.float64)
586592
np.putmask(result, mask, np.nan)
587593

594+
if get_option("mode.pdep16_data_types"):
595+
result = pd_array(result)
588596
return result
589597

590598
@unpack_zerodim_and_defer("__truediv__")

pandas/core/indexes/base.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7137,7 +7137,7 @@ def _cmp_method(self, other, op):
71377137
"""
71387138
Wrapper used to dispatch comparison operations.
71397139
"""
7140-
if self.is_(other):
7140+
if False: # self.is_(other):
71417141
# fastpath
71427142
if op in {operator.eq, operator.le, operator.ge}:
71437143
arr = np.ones(len(self), dtype=bool)

pandas/core/strings/object_array.py

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -74,6 +74,10 @@ def _str_map(
7474
na_value = self.dtype.na_value # type: ignore[attr-defined]
7575

7676
if not len(self):
77+
if dtype == "Int64":
78+
from pandas.core.construction import array as pd_array
79+
80+
return pd_array([], dtype=dtype)
7781
return np.array([], dtype=dtype)
7882

7983
arr = np.asarray(self, dtype=object)
@@ -110,12 +114,17 @@ def g(x):
110114
np.putmask(result, mask, na_value)
111115
if convert and result.dtype == object:
112116
result = lib.maybe_convert_objects(result)
117+
118+
if dtype == "Int64":
119+
from pandas.core.construction import array as pd_array
120+
121+
return pd_array(result, dtype=dtype)
113122
return result
114123

115124
def _str_count(self, pat, flags: int = 0):
116125
regex = re.compile(pat, flags=flags)
117126
f = lambda x: len(regex.findall(x))
118-
return self._str_map(f, dtype="int64")
127+
return self._str_map(f, dtype="Int64")
119128

120129
def _str_pad(
121130
self,
@@ -298,7 +307,7 @@ def _str_find_(self, sub, start, end, side):
298307
f = lambda x: getattr(x, method)(sub, start)
299308
else:
300309
f = lambda x: getattr(x, method)(sub, start, end)
301-
return self._str_map(f, dtype="int64")
310+
return self._str_map(f, dtype="Int64")
302311

303312
def _str_findall(self, pat, flags: int = 0):
304313
regex = re.compile(pat, flags=flags)
@@ -319,14 +328,14 @@ def _str_index(self, sub, start: int = 0, end=None):
319328
f = lambda x: x.index(sub, start, end)
320329
else:
321330
f = lambda x: x.index(sub, start, end)
322-
return self._str_map(f, dtype="int64")
331+
return self._str_map(f, dtype="Int64")
323332

324333
def _str_rindex(self, sub, start: int = 0, end=None):
325334
if end:
326335
f = lambda x: x.rindex(sub, start, end)
327336
else:
328337
f = lambda x: x.rindex(sub, start, end)
329-
return self._str_map(f, dtype="int64")
338+
return self._str_map(f, dtype="Int64")
330339

331340
def _str_join(self, sep: str):
332341
return self._str_map(sep.join)
@@ -339,7 +348,7 @@ def _str_rpartition(self, sep: str, expand):
339348
return self._str_map(lambda x: x.rpartition(sep), dtype="object")
340349

341350
def _str_len(self):
342-
return self._str_map(len, dtype="int64")
351+
return self._str_map(len, dtype="Int64")
343352

344353
def _str_slice(self, start=None, stop=None, step=None):
345354
obj = slice(start, stop, step)

0 commit comments

Comments
 (0)