7
7
import warnings
8
8
from typing import (
9
9
Any ,
10
- Dict ,
11
10
Iterable ,
12
11
Literal ,
13
12
Mapping ,
16
15
Tuple ,
17
16
Type ,
18
17
Union ,
18
+ TypedDict ,
19
19
)
20
20
21
21
import polars as pl
22
22
from packaging import version
23
23
from polars ._typing import ColumnNameOrSelector , PythonDataType
24
24
from polars .datatypes import DataTypeClass
25
+ from typing_extensions import NotRequired
25
26
26
27
from pandera import dtypes , errors
27
28
from pandera .api .polars .types import PolarsData
38
39
pl .exceptions .ComputeError ,
39
40
)
40
41
41
-
42
42
SchemaDict = Mapping [str , PolarsDataType ]
43
43
44
44
@@ -565,6 +565,11 @@ def from_parametrized_dtype(cls, polars_dtype: pl.Duration):
565
565
###############################################################################
566
566
567
567
568
+ class _ArrayKwargs (TypedDict ):
569
+ shape : NotRequired [Union [int | tuple [int , ...]]]
570
+ width : NotRequired [Union [int , None ]]
571
+
572
+
568
573
@Engine .register_dtype (equivalents = [pl .Array ])
569
574
@immutable (init = True )
570
575
class Array (DataType ):
@@ -580,13 +585,15 @@ def __init__( # pylint:disable=super-init-not-called
580
585
width : Optional [int ] = None ,
581
586
) -> None :
582
587
583
- kwargs : Dict [str , Union [int , Tuple [int , ...]]] = {}
584
- if width is not None :
588
+ kwargs : _ArrayKwargs = {}
589
+ if (
590
+ width is not None
591
+ ): # width deprecated in polars 0.20.31, replaced by shape
585
592
kwargs ["shape" ] = width
586
593
elif shape is not None :
587
594
kwargs ["shape" ] = shape
588
595
589
- if inner or shape or width :
596
+ if inner and ( shape or width ) :
590
597
object .__setattr__ (self , "type" , pl .Array (inner = inner , ** kwargs ))
591
598
592
599
@classmethod
@@ -677,11 +684,9 @@ class Categorical(DataType):
677
684
678
685
type = pl .Categorical
679
686
680
- ordering = None
681
-
682
687
def __init__ ( # pylint:disable=super-init-not-called
683
688
self ,
684
- ordering : Literal ["physical" , "lexical" ] = "physical" ,
689
+ ordering : Optional [ Literal ["physical" , "lexical" ] ] = "physical" ,
685
690
) -> None :
686
691
object .__setattr__ (self , "ordering" , ordering )
687
692
object .__setattr__ (self , "type" , pl .Categorical (ordering = ordering ))
@@ -706,8 +711,9 @@ def __init__( # pylint:disable=super-init-not-called
706
711
self ,
707
712
categories : Union [pl .Series , Iterable [str ], None ] = None ,
708
713
) -> None :
709
- object .__setattr__ (self , "categories" , categories )
710
- object .__setattr__ (self , "type" , pl .Enum (categories = categories ))
714
+ if categories is not None :
715
+ object .__setattr__ (self , "categories" , categories )
716
+ object .__setattr__ (self , "type" , pl .Enum (categories = categories ))
711
717
712
718
@classmethod
713
719
def from_parametrized_dtype (cls , polars_dtype : pl .Enum ):
@@ -781,8 +787,10 @@ def try_coerce(self, data_container: PolarsDataContainer) -> pl.LazyFrame:
781
787
return self .coerce (data_container )
782
788
except Exception as exc : # pylint:disable=broad-except
783
789
is_coercible : pl .LazyFrame = polars_object_coercible (
784
- data_container , self .type
785
- ) & self .__belongs_to_categories (
790
+ data_container ,
791
+ self .type ,
792
+ # TODO this is incorrect, appears not to be covered by tests
793
+ ) & self .__belongs_to_categories ( # type: ignore # TEMP ONLY!
786
794
data_container .lazyframe , key = data_container .key
787
795
)
788
796
@@ -800,7 +808,9 @@ def __belongs_to_categories(
800
808
lf : pl .LazyFrame ,
801
809
key : Optional [str ] = None ,
802
810
) -> pl .LazyFrame :
803
- return lf .select (pl .col (key or "*" ).is_in (self .categories ))
811
+ # self.categories not None here.
812
+ expr = pl .col (key or "*" ).is_in (self .categories ) # type: ignore[arg-type]
813
+ return lf .select (expr )
804
814
805
815
def __str__ (self ):
806
816
return "Category"
0 commit comments