We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Validating a column of time-zone-aware datetimes fails when the column contains multiple time zones.
Note: Please read this guide detailing how to provide the necessary information for us to reproduce your bug.
import pandera as pa from pandera.typing import Series import pandas as pd class Model(pa.SchemaModel): timestamp: Series[pd.DatetimeTZDtype] = pa.Field( dtype_kwargs={"unit": "ns", "tz": "America/Chicago"} ) class Config: coerce = True strict = False df = pd.DataFrame( [ [pd.to_datetime("2023-03-01 13:00:00").tz_localize("America/Chicago")], [pd.to_datetime("2023-03-01 13:00:00").tz_localize("America/New_York")], ], columns=["timestamp"], ) Model.validate(df) SchemaErrors Traceback (most recent call last) File ~/miniconda3/envs/dev/lib/python3.9/site-packages/pandera/backends/pandas/container.py:81, in DataFrameSchemaBackend.validate(self, check_obj, schema, head, tail, sample, random_state, lazy, inplace) 80 try: ---> 81 check_obj = parser(check_obj, *args) 82 except SchemaError as exc: File ~/miniconda3/envs/dev/lib/python3.9/site-packages/pandera/backends/pandas/container.py:532, in DataFrameSchemaBackend.coerce_dtype(self, check_obj, schema) 529 if error_handler.collected_errors: 530 # raise SchemaErrors if this method is called without an 531 # error_handler --> 532 raise SchemaErrors( 533 schema=schema, 534 schema_errors=error_handler.collected_errors, 535 data=check_obj, 536 ) 538 return check_obj SchemaErrors: Schema Model: A total of 1 schema errors were found. Error Counts ------------ - SchemaErrorReason.SCHEMA_COMPONENT_CHECK: 1 Schema Error Summary -------------------- Empty DataFrame Columns: [failure_cases, n_failure_cases] Index: [] Usage Tip --------- Directly inspect all errors by catching the exception: `` try: schema.validate(dataframe, lazy=True) except SchemaErrors as err: err.failure_cases # dataframe of schema errors err.data # invalid dataframe `` The above exception was the direct cause of the following exception: SchemaError Traceback (most recent call last) Cell In[1], line 23 13 strict = False 16 df = pd.DataFrame( 17 [ 18 [pd.to_datetime("2023-03-01 13:00:00").tz_localize("America/Chicago")], (...) 
21 columns=["timestamp"], 22 ) ---> 23 Model.validate(df) File ~/miniconda3/envs/dev/lib/python3.9/site-packages/pandera/api/pandas/model.py:306, in DataFrameModel.validate(cls, check_obj, head, tail, sample, random_state, lazy, inplace) 291 @classmethod 292 @docstring_substitution(validate_doc=DataFrameSchema.validate.__doc__) 293 def validate( (...) 301 inplace: bool = False, 302 ) -> DataFrameBase[TDataFrameModel]: 303 """%(validate_doc)s""" 304 return cast( 305 DataFrameBase[TDataFrameModel], --> 306 cls.to_schema().validate( 307 check_obj, head, tail, sample, random_state, lazy, inplace 308 ), 309 ) File ~/miniconda3/envs/dev/lib/python3.9/site-packages/pandera/api/pandas/container.py:366, in DataFrameSchema.validate(self, check_obj, head, tail, sample, random_state, lazy, inplace) 354 check_obj = check_obj.map_partitions( # type: ignore [operator] 355 self._validate, 356 head=head, (...) 362 meta=check_obj, 363 ) 364 return check_obj.pandera.add_schema(self) --> 366 return self._validate( 367 check_obj=check_obj, 368 head=head, 369 tail=tail, 370 sample=sample, 371 random_state=random_state, 372 lazy=lazy, 373 inplace=inplace, 374 ) File ~/miniconda3/envs/dev/lib/python3.9/site-packages/pandera/api/pandas/container.py:395, in DataFrameSchema._validate(self, check_obj, head, tail, sample, random_state, lazy, inplace) 386 if self._is_inferred: 387 warnings.warn( 388 f"This {type(self)} is an inferred schema that hasn't been " 389 "modified. It's recommended that you refine the schema " (...) 
392 UserWarning, 393 ) --> 395 return self.get_backend(check_obj).validate( 396 check_obj, 397 schema=self, 398 head=head, 399 tail=tail, 400 sample=sample, 401 random_state=random_state, 402 lazy=lazy, 403 inplace=inplace, 404 ) File ~/miniconda3/envs/dev/lib/python3.9/site-packages/pandera/backends/pandas/container.py:85, in DataFrameSchemaBackend.validate(self, check_obj, schema, head, tail, sample, random_state, lazy, inplace) 83 error_handler.collect_error(exc.reason_code, exc) 84 except SchemaErrors as exc: ---> 85 error_handler.collect_errors(exc) 87 # We may have modified columns, for example by 88 # add_missing_columns, so regenerate column info 89 column_info = self.collect_column_info(check_obj, schema) File ~/miniconda3/envs/dev/lib/python3.9/site-packages/pandera/error_handlers.py:63, in SchemaErrorHandler.collect_errors(self, schema_errors, original_exc) 56 """Collect schema errors from a SchemaErrors exception. 57 58 :param reason_code: string representing reason for error. 59 :param schema_error: ``SchemaError`` object. 60 :param original_exc: original exception associated with the SchemaError. 61 """ 62 for schema_error in schema_errors.schema_errors: ---> 63 self.collect_error( 64 schema_error.reason_code, 65 schema_error, 66 original_exc or schema_errors, 67 ) File ~/miniconda3/envs/dev/lib/python3.9/site-packages/pandera/error_handlers.py:38, in SchemaErrorHandler.collect_error(self, reason_code, schema_error, original_exc) 31 """Collect schema error, raising exception if lazy is False. 32 33 :param reason_code: string representing reason for error. 34 :param schema_error: ``SchemaError`` object. 35 :param original_exc: original exception associated with the SchemaError. 36 """ 37 if not self._lazy: ---> 38 raise schema_error from original_exc 40 # delete data of validated object from SchemaError object to prevent 41 # storing copies of the validated DataFrame/Series for every 42 # SchemaError collected. 
43 del schema_error.data File ~/miniconda3/envs/dev/lib/python3.9/site-packages/pandera/backends/pandas/container.py:576, in DataFrameSchemaBackend._coerce_dtype_helper.<locals>._try_coercion(coerce_fn, obj) 574 def _try_coercion(coerce_fn, obj): 575 try: --> 576 return coerce_fn(obj) 577 except SchemaError as exc: 578 error_handler.collect_error( 579 SchemaErrorReason.DATATYPE_COERCION, 580 exc, 581 ) File ~/miniconda3/envs/dev/lib/python3.9/site-packages/pandera/api/pandas/array.py:146, in ArraySchema.coerce_dtype(self, check_obj) 136 def coerce_dtype( 137 self, 138 check_obj: Union[pd.Series, pd.Index], 139 ) -> Union[pd.Series, pd.Index]: 140 """Coerce type of a pd.Series by type specified in dtype. 141 142 :param pd.Series series: One-dimensional ndarray with axis labels 143 (including time series). 144 :returns: ``Series`` with coerced data type 145 """ --> 146 return self.get_backend(check_obj).coerce_dtype(check_obj, schema=self) File ~/miniconda3/envs/dev/lib/python3.9/site-packages/pandera/backends/pandas/components.py:194, in ColumnBackend.coerce_dtype(self, check_obj, schema) 190 # pylint: disable=super-with-arguments 191 # pylint: disable=fixme 192 # TODO: use singledispatchmethod here 193 if is_field(check_obj) or is_index(check_obj): --> 194 return super(ColumnBackend, self).coerce_dtype( 195 check_obj, 196 schema=schema, 197 ) 198 return check_obj.apply( 199 lambda x: super(ColumnBackend, self).coerce_dtype( 200 x, (...) 
203 axis="columns", 204 ) File ~/miniconda3/envs/dev/lib/python3.9/site-packages/pandera/backends/pandas/array.py:177, in ArraySchemaBackend.coerce_dtype(self, check_obj, schema) 175 return schema.dtype.try_coerce(check_obj) 176 except ParserError as exc: --> 177 raise SchemaError( 178 schema=schema, 179 data=check_obj, 180 message=( 181 f"Error while coercing '{schema.name}' to type " 182 f"{schema.dtype}: {exc}:\n{exc.failure_cases}" 183 ), 184 failure_cases=exc.failure_cases, 185 check=f"coerce_dtype('{schema.dtype}')", 186 ) from exc SchemaError: Error while coercing 'timestamp' to type datetime64[ns, America/Chicago]: Could not coerce <class 'pandas.core.series.Series'> data_container into type datetime64[ns, America/Chicago]: Empty DataFrame Columns: [index, failure_case] Index: []
All the time zones get converted to the target time zone.
If applicable, add screenshots to help explain your problem.
Add any other context about the problem here.
The text was updated successfully, but these errors were encountered:
Bump on this. Here is another example:
import pandas as pd df_1 = pd.DataFrame({"ts":["2023-10-30T11:27:20.082372+01:00", "2023-10-30T10:02:24.800916+01:00", "2023-10-30T07:37:01.052617+01:00", "2023-10-30T07:03:02.975448+01:00", "2023-10-27T15:37:25.562608+02:00", "2023-10-27T15:10:05.190293+02:00", "2023-10-27T12:44:23.609281+02:00", "2023-10-27T12:32:16.41568+02:00"]}) pd.to_datetime(df_1['ts']) # succeeds from pandera import Column, DataFrameSchema, Timestamp schema = DataFrameSchema( { "ts": Column(Timestamp) }, coerce=True ) schema.validate(df_1) # raises df_2 = pd.DataFrame({"ts":["2023-10-30T11:27:20.082372+01:00", "2023-10-30T10:02:24.800916+01:00", "2023-10-30T07:37:01.052617+01:00", "2023-10-30T07:03:02.975448+01:00",]}) schema.validate(df_2) # succeeds
Sorry, something went wrong.
Successfully merging a pull request may close this issue.
Uh oh!
There was an error while loading. Please reload this page.
Validating a column of time-zone-aware datetimes fails when the column contains multiple time zones.
Note: Please read this guide detailing how to provide the necessary information for us to reproduce your bug.
Code Sample, a copy-pastable example
Expected behavior
All the time zones get converted to the target time zone.
Desktop (please complete the following information):
Screenshots
If applicable, add screenshots to help explain your problem.
Additional context
Add any other context about the problem here.
The text was updated successfully, but these errors were encountered: