31
31
from ibis .backends .pyspark .client import PySparkTable
32
32
from ibis .backends .pyspark .compiler import PySparkExprTranslator
33
33
from ibis .backends .pyspark .datatypes import PySparkType
34
+ from ibis .common .temporal import normalize_timezone
35
+ from ibis .formats .pandas import PandasData
34
36
35
37
if TYPE_CHECKING :
36
38
from collections .abc import Mapping , Sequence
@@ -104,6 +106,18 @@ class PySparkCompiler(Compiler):
104
106
table_set_formatter_class = PySparkTableSetFormatter
105
107
106
108
109
class PySparkPandasData(PandasData):
    """Pandas conversion rules specialized for the PySpark backend."""

    @classmethod
    def convert_Timestamp_element(cls, dtype):
        """Build a per-element converter for timestamp values of *dtype*.

        Values for timezone-aware dtypes are converted into the dtype's
        declared zone; values for naive dtypes are normalized through UTC
        and then stripped of their tzinfo.
        """

        def converter(value, dtype=dtype):
            tz = dtype.timezone
            if tz is None:
                # Naive target dtype: normalize via UTC, then drop tzinfo.
                return value.astimezone(normalize_timezone("UTC")).replace(
                    tzinfo=None
                )
            return value.astimezone(normalize_timezone(tz))

        return converter
119
+
120
+
107
121
class Backend (BaseSQLBackend , CanCreateDatabase ):
108
122
compiler = PySparkCompiler
109
123
name = "pyspark"
@@ -219,7 +233,9 @@ def execute(self, expr: ir.Expr, **kwargs: Any) -> Any:
219
233
df = self .compile (table_expr , ** kwargs ).toPandas ()
220
234
221
235
# TODO: remove the extra conversion
222
- return expr .__pandas_result__ (table_expr .__pandas_result__ (df ))
236
+ return expr .__pandas_result__ (
237
+ PySparkPandasData .convert_table (df , table_expr .schema ())
238
+ )
223
239
224
240
def _fully_qualified_name (self , name , database ):
225
241
if is_fully_qualified (name ):
@@ -232,17 +248,15 @@ def close(self):
232
248
self ._context .stop ()
233
249
234
250
def fetch_from_cursor(self, cursor, schema):
    """Materialize the cursor's Spark query as a pandas DataFrame.

    ``toPandas`` blocks until the underlying Spark job finishes. The
    *schema* argument is accepted for interface compatibility but is
    not consulted here.
    """
    result = cursor.query.toPandas()  # blocks until finished
    return result
237
252
238
253
def raw_sql(self, query: str) -> _PySparkCursor:
    """Execute *query* on the active Spark session.

    Returns the resulting Spark DataFrame wrapped in a ``_PySparkCursor``.
    """
    spark_df = self._session.sql(query)
    return _PySparkCursor(spark_df)
241
256
242
257
def _get_schema_using_query(self, query):
    """Infer the ibis schema of *query* without materializing any rows.

    A ``LIMIT 0`` wrapper is enough for Spark to resolve the result's
    column names and types.
    """
    cursor = self.raw_sql(f"SELECT * FROM ({query}) t0 LIMIT 0")
    struct = PySparkType.to_ibis(cursor.query.schema)
    return sch.Schema(struct)
246
260
247
261
def _get_jtable (self , name , database = None ):
248
262
get_table = self ._catalog ._jcatalog .getTable
0 commit comments