11
11
from typing import TYPE_CHECKING , Any , Literal
12
12
13
13
import duckdb
14
- import pyarrow as pa
15
- import pyarrow_hotfix # noqa: F401
16
14
import sqlglot as sg
17
15
import sqlglot .expressions as sge
18
16
from packaging .version import parse as vparse
26
24
import ibis .expr .types as ir
27
25
from ibis import util
28
26
from ibis .backends import CanCreateDatabase , UrlFromPath
29
- from ibis .backends .duckdb .converter import DuckDBPandasData , DuckDBPyArrowData
30
27
from ibis .backends .sql import SQLBackend
31
28
from ibis .backends .sql .compilers .base import STAR , AlterTable , C , RenameTable
32
29
from ibis .common .dispatch import lazy_singledispatch
37
34
38
35
import pandas as pd
39
36
import polars as pl
37
+ import pyarrow as pa
38
+ import pyarrow_hotfix # noqa: F401
40
39
import torch
41
40
from fsspec import AbstractFileSystem
42
41
@@ -783,48 +782,17 @@ def read_parquet(
783
782
784
783
table_name = table_name or util .gen_name ("read_parquet" )
785
784
786
- # Default to using the native duckdb parquet reader
787
- # If that fails because of auth issues, fall back to ingesting via
788
- # pyarrow dataset
789
- try :
790
- self ._read_parquet_duckdb_native (paths , table_name , ** kwargs )
791
- except duckdb .IOException :
792
- self ._read_parquet_pyarrow_dataset (paths , table_name , ** kwargs )
793
-
794
- return self .table (table_name )
795
-
796
- def _read_parquet_duckdb_native (
797
- self , source_list : str | Iterable [str ], table_name : str , ** kwargs : Any
798
- ) -> None :
799
- if any (
800
- source .startswith (("http://" , "https://" , "s3://" ))
801
- for source in source_list
802
- ):
785
+ if any (path .startswith (("http://" , "https://" , "s3://" )) for path in paths ):
803
786
self ._load_extensions (["httpfs" ])
804
787
805
788
options = [
806
789
sg .to_identifier (key ).eq (sge .convert (val )) for key , val in kwargs .items ()
807
790
]
808
791
self ._create_temp_view (
809
792
table_name ,
810
- sg .select (STAR ).from_ (self .compiler .f .read_parquet (source_list , * options )),
793
+ sg .select (STAR ).from_ (self .compiler .f .read_parquet (paths , * options )),
811
794
)
812
-
813
- def _read_parquet_pyarrow_dataset (
814
- self , source_list : str | Iterable [str ], table_name : str , ** kwargs : Any
815
- ) -> None :
816
- import pyarrow .dataset as ds
817
-
818
- dataset = ds .dataset (list (map (ds .dataset , source_list )), ** kwargs )
819
- self ._load_extensions (["httpfs" ])
820
- # We don't create a view since DuckDB special cases Arrow Datasets
821
- # so if we also create a view we end up with both a "lazy table"
822
- # and a view with the same name
823
- self .con .register (table_name , dataset )
824
- # DuckDB normally auto-detects Arrow Datasets that are defined
825
- # in local variables but the `dataset` variable won't be local
826
- # by the time we execute against this so we register it
827
- # explicitly.
795
+ return self .table (table_name )
828
796
829
797
def read_delta (
830
798
self , path : str | Path , / , * , table_name : str | None = None , ** kwargs : Any
@@ -1288,6 +1256,9 @@ def to_pyarrow_batches(
1288
1256
chunk_size
1289
1257
The number of rows to fetch per batch
1290
1258
"""
1259
+ import pyarrow as pa
1260
+ import pyarrow_hotfix # noqa: F401
1261
+
1291
1262
self ._run_pre_execute_hooks (expr )
1292
1263
table = expr .as_table ()
1293
1264
sql = self .compile (table , limit = limit , params = params )
@@ -1309,6 +1280,8 @@ def to_pyarrow(
1309
1280
limit : int | str | None = None ,
1310
1281
** kwargs : Any ,
1311
1282
) -> pa .Table :
1283
+ from ibis .backends .duckdb .converter import DuckDBPyArrowData
1284
+
1312
1285
table = self ._to_duckdb_relation (
1313
1286
expr , params = params , limit = limit , ** kwargs
1314
1287
).arrow ()
@@ -1326,10 +1299,12 @@ def execute(
1326
1299
"""Execute an expression."""
1327
1300
import pandas as pd
1328
1301
import pyarrow .types as pat
1302
+ import pyarrow_hotfix # noqa: F401
1329
1303
1330
- table = self ._to_duckdb_relation (
1331
- expr , params = params , limit = limit , ** kwargs
1332
- ).arrow ()
1304
+ from ibis .backends .duckdb .converter import DuckDBPandasData
1305
+
1306
+ rel = self ._to_duckdb_relation (expr , params = params , limit = limit , ** kwargs )
1307
+ table = rel .arrow ()
1333
1308
1334
1309
df = pd .DataFrame (
1335
1310
{
0 commit comments