@@ -412,42 +412,55 @@ def memtable(
412
412
413
413
@lazy_singledispatch
def _memtable(
    data: Any,
    *,
    columns: Iterable[str] | None = None,
    schema: SchemaLike | None = None,
    name: str | None = None,
) -> Table:
    """Fallback constructor: coerce arbitrary ``data`` into a known table type.

    Anything that exposes Arrow's PyCapsule stream protocol
    (``__arrow_c_stream__``) is consumed directly as a ``pyarrow.Table``;
    everything else goes through the ``pandas.DataFrame`` constructor.
    The coerced object is then re-dispatched to the registered handler.
    """
    if not hasattr(data, "__arrow_c_stream__"):
        import pandas as pd

        coerced = pd.DataFrame(data, columns=columns)
    else:
        # Arrow's PyCapsule interface lets pyarrow ingest the object
        # without an intermediate pandas round-trip.
        import pyarrow as pa

        coerced = pa.table(data)
    return _memtable(coerced, columns=columns, schema=schema, name=name)
431
+
432
+
433
+ @_memtable .register ("pandas.DataFrame" )
434
+ def _memtable_from_pandas_dataframe (
435
+ data : pd .DataFrame ,
436
+ * ,
437
+ columns : Iterable [str ] | None = None ,
438
+ schema : SchemaLike | None = None ,
439
+ name : str | None = None ,
440
+ ) -> Table :
441
+ from ibis .formats .pandas import PandasDataFrameProxy
429
442
430
- if df .columns .inferred_type != "string" :
431
- cols = df .columns
443
+ if data .columns .inferred_type != "string" :
444
+ cols = data .columns
432
445
newcols = getattr (
433
446
schema ,
434
447
"names" ,
435
448
(f"col{ i :d} " for i in builtins .range (len (cols ))),
436
449
)
437
- df = df .rename (columns = dict (zip (cols , newcols )))
450
+ data = data .rename (columns = dict (zip (cols , newcols )))
438
451
439
452
if columns is not None :
440
- if (provided_col := len (columns )) != (exist_col := len (df .columns )):
453
+ if (provided_col := len (columns )) != (exist_col := len (data .columns )):
441
454
raise ValueError (
442
455
"Provided `columns` must have an entry for each column in `data`.\n "
443
456
f"`columns` has { provided_col } elements but `data` has { exist_col } columns."
444
457
)
445
458
446
- df = df .rename (columns = dict (zip (df .columns , columns )))
459
+ data = data .rename (columns = dict (zip (data .columns , columns )))
447
460
448
461
# verify that the DataFrame has no duplicate column names because ibis
449
462
# doesn't allow that
450
- cols = df .columns
463
+ cols = data .columns
451
464
dupes = [name for name , count in Counter (cols ).items () if count > 1 ]
452
465
if dupes :
453
466
raise IbisInputError (
@@ -456,8 +469,8 @@ def _memtable(
456
469
457
470
op = ops .InMemoryTable (
458
471
name = name if name is not None else util .gen_name ("pandas_memtable" ),
459
- schema = sch .infer (df ) if schema is None else schema ,
460
- data = PandasDataFrameProxy (df ),
472
+ schema = sch .infer (data ) if schema is None else schema ,
473
+ data = PandasDataFrameProxy (data ),
461
474
)
462
475
return op .to_expr ()
463
476
@@ -499,6 +512,21 @@ def _memtable_from_pyarrow_dataset(
499
512
).to_expr ()
500
513
501
514
515
@_memtable.register("pyarrow.RecordBatchReader")
def _memtable_from_pyarrow_RecordBatchReader(
    data: pa.Table,
    *,
    name: str | None = None,
    schema: SchemaLike | None = None,
    columns: Iterable[str] | None = None,
):
    """Reject streaming readers: materializing one would pull all batches into memory."""
    message = (
        "Creating an `ibis.memtable` from a `pyarrow.RecordBatchReader` would "
        "load _all_ data into memory. If you want to do this, please do so "
        "explicitly like `ibis.memtable(reader.read_all())`"
    )
    raise TypeError(message)
528
+
529
+
502
530
@_memtable.register("polars.LazyFrame")
def _memtable_from_polars_lazyframe(data: pl.LazyFrame, **kwargs):
    # Materialize the lazy frame, then defer to the eager-DataFrame handler.
    collected = data.collect()
    return _memtable_from_polars_dataframe(collected, **kwargs)
0 commit comments