20
20
import ibis
21
21
import ibis .backends .sql .compilers as sc
22
22
import ibis .common .exceptions as exc
23
+ import ibis .expr .datatypes as dt
23
24
import ibis .expr .operations as ops
24
25
import ibis .expr .schema as sch
25
26
import ibis .expr .types as ir
@@ -637,27 +638,80 @@ def read_csv(
637
638
self ,
638
639
source_list : str | list [str ] | tuple [str ],
639
640
table_name : str | None = None ,
641
+ columns : Mapping [str , str | dt .DataType ] | None = None ,
642
+ types : Mapping [str , str | dt .DataType ] | None = None ,
640
643
** kwargs : Any ,
641
644
) -> ir .Table :
642
645
"""Register a CSV file as a table in the current database.
643
646
644
647
Parameters
645
648
----------
646
649
source_list
647
- The data source(s). May be a path to a file or directory of CSV files, or an
648
- iterable of CSV files.
650
+ The data source(s). May be a path to a file or directory of CSV
651
+ files, or an iterable of CSV files.
649
652
table_name
650
- An optional name to use for the created table. This defaults to
651
- a sequentially generated name.
653
+ An optional name to use for the created table. This defaults to a
654
+ sequentially generated name.
655
+ columns
656
+ An optional mapping of **all** column names to their types.
657
+ types
658
+ An optional mapping of a **subset** of column names to their types.
652
659
**kwargs
653
- Additional keyword arguments passed to DuckDB loading function.
654
- See https://duckdb.org/docs/data/csv for more information.
660
+ Additional keyword arguments passed to DuckDB loading function. See
661
+ https://duckdb.org/docs/data/csv for more information.
655
662
656
663
Returns
657
664
-------
658
665
ir.Table
659
666
The just-registered table
660
667
668
+ Examples
669
+ --------
670
+ Generate some data
671
+
672
+ >>> import tempfile
673
+ >>> data = b'''
674
+ ... lat,lon,geom
675
+ ... 1.0,2.0,POINT (1 2)
676
+ ... 2.0,3.0,POINT (2 3)
677
+ ... '''
678
+ >>> with tempfile.NamedTemporaryFile(delete=False) as f:
679
+ ... nbytes = f.write(data)
680
+
681
+ Import Ibis
682
+
683
+ >>> import ibis
684
+ >>> from ibis import _
685
+ >>> ibis.options.interactive = True
686
+ >>> con = ibis.duckdb.connect()
687
+
688
+ Read the raw CSV file
689
+
690
+ >>> t = con.read_csv(f.name)
691
+ >>> t
692
+ ┏━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━┓
693
+ ┃ lat ┃ lon ┃ geom ┃
694
+ ┡━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━┩
695
+ │ float64 │ float64 │ string │
696
+ ├─────────┼─────────┼─────────────┤
697
+ │ 1.0 │ 2.0 │ POINT (1 2) │
698
+ │ 2.0 │ 3.0 │ POINT (2 3) │
699
+ └─────────┴─────────┴─────────────┘
700
+
701
+ Load the `spatial` extension and read the CSV file again, using
702
+ specific column types
703
+
704
+ >>> con.load_extension("spatial")
705
+ >>> t = con.read_csv(f.name, types={"geom": "geometry"})
706
+ >>> t
707
+ ┏━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┓
708
+ ┃ lat ┃ lon ┃ geom ┃
709
+ ┡━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━┩
710
+ │ float64 │ float64 │ geospatial:geometry │
711
+ ├─────────┼─────────┼──────────────────────┤
712
+ │ 1.0 │ 2.0 │ <POINT (1 2)> │
713
+ │ 2.0 │ 3.0 │ <POINT (2 3)> │
714
+ └─────────┴─────────┴──────────────────────┘
661
715
"""
662
716
source_list = util .normalize_filenames (source_list )
663
717
@@ -673,27 +727,35 @@ def read_csv(
673
727
self ._load_extensions (["httpfs" ])
674
728
675
729
kwargs .setdefault ("header" , True )
676
- kwargs ["auto_detect" ] = kwargs .pop ("auto_detect" , " columns" not in kwargs )
730
+ kwargs ["auto_detect" ] = kwargs .pop ("auto_detect" , columns is None )
677
731
# TODO: clean this up
678
732
# We want to _usually_ quote arguments but if we quote `columns` it messes
679
733
# up DuckDB's struct parsing.
680
- options = [
681
- sg .to_identifier (key ).eq (sge .convert (val )) for key , val in kwargs .items ()
682
- ]
683
-
684
- if (columns := kwargs .pop ("columns" , None )) is not None :
685
- options .append (
686
- sg .to_identifier ("columns" ).eq (
687
- sge .Struct (
688
- expressions = [
689
- sge .PropertyEQ (
690
- this = sge .convert (key ), expression = sge .convert (value )
691
- )
692
- for key , value in columns .items ()
693
- ]
694
- )
734
+ options = [C [key ].eq (sge .convert (val )) for key , val in kwargs .items ()]
735
+
736
def make_struct_argument(obj: Mapping[str, str | dt.DataType]) -> sge.Struct:
    """Build a struct expression pairing each column name with its type.

    Every value in ``obj`` is normalized through ``dt.dtype`` and then
    translated with the compiler's type mapper. If at least one of the
    normalized types reports itself as geospatial, the ``spatial``
    extension is loaded before the struct is returned.

    Parameters
    ----------
    obj
        Mapping of column name to an ibis data type (or a string
        accepted by ``dt.dtype``).

    Returns
    -------
    sge.Struct
        One ``PropertyEQ`` entry per item of ``obj``, in iteration order.
    """
    mapper = self.compiler.type_mapper
    fields = []
    needs_spatial = False

    for column, raw_type in obj.items():
        dtype = dt.dtype(raw_type)
        # Remember whether any column requires the spatial extension.
        if dtype.is_geospatial():
            needs_spatial = True
        fields.append(
            sge.PropertyEQ(
                this=sge.to_identifier(column),
                expression=sge.convert(mapper.from_ibis(dtype)),
            )
        )

    if needs_spatial:
        self._load_extensions(["spatial"])
    return sge.Struct(expressions=fields)
753
+
754
+ if columns is not None :
755
+ options .append (C .columns .eq (make_struct_argument (columns )))
756
+
757
+ if types is not None :
758
+ options .append (C .types .eq (make_struct_argument (types )))
697
759
698
760
self ._create_temp_view (
699
761
table_name ,
0 commit comments