
Commit 681378c

refactor(api)!: align signatures of read_csv method; sources are positional-only, everything else is required-keyword
1 parent 63d44e6

10 files changed: +34 -29 lines
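
A minimal sketch (not part of the commit) of the calling convention this change enforces, assuming a DuckDB connection; the file and table names are placeholders:

    # Illustrative only: `con` is any Ibis backend connection, "data.csv" is a
    # placeholder file name.
    import ibis

    con = ibis.duckdb.connect()

    # Still valid: the source is passed positionally, options by keyword.
    t = con.read_csv("data.csv", table_name="my_table")

    # Rejected after this commit:
    # con.read_csv(paths="data.csv")        # TypeError: the source must be passed positionally
    # con.read_csv("data.csv", "my_table")  # TypeError: table_name must now be passed by keyword

Because the source parameter is positional-only, backends are also free to name it differently (e.g. `path` vs. `paths`) without breaking callers.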

ibis/backends/__init__.py

Lines changed: 1 addition & 2 deletions

@@ -368,7 +368,7 @@ def read_parquet(
         )

     def read_csv(
-        self, path: str | Path, table_name: str | None = None, **kwargs: Any
+        self, path: str | Path, /, *, table_name: str | None = None, **kwargs: Any
     ) -> ir.Table:
         """Register a CSV file as a table in the current backend.

@@ -386,7 +386,6 @@ def read_csv(
         -------
         ir.Table
             The just-registered table
-
         """
         raise NotImplementedError(
             f"{self.name} does not support direct registration of CSV data."

ibis/backends/bigquery/__init__.py

Lines changed: 1 addition & 2 deletions

@@ -270,7 +270,7 @@ def read_parquet(
         )

     def read_csv(
-        self, path: str | Path, table_name: str | None = None, **kwargs: Any
+        self, path: str | Path, /, *, table_name: str | None = None, **kwargs: Any
     ) -> ir.Table:
         """Read CSV data into a BigQuery table.

@@ -288,7 +288,6 @@ def read_csv(
         -------
         Table
             An Ibis table expression
-
         """
         job_config = bq.LoadJobConfig(
             source_format=bq.SourceFormat.CSV,

ibis/backends/clickhouse/__init__.py

Lines changed: 2 additions & 0 deletions

@@ -611,6 +611,8 @@ def read_parquet(
     def read_csv(
         self,
         path: str | Path,
+        /,
+        *,
         table_name: str | None = None,
         engine: str = "MergeTree",
         **kwargs: Any,

ibis/backends/datafusion/__init__.py

Lines changed: 5 additions & 4 deletions

@@ -394,15 +394,17 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:

     def read_csv(
         self,
-        source_list: str | Path | list[str | Path] | tuple[str | Path],
+        paths: str | Path | list[str | Path] | tuple[str | Path],
+        /,
+        *,
         table_name: str | None = None,
         **kwargs: Any,
     ) -> ir.Table:
         """Register a CSV file as a table in the current database.

         Parameters
         ----------
-        source_list
+        paths
             The data source. A string or Path to the CSV file.
         table_name
             An optional name to use for the created table. This defaults to
@@ -414,9 +416,8 @@ def read_csv(
         -------
         ir.Table
             The just-registered table
-
         """
-        path = normalize_filenames(source_list)
+        path = normalize_filenames(paths)
         table_name = table_name or gen_name("read_csv")
         # Our other backends support overwriting views / tables when re-registering
         self.con.deregister_table(table_name)

ibis/backends/duckdb/__init__.py

Lines changed: 7 additions & 8 deletions

@@ -567,7 +567,9 @@ def read_json(

     def read_csv(
         self,
-        source_list: str | list[str] | tuple[str],
+        paths: str | list[str] | tuple[str],
+        /,
+        *,
         table_name: str | None = None,
         columns: Mapping[str, str | dt.DataType] | None = None,
         types: Mapping[str, str | dt.DataType] | None = None,
@@ -577,7 +579,7 @@ def read_csv(

         Parameters
         ----------
-        source_list
+        paths
             The data source(s). May be a path to a file or directory of CSV
             files, or an iterable of CSV files.
         table_name
@@ -644,17 +646,14 @@ def read_csv(
         │ 2.0 │ 3.0 │ <POINT (2 3)> │
         └─────────┴─────────┴──────────────────────┘
         """
-        source_list = util.normalize_filenames(source_list)
+        paths = util.normalize_filenames(paths)

         if not table_name:
             table_name = util.gen_name("read_csv")

         # auto_detect and columns collide, so we set auto_detect=True
         # unless COLUMNS has been specified
-        if any(
-            source.startswith(("http://", "https://", "s3://"))
-            for source in source_list
-        ):
+        if any(source.startswith(("http://", "https://", "s3://")) for source in paths):
             self._load_extensions(["httpfs"])

         kwargs.setdefault("header", True)
@@ -694,7 +693,7 @@ def make_struct_argument(obj: Mapping[str, str | dt.DataType]) -> sge.Struct:

         self._create_temp_view(
             table_name,
-            sg.select(STAR).from_(self.compiler.f.read_csv(source_list, *options)),
+            sg.select(STAR).from_(self.compiler.f.read_csv(paths, *options)),
         )

         return self.table(table_name)
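
A hedged usage sketch against the DuckDB backend after this change; the URL and the column/type override are placeholders, not part of the commit:

    import ibis

    con = ibis.duckdb.connect()

    # A remote source matches the http/https/s3 prefix check in the hunk above,
    # so the backend loads the httpfs extension before creating the view.
    t = con.read_csv(
        "https://example.com/penguins.csv",  # positional-only source
        table_name="penguins",               # everything else is keyword-only
        types={"body_mass_g": "float64"},    # per-column type override (placeholder column)
    )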

ibis/backends/flink/__init__.py

Lines changed: 2 additions & 0 deletions

@@ -846,6 +846,8 @@ def read_parquet(
     def read_csv(
         self,
         path: str | Path,
+        /,
+        *,
         schema: sch.Schema | None = None,
         table_name: str | None = None,
     ) -> ir.Table:

ibis/backends/polars/__init__.py

Lines changed: 2 additions & 1 deletion

@@ -127,6 +127,8 @@ def sql(
     def read_csv(
         self,
         path: str | Path | list[str | Path] | tuple[str | Path],
+        /,
+        *,
         table_name: str | None = None,
         **kwargs: Any,
     ) -> ir.Table:
@@ -148,7 +150,6 @@ def read_csv(
         -------
         ir.Table
             The just-registered table
-
         """
         source_list = normalize_filenames(path)
         # Flatten the list if there's only one element because Polars

ibis/backends/pyspark/__init__.py

Lines changed: 8 additions & 6 deletions

@@ -832,15 +832,17 @@ def read_parquet(

     def read_csv(
         self,
-        source_list: str | list[str] | tuple[str],
+        paths: str | list[str] | tuple[str],
+        /,
+        *,
         table_name: str | None = None,
         **kwargs: Any,
     ) -> ir.Table:
         """Register a CSV file as a table in the current database.

         Parameters
         ----------
-        source_list
+        paths
             The data source(s). May be a path to a file or directory of CSV files, or an
             iterable of CSV files.
         table_name
@@ -854,7 +856,6 @@ def read_csv(
         -------
         ir.Table
             The just-registered table
-
         """
         if self.mode == "streaming":
             raise NotImplementedError(
@@ -863,9 +864,9 @@ def read_csv(
             )
         inferSchema = kwargs.pop("inferSchema", True)
         header = kwargs.pop("header", True)
-        source_list = util.normalize_filenames(source_list)
+        paths = util.normalize_filenames(paths)
         spark_df = self._session.read.csv(
-            source_list, inferSchema=inferSchema, header=header, **kwargs
+            paths, inferSchema=inferSchema, header=header, **kwargs
         )
         table_name = table_name or util.gen_name("read_csv")

@@ -1116,6 +1117,8 @@ def to_kafka(
     def read_csv_dir(
         self,
         path: str | Path,
+        /,
+        *,
         table_name: str | None = None,
         watermark: Watermark | None = None,
         **kwargs: Any,
@@ -1139,7 +1142,6 @@ def read_csv_dir(
         -------
         ir.Table
             The just-registered table
-
         """
         inferSchema = kwargs.pop("inferSchema", True)
         header = kwargs.pop("header", True)

ibis/backends/snowflake/__init__.py

Lines changed: 1 addition & 2 deletions

@@ -859,7 +859,7 @@ def create_table(
         return self.table(name, database=(catalog, db))

     def read_csv(
-        self, path: str | Path, table_name: str | None = None, **kwargs: Any
+        self, path: str | Path, /, *, table_name: str | None = None, **kwargs: Any
     ) -> ir.Table:
         """Register a CSV file as a table in the Snowflake backend.

@@ -877,7 +877,6 @@ def read_csv(
         -------
         Table
             The table that was read from the CSV file
-
         """
         stage = ibis.util.gen_name("stage")
         file_format = ibis.util.gen_name("format")

ibis/expr/api.py

Lines changed: 5 additions & 4 deletions

@@ -1481,7 +1481,9 @@ def row_number() -> ir.IntegerColumn:


 def read_csv(
-    sources: str | Path | Sequence[str | Path],
+    paths: str | Path | Sequence[str | Path],
+    /,
+    *,
     table_name: str | None = None,
     **kwargs: Any,
 ) -> ir.Table:
@@ -1492,7 +1494,7 @@ def read_csv(

     Parameters
     ----------
-    sources
+    paths
         A filesystem path or URL or list of same. Supports CSV and TSV files.
     table_name
         A name to refer to the table. If not provided, a name will be generated.
@@ -1529,12 +1531,11 @@ def read_csv(
     │ 2 │ NULL │
     │ NULL │ f │
     └───────┴────────┘
-
     """
     from ibis.config import _default_backend

     con = _default_backend()
-    return con.read_csv(sources, table_name=table_name, **kwargs)
+    return con.read_csv(paths, table_name=table_name, **kwargs)


 @experimental
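
A minimal sketch of the top-level API after the rename; "data.csv" is a placeholder and DuckDB is assumed as the default backend:

    import ibis

    # The source is passed positionally and forwarded to the default backend.
    t = ibis.read_csv("data.csv", table_name="t")

    # Previously valid keyword spellings now raise TypeError:
    # ibis.read_csv(sources="data.csv")  # the parameter was renamed to `paths`
    # ibis.read_csv(paths="data.csv")    # and is positional-only in any case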
