feat: enable read_csv() to process other files #940

Merged · 7 commits · Sep 5, 2024
6 changes: 4 additions & 2 deletions bigframes/session/__init__.py
@@ -1008,10 +1008,12 @@ def _check_file_size(self, filepath: str):
             blob = bucket.blob(blob_name)
             blob.reload()
             file_size = blob.size
-        else:  # local file path
+        elif os.path.exists(filepath):  # local file path
             file_size = os.path.getsize(filepath)
+        else:
+            file_size = None
 
-        if file_size > max_size:
+        if file_size is not None and file_size > max_size:
             # Convert to GB
             file_size = round(file_size / (1024**3), 1)
             max_size = int(max_size / 1024**3)
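After this change, a path that is neither a GCS URI nor an existing local file (for example an HTTP URL) leaves `file_size` as `None`, and the size guard is skipped; previously the `else` branch assumed a local path, so `os.path.getsize` would fail on anything else. A minimal standalone sketch of the resulting control flow; the function name, the 1 GB limit, and the `ValueError` are illustrative stand-ins for the real method, limit, and error handling:

```python
import os


def check_file_size(filepath: str, max_size: int = 1024**3) -> None:
    """Sketch: enforce a size limit only when the size is knowable."""
    if filepath.startswith("gs://"):
        file_size = 0  # the real method looks this up via the GCS client
    elif os.path.exists(filepath):  # local file path
        file_size = os.path.getsize(filepath)
    else:
        file_size = None  # e.g. an HTTP(S) URL: size unknown, guard skipped

    if file_size is not None and file_size > max_size:
        raise ValueError(  # stand-in for the library's actual error
            f"File size {file_size / 1024**3:.1f} GB exceeds "
            f"{max_size // 1024**3} GB limit."
        )
```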
8 changes: 7 additions & 1 deletion bigframes/session/loader.py
@@ -18,6 +18,7 @@
 import dataclasses
 import datetime
 import itertools
+import os
 import typing
 from typing import Dict, Hashable, IO, Iterable, List, Optional, Sequence, Tuple, Union

@@ -421,11 +422,16 @@ def _read_bigquery_load_job(
                 load_job = self._bqclient.load_table_from_uri(
                     filepath_or_buffer, table, job_config=job_config
                 )
-            else:
+            elif os.path.exists(filepath_or_buffer):  # local file path
                 with open(filepath_or_buffer, "rb") as source_file:
                     load_job = self._bqclient.load_table_from_file(
                         source_file, table, job_config=job_config
                     )
+            else:
+                raise NotImplementedError(
+                    f"BigQuery engine only supports a local file path or GCS path. "
+                    f"{constants.FEEDBACK_LINK}"
+                )
         else:
             load_job = self._bqclient.load_table_from_file(
                 filepath_or_buffer, table, job_config=job_config
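With this guard in place, the BigQuery load path accepts only local files and `gs://` URIs and fails fast on anything else, while the default engine can still fetch other sources such as HTTP URLs. A hedged usage sketch (the URL is the one used by the new test below; the `bigframes.pandas` entry point is assumed):

```python
import bigframes.pandas as bpd

uri = (
    "https://raw.githubusercontent.com/googleapis/"
    "python-bigquery-dataframes/main/tests/data/people.csv"
)

# Default engine: the file is fetched and loaded into a temporary BQ table.
df = bpd.read_csv(uri)

# BigQuery engine: only local paths and gs:// URIs are supported, so this
# is expected to raise NotImplementedError (per the new test's xfail mark).
try:
    bpd.read_csv(uri, engine="bigquery")
except NotImplementedError as exc:
    print(exc)
```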
19 changes: 19 additions & 0 deletions tests/system/small/test_session.py
@@ -1036,6 +1036,25 @@ def test_read_csv_local_w_usecols(session, scalars_pandas_df_index, engine):
     assert len(df.columns) == 1
 
 
+@pytest.mark.parametrize(
+    "engine",
+    [
+        pytest.param(
+            "bigquery",
+            id="bq_engine",
+            marks=pytest.mark.xfail(
+                raises=NotImplementedError,

Review comment (Contributor): So it's only available in the default engine? We should update the docs to be explicit. We should also update the docs to say that we support global paths (they currently say local only).

Reply (Contributor Author): Good catch! Done.

+            ),
+        ),
+        pytest.param(None, id="default_engine"),
+    ],
+)
+def test_read_csv_others(session, engine):
+    uri = "https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/main/tests/data/people.csv"
+    df = session.read_csv(uri, engine=engine)
+    assert len(df.columns) == 3
+
+
 @pytest.mark.parametrize(
     "engine",
     [
6 changes: 3 additions & 3 deletions third_party/bigframes_vendored/pandas/io/parsers/readers.py
@@ -51,16 +51,16 @@ def read_csv(
     encoding: Optional[str] = None,
     **kwargs,
 ):
-    """Loads DataFrame from comma-separated values (csv) file locally or from
-    Cloud Storage.
+    """Loads data from a comma-separated values (csv) file into a DataFrame.
 
     The CSV file data will be persisted as a temporary BigQuery table, which can be
     automatically recycled after the Session is closed.
 
     .. note::
         using `engine="bigquery"` will not guarantee the same ordering as the
         file. Instead, set a serialized index column as the index and sort by
-        that in the resulting DataFrame.
+        that in the resulting DataFrame. Only files stored on your local machine
+        or in Google Cloud Storage are supported.
 
     .. note::
         For non-bigquery engine, data is inlined in the query SQL if it is
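The ordering note above suggests a pattern like the following when `engine="bigquery"` is used; the bucket path and the `row_id` column are hypothetical:

```python
import bigframes.pandas as bpd

# Row order is not guaranteed with engine="bigquery", so use a serialized
# index column from the file as the index and sort on it afterwards.
df = bpd.read_csv(
    "gs://my-bucket/data.csv",  # hypothetical GCS path
    engine="bigquery",
    index_col="row_id",  # hypothetical monotonically increasing column
)
df = df.sort_index()
```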