Commit 95de884

plamut authored and emar-kar committed
BigQuery: Raise helpful error when loading table from dataframe with STRUCT columns (googleapis#9053)
* Issue warning if no schema when loading from DF
* Raise error if serializing DF with struct fields
* Rewrite test assertion to make coverage happy
* Make the unsupported type message more general
* Remove warning on missing schema

  The warning will be added once the support for partial schemas and
  automatic schema detection is implemented.
1 parent aad16d2 commit 95de884
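
From a caller's point of view, the change turns an opaque serialization failure into an immediate ValueError. Below is a minimal sketch of the new behavior, using hypothetical project, dataset, and table names and assuming default application credentials:

import pandas
from google.cloud import bigquery

client = bigquery.Client()  # assumes default credentials are configured
table_ref = bigquery.TableReference.from_string("my-project.my_dataset.my_table")

# "extra" is a struct (RECORD) column, which the parquet upload path
# cannot serialize correctly yet.
dataframe = pandas.DataFrame([{"id": 1, "extra": [{"foo": 1}]}])
job_config = bigquery.LoadJobConfig(
    schema=[
        bigquery.SchemaField("id", "INTEGER"),
        bigquery.SchemaField(
            "extra", "RECORD", fields=[bigquery.SchemaField("foo", "INTEGER")]
        ),
    ]
)

try:
    client.load_table_from_dataframe(dataframe, table_ref, job_config=job_config)
except ValueError as exc:
    print(exc)  # Uploading dataframes with struct (record) column types is not supported. ...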

File tree

2 files changed: +45 -0 lines changed


bigquery/google/cloud/bigquery/client.py (+11)

@@ -60,6 +60,7 @@
 from google.cloud.bigquery.retry import DEFAULT_RETRY
 from google.cloud.bigquery.routine import Routine
 from google.cloud.bigquery.routine import RoutineReference
+from google.cloud.bigquery.schema import _STRUCT_TYPES
 from google.cloud.bigquery.schema import SchemaField
 from google.cloud.bigquery.table import _table_arg_to_table
 from google.cloud.bigquery.table import _table_arg_to_table_ref
@@ -1529,6 +1530,15 @@ def load_table_from_dataframe(
         os.close(tmpfd)
 
         try:
+            if job_config.schema:
+                for field in job_config.schema:
+                    if field.field_type in _STRUCT_TYPES:
+                        raise ValueError(
+                            "Uploading dataframes with struct (record) column types "
+                            "is not supported. See: "
+                            "https://github.com/googleapis/google-cloud-python/issues/8191"
+                        )
+
             if pyarrow and job_config.schema:
                 if parquet_compression == "snappy":  # adjust the default value
                     parquet_compression = parquet_compression.upper()
@@ -1548,6 +1558,7 @@ def load_table_from_dataframe(
                     PendingDeprecationWarning,
                     stacklevel=2,
                 )
+
                 dataframe.to_parquet(tmppath, compression=parquet_compression)
 
             with open(tmppath, "rb") as parquet_file:
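
For reference, here is the new guard distilled into a standalone helper. This is a sketch, not library code: the helper name is hypothetical, and the value of _STRUCT_TYPES is an assumption (google.cloud.bigquery.schema defines it; it presumably covers both spellings BigQuery accepts for struct columns):

_STRUCT_TYPES = ("RECORD", "STRUCT")  # assumed definition, for illustration only

def _raise_on_struct_fields(schema):  # hypothetical helper name
    """Reject schemas with struct columns before attempting serialization.

    Checking top-level fields is sufficient: a nested struct can only
    occur inside a top-level RECORD/STRUCT column.
    """
    for field in schema:
        if field.field_type in _STRUCT_TYPES:
            raise ValueError(
                "Uploading dataframes with struct (record) column types "
                "is not supported. See: "
                "https://github.com/googleapis/google-cloud-python/issues/8191"
            )

Failing fast here yields a clear error at call time instead of a malformed parquet file or a rejected load job on the server.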

bigquery/tests/unit/test_client.py (+34)

@@ -5328,6 +5328,40 @@ def test_load_table_from_dataframe_w_custom_job_config(self):
         assert sent_config is job_config
         assert sent_config.source_format == job.SourceFormat.PARQUET
 
+    @unittest.skipIf(pandas is None, "Requires `pandas`")
+    @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
+    def test_load_table_from_dataframe_struct_fields_error(self):
+        from google.cloud.bigquery import job
+        from google.cloud.bigquery.schema import SchemaField
+
+        client = self._make_client()
+
+        records = [{"float_column": 3.14, "struct_column": [{"foo": 1}, {"bar": -1}]}]
+        dataframe = pandas.DataFrame(data=records)
+
+        schema = [
+            SchemaField("float_column", "FLOAT"),
+            SchemaField(
+                "agg_col",
+                "RECORD",
+                fields=[SchemaField("foo", "INTEGER"), SchemaField("bar", "INTEGER")],
+            ),
+        ]
+        job_config = job.LoadJobConfig(schema=schema)
+
+        load_patch = mock.patch(
+            "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
+        )
+
+        with pytest.raises(ValueError) as exc_info, load_patch:
+            client.load_table_from_dataframe(
+                dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION
+            )
+
+        err_msg = str(exc_info.value)
+        assert "struct" in err_msg
+        assert "not support" in err_msg
+
     @unittest.skipIf(pandas is None, "Requires `pandas`")
     @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
     def test_load_table_from_dataframe_w_schema_wo_pyarrow(self):
