Skip to content

Commit 775e43d

Browse files
author
Ziegler, Julian
committed
SNOW-2019088: Extend write_pandas by a parameter for schema inference
1 parent 985ec5e commit 775e43d

File tree

2 files changed

+28
-22
lines changed

2 files changed

+28
-22
lines changed

DESCRIPTION.md

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ Source code is also available at: https://github.com/snowflakedb/snowflake-conne
1818
- Added `gcs_use_virtual_endpoints` connection property that forces the use of virtual GCS endpoints. This makes it possible to set up a private DNS entry for the GCS endpoint. See more: https://cloud.google.com/storage/docs/request-endpoints#xml-api
1919
- Fixed a bug that caused the driver to fail silently on `TO_DATE` Arrow-to-Python conversion when an invalid date was followed by a valid one.
2020
- Added `check_arrow_conversion_error_on_every_column` connection property that can be set to `False` to restore the previous behaviour, in which the driver ignores conversion errors unless they occur in the last column. This flag's purpose is to unblock workflows that may be impacted by the bugfix; the flag will be removed in a later release.
21+
- Added `infer_schema` parameter to `write_pandas` to perform schema inference on the passed data.
2122

2223
- v3.14.0(March 03, 2025)
2324
- Bumped pyOpenSSL dependency upper boundary from <25.0.0 to <26.0.0.

src/snowflake/connector/pandas_tools.py

+27-22
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,7 @@ def write_pandas(
258258
on_error: str = "abort_statement",
259259
parallel: int = 4,
260260
quote_identifiers: bool = True,
261+
infer_schema: bool = False,
261262
auto_create_table: bool = False,
262263
create_temp_table: bool = False,
263264
overwrite: bool = False,
@@ -316,6 +317,8 @@ def write_pandas(
316317
quote_identifiers: By default, identifiers, specifically database, schema, table and column names
317318
(from df.columns) will be quoted. If set to False, identifiers are passed on to Snowflake without quoting.
318319
I.e. identifiers will be coerced to uppercase by Snowflake. (Default value = True)
320+
infer_schema: Perform explicit schema inference on the data in the DataFrame and use the inferred data types
321+
when selecting columns from the DataFrame. (Default value = False)
319322
auto_create_table: When true, will automatically create a table with corresponding columns for each column in
320323
the passed in DataFrame. The table will not be created if it already exists
321324
create_temp_table: (Deprecated) Will make the auto-created table as a temporary table
@@ -481,7 +484,7 @@ def drop_object(name: str, object_type: str) -> None:
481484
num_statements=1,
482485
)
483486

484-
if auto_create_table or overwrite:
487+
if auto_create_table or overwrite or infer_schema:
485488
file_format_location = _create_temp_file_format(
486489
cursor,
487490
database,
@@ -520,27 +523,29 @@ def drop_object(name: str, object_type: str) -> None:
520523
quote_identifiers,
521524
)
522525

523-
iceberg = "ICEBERG " if iceberg_config else ""
524-
iceberg_config_statement = _iceberg_config_statement_helper(
525-
iceberg_config or {}
526-
)
526+
if auto_create_table or overwrite:
527+
iceberg = "ICEBERG " if iceberg_config else ""
528+
iceberg_config_statement = _iceberg_config_statement_helper(
529+
iceberg_config or {}
530+
)
531+
532+
create_table_sql = (
533+
f"CREATE {table_type.upper()} {iceberg}TABLE IF NOT EXISTS identifier(?) "
534+
f"({create_table_columns}) {iceberg_config_statement}"
535+
f" /* Python:snowflake.connector.pandas_tools.write_pandas() */ "
536+
)
537+
params = (target_table_location,)
538+
logger.debug(
539+
f"auto creating table with '{create_table_sql}'. params: %s", params
540+
)
541+
cursor.execute(
542+
create_table_sql,
543+
_is_internal=True,
544+
_force_qmark_paramstyle=True,
545+
params=params,
546+
num_statements=1,
547+
)
527548

528-
create_table_sql = (
529-
f"CREATE {table_type.upper()} {iceberg}TABLE IF NOT EXISTS identifier(?) "
530-
f"({create_table_columns}) {iceberg_config_statement}"
531-
f" /* Python:snowflake.connector.pandas_tools.write_pandas() */ "
532-
)
533-
params = (target_table_location,)
534-
logger.debug(
535-
f"auto creating table with '{create_table_sql}'. params: %s", params
536-
)
537-
cursor.execute(
538-
create_table_sql,
539-
_is_internal=True,
540-
_force_qmark_paramstyle=True,
541-
params=params,
542-
num_statements=1,
543-
)
544549
# need explicit casting when the underlying table schema is inferred
545550
parquet_columns = "$1:" + ",$1:".join(
546551
f"{quote}{snowflake_col}{quote}::{column_type_mapping[col]}"
@@ -577,7 +582,7 @@ def drop_object(name: str, object_type: str) -> None:
577582
f"FILE_FORMAT=("
578583
f"TYPE=PARQUET "
579584
f"COMPRESSION={compression_map[compression]}"
580-
f"{' BINARY_AS_TEXT=FALSE' if auto_create_table or overwrite else ''}"
585+
f"{' BINARY_AS_TEXT=FALSE' if auto_create_table or overwrite or infer_schema else ''}"
581586
f"{sql_use_logical_type}"
582587
f") "
583588
f"PURGE=TRUE ON_ERROR=?"

0 commit comments

Comments
 (0)