Skip to content

Commit 24ff852

Browse files
culpgrantGrant Culp
authored and
Grant Culp
committed
add ability to specify vectorized_scanner for write_pandas
1 parent 53592ed commit 24ff852

File tree

2 files changed

+48
-0
lines changed

2 files changed

+48
-0
lines changed

src/snowflake/connector/pandas_tools.py

+6
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,7 @@ def write_pandas(
231231
overwrite: bool = False,
232232
table_type: Literal["", "temp", "temporary", "transient"] = "",
233233
use_logical_type: bool | None = None,
234+
use_vectorized_scanner: bool | None = None,
234235
**kwargs: Any,
235236
) -> tuple[
236237
bool,
@@ -295,6 +296,10 @@ def write_pandas(
295296
Snowflake can interpret Parquet logical types during data loading. To enable Parquet logical types,
296297
set use_logical_type as True. Set to None to use Snowflake's default. For more information, see:
297298
https://docs.snowflake.com/en/sql-reference/sql/create-file-format
299+
use_vectorized_scanner: Boolean that specifies whether to use a vectorized scanner for loading Parquet files.
300+
Using the vectorized scanner can significantly reduce the latency for loading Parquet files. To enable
301+
vectorized scanning of Parquet files, set use_vectorized_scanner as True. Set to None to use Snowflake's default.
302+
For more information, see: https://docs.snowflake.com/en/sql-reference/sql/copy-into-table#label-use-vectorized-scanner
298303
299304
300305
Returns:
@@ -533,6 +538,7 @@ def drop_object(name: str, object_type: str) -> None:
533538
f"COMPRESSION={compression_map[compression]}"
534539
f"{' BINARY_AS_TEXT=FALSE' if auto_create_table or overwrite else ''}"
535540
f"{sql_use_logical_type}"
541+
f"{' USE_VECTORIZED_SCANNER=' + str(use_vectorized_scanner).upper() if use_vectorized_scanner is not None else ''}"
536542
f") "
537543
f"PURGE=TRUE ON_ERROR=?"
538544
)

test/integ/pandas/test_pandas_tools.py

+42
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,47 @@ def test_write_pandas_use_logical_type(
482482
cnx.execute_string(drop_sql)
483483

484484

485+
@pytest.mark.parametrize(
486+
("use_vectorized_scanner", "expected_file_format"),
487+
[
488+
(None, "FILE_FORMAT=(TYPE=PARQUET COMPRESSION=auto)"),
489+
(True, "FILE_FORMAT=(TYPE=PARQUET COMPRESSION=auto USE_VECTORIZED_SCANNER=TRUE)"),
490+
(False, "FILE_FORMAT=(TYPE=PARQUET COMPRESSION=auto USE_VECTORIZED_SCANNER=FALSE)"),
491+
],
492+
)
493+
def test_write_pandas_use_vectorized_scanner(
494+
conn_cnx: Callable[..., Generator[SnowflakeConnection, None, None]],
495+
use_vectorized_scanner: bool | None,
496+
expected_file_format: str,
497+
):
498+
"""Test that use_vectorized_scanner produces the correct FILE_FORMAT arguments in the COPY INTO SQL command."""
499+
from snowflake.connector.cursor import SnowflakeCursor
500+
501+
table_name = random_string(5, "use_vectorized_scanner")
502+
503+
with conn_cnx() as cnx:
504+
def mocked_execute(*args, **kwargs):
505+
if len(args) >= 1 and args[0].startswith("COPY INTO"):
506+
assert expected_file_format in args[0]
507+
cur = SnowflakeCursor(cnx)
508+
cur._result = iter([])
509+
return cur
510+
511+
with mock.patch(
512+
"snowflake.connector.cursor.SnowflakeCursor.execute",
513+
side_effect=mocked_execute,
514+
) as m_execute:
515+
success, nchunks, nrows, _ = write_pandas(
516+
cnx,
517+
sf_connector_version_df.get(),
518+
table_name=table_name,
519+
use_vectorized_scanner=use_vectorized_scanner,
520+
)
521+
assert m_execute.called and any(
522+
map(lambda e: "COPY INTO" in str(e[0]), m_execute.call_args_list)
523+
)
524+
525+
485526
def test_invalid_table_type_write_pandas(
486527
conn_cnx: Callable[..., Generator[SnowflakeConnection, None, None]],
487528
):
@@ -534,6 +575,7 @@ def test_table_location_building(
534575

535576
def mocked_execute(*args, **kwargs):
536577
if len(args) >= 1 and args[0].startswith("COPY INTO"):
578+
print(kwargs)
537579
assert kwargs["params"][0] == expected_location
538580
cur = SnowflakeCursor(cnx)
539581
cur._result = iter([])

0 commit comments

Comments
 (0)