Skip to content

Commit 2dd2c3f

Browse files
cpcloud authored and jcrist committed
feat(clickhouse): partition kwargs for compile and execution in to_pyarrow and to_pandas
1 parent dcdb7a7 commit 2dd2c3f

File tree

2 files changed

+37
-10
lines changed

2 files changed

+37
-10
lines changed

ibis/backends/clickhouse/__init__.py

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@ def to_pyarrow_batches(
308308
params: Mapping[ir.Scalar, Any] | None = None,
309309
external_tables: Mapping[str, Any] | None = None,
310310
chunk_size: int = 1_000_000,
311-
**_: Any,
311+
**kwargs: Any,
312312
) -> pa.ipc.RecordBatchReader:
313313
"""Execute expression and return an iterator of pyarrow record batches.
314314
@@ -328,6 +328,8 @@ def to_pyarrow_batches(
328328
External data
329329
chunk_size
330330
Maximum number of rows to return in a single chunk
331+
kwargs
332+
Extra arguments passed directly to clickhouse-connect
331333
332334
Returns
333335
-------
@@ -357,14 +359,17 @@ def to_pyarrow_batches(
357359
external_tables = self._collect_in_memory_tables(expr, external_tables)
358360
external_data = self._normalize_external_tables(external_tables)
359361

360-
def batcher(sql: str, *, schema: pa.Schema) -> Iterator[pa.RecordBatch]:
361-
settings = {}
362+
settings = kwargs.pop("settings", {})
362363

363-
# readonly != 1 means that the server setting is writable
364-
if self.con.server_settings["max_block_size"].readonly != 1:
365-
settings["max_block_size"] = chunk_size
364+
# readonly != 1 means that the server setting is writable
365+
if self.con.server_settings["max_block_size"].readonly != 1:
366+
settings["max_block_size"] = chunk_size
367+
368+
def batcher(
369+
sql: str, *, schema: pa.Schema, settings, **kwargs
370+
) -> Iterator[pa.RecordBatch]:
366371
with self.con.query_column_block_stream(
367-
sql, external_data=external_data, settings=settings
372+
sql, external_data=external_data, settings=settings, **kwargs
368373
) as blocks:
369374
yield from map(
370375
partial(pa.RecordBatch.from_arrays, schema=schema), blocks
@@ -373,29 +378,34 @@ def batcher(sql: str, *, schema: pa.Schema) -> Iterator[pa.RecordBatch]:
373378
self._log(sql)
374379
schema = table.schema().to_pyarrow()
375380
return pa.ipc.RecordBatchReader.from_batches(
376-
schema, batcher(sql, schema=schema)
381+
schema, batcher(sql, schema=schema, settings=settings, **kwargs)
377382
)
378383

379384
def execute(
380385
self,
381386
expr: ir.Expr,
382387
limit: str | None = "default",
388+
params: Mapping[ir.Scalar, Any] | None = None,
383389
external_tables: Mapping[str, pd.DataFrame] | None = None,
384390
**kwargs: Any,
385391
) -> Any:
386392
"""Execute an expression."""
387393
import pandas as pd
388394

389395
table = expr.as_table()
390-
sql = self.compile(table, limit=limit, **kwargs)
396+
sql = self.compile(table, params=params, limit=limit)
391397

392398
schema = table.schema()
393399
self._log(sql)
394400

395401
external_tables = self._collect_in_memory_tables(expr, external_tables)
396402
external_data = self._normalize_external_tables(external_tables)
397403
df = self.con.query_df(
398-
sql, external_data=external_data, use_na_values=False, use_none=True
404+
sql,
405+
external_data=external_data,
406+
use_na_values=False,
407+
use_none=True,
408+
**kwargs,
399409
)
400410

401411
if df.empty:

ibis/backends/clickhouse/tests/test_client.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,3 +425,20 @@ def test_alias_column_ref(con):
425425
assert result.user_id.notnull().all()
426426
assert result.account_id.notnull().all()
427427
assert result.id_md5.notnull().all()
428+
429+
430+
@pytest.mark.parametrize("method_name", ["to_pandas", "to_pyarrow"])
def test_query_cache(con, method_name):
    """Check that a ``settings`` keyword is accepted by the execution methods.

    The same count expression is run once without extra keyword arguments
    and once with ``settings={"use_query_cache": True}``; both runs must
    produce equal results. A misspelled setting name is expected to raise
    ``ClickHouseDatabaseError``.
    """
    count_expr = con.table("functional_alltypes").count()
    run = getattr(count_expr, method_name)

    # Baseline run with no extra keyword arguments.
    expected = run()
    # Same query, this time supplying a settings mapping.
    result = run(settings={"use_query_cache": True})

    # test a bogus setting
    with pytest.raises(ClickHouseDatabaseError):
        run(settings={"ooze_query_cash": True})

    assert result == expected

0 commit comments

Comments
 (0)