Skip to content

Commit 0a77c0e

Browse files
authored
Release 0 8 16 (#485)
* Check for optional libraries in client methods * Log unexpected http next chunk unexpected * Log unexpected http next chunk unexpected * Updates for 0.8.16 release
1 parent 56e51bd commit 0a77c0e

File tree

6 files changed

+40
-10
lines changed

6 files changed

+40
-10
lines changed

CHANGELOG.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,23 @@ release (0.9.0), unrecognized arguments/keywords for these methods of creating a
2121
instead of being passed as ClickHouse server settings. This is in conjunction with some refactoring in Client construction.
2222
The supported method of passing ClickHouse server settings is to prefix such arguments/query parameters with `ch_`.
2323

24+
## 0.8.16, 2025-03-28
25+
### Bug Fixes
26+
- Don't send a setting value if the setting is already correct according to the `system.settings` table.
27+
Closes https://github.com/ClickHouse/clickhouse-connect/issues/469
28+
- Ensure that the HTTP `user_agent` header is in ASCII. Note this could lead to an incorrectly encoded `os_user` if the
29+
`os_user` is not an ASCII string. Closes https://github.com/ClickHouse/clickhouse-connect/issues/484
30+
- Fix "cannot access local variable" exception when the HTTP client encounters an unexpected streaming error. Also
31+
log that unexpected streaming error to assist debugging. Closes https://github.com/ClickHouse/clickhouse-connect/issues/483
32+
- Check that arrow/pandas is installed when calling `query_df` and `query_arrow` and raise a more meaningful exception
33+
if the required library is absent. Closes https://github.com/ClickHouse/clickhouse-connect/issues/477
34+
35+
### Improvements
36+
- Some typing hints have been corrected. Thanks to [Avery Fischer](https://github.com/biggerfisch) for the PR!
37+
- The Docker-based tests have been fixed to work with security improvements in recent ClickHouse releases.
38+
- Query string cleanup is now (in theory) microseconds faster. Thanks to [Sviatoslav Bobryshev](https://github.com/sbobryshev)
39+
for the optimization
40+
2441
## 0.8.15, 2025-01-25
2542
### Bug Fix
2643
- The async client was not shutting down its associated executor thread pool, resulting in a memory leak if multiple

clickhouse_connect/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
version = '0.8.15'
1+
version = '0.8.16'

clickhouse_connect/common.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,9 @@ def build_client_name(client_name: str):
4141
os_user = f'; os_user:{getpass.getuser()}'
4242
except Exception: # pylint: disable=broad-except
4343
pass
44-
return (f'{client_name}{product_name}clickhouse-connect/{version()}' +
44+
full_name = (f'{client_name}{product_name}clickhouse-connect/{version()}' +
4545
f' (lv:py/{py_version}; mode:sync; os:{sys.platform}{os_user})')
46+
return full_name.encode('ascii', 'ignore').decode()
4647

4748

4849
def get_setting(name: str):

clickhouse_connect/driver/client.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from clickhouse_connect.driver.exceptions import ProgrammingError, OperationalError
2020
from clickhouse_connect.driver.external import ExternalData
2121
from clickhouse_connect.driver.insert import InsertContext
22+
from clickhouse_connect.driver.options import check_arrow, check_pandas, check_numpy
2223
from clickhouse_connect.driver.summary import QuerySummary
2324
from clickhouse_connect.driver.models import ColumnDef, SettingDef, SettingStatus
2425
from clickhouse_connect.driver.query import QueryResult, to_arrow, to_arrow_batches, QueryContext, arrow_buffer
@@ -68,7 +69,7 @@ def __init__(self,
6869
self.uri = uri
6970
self._init_common_settings(apply_server_timezone)
7071

71-
def _init_common_settings(self, apply_server_timezone:Optional[Union[str, bool]] ):
72+
def _init_common_settings(self, apply_server_timezone: Optional[Union[str, bool]]):
7273
self.server_tz, dst_safe = pytz.UTC, True
7374
self.server_version, server_tz = \
7475
tuple(self.command('SELECT version(), timezone()', use_database=False))
@@ -122,14 +123,16 @@ def _validate_settings(self, settings: Optional[Dict[str, Any]]) -> Dict[str, st
122123
return validated
123124

124125
def _validate_setting(self, key: str, value: Any, invalid_action: str) -> Optional[str]:
125-
new_value = str(value)
126+
str_value = str(value)
126127
if value is True:
127-
new_value = '1'
128+
str_value = '1'
128129
elif value is False:
129-
new_value = '0'
130+
str_value = '0'
130131
if key not in self.valid_transport_settings:
131132
setting_def = self.server_settings.get(key)
132-
if setting_def is None or (setting_def.readonly and setting_def.value != new_value):
133+
if setting_def and setting_def.value == str_value:
134+
return None # don't send settings that are already the expected value
135+
if setting_def is None or setting_def.readonly:
133136
if key in self.optional_transport_settings:
134137
return None
135138
if invalid_action == 'send':
@@ -139,7 +142,7 @@ def _validate_setting(self, key: str, value: Any, invalid_action: str) -> Option
139142
return None
140143
else:
141144
raise ProgrammingError(f'Setting {key} is unknown or readonly') from None
142-
return new_value
145+
return str_value
143146

144147
def _setting_status(self, key: str) -> SettingStatus:
145148
comp_setting = self.server_settings.get(key)
@@ -342,6 +345,7 @@ def query_np(self,
342345
create_query_context method
343346
:return: Numpy array representing the result set
344347
"""
348+
check_numpy()
345349
return self._context_query(locals(), use_numpy=True).np_result
346350

347351
# pylint: disable=duplicate-code,too-many-arguments,unused-argument
@@ -361,6 +365,7 @@ def query_np_stream(self,
361365
create_query_context method
362366
:return: Generator that yield a numpy array per block representing the result set
363367
"""
368+
check_numpy()
364369
return self._context_query(locals(), use_numpy=True, streaming=True).np_stream
365370

366371
# pylint: disable=duplicate-code,unused-argument
@@ -384,6 +389,7 @@ def query_df(self,
384389
create_query_context method
385390
:return: Pandas dataframe representing the result set
386391
"""
392+
check_pandas()
387393
return self._context_query(locals(), use_numpy=True, as_pandas=True).df_result
388394

389395
# pylint: disable=duplicate-code,unused-argument
@@ -407,6 +413,7 @@ def query_df_stream(self,
407413
create_query_context method
408414
:return: Generator that yields a Pandas dataframe per block representing the result set
409415
"""
416+
check_pandas()
410417
return self._context_query(locals(), use_numpy=True,
411418
as_pandas=True,
412419
streaming=True).df_stream
@@ -519,6 +526,7 @@ def query_arrow(self,
519526
:param external_data ClickHouse "external data" to send with query
520527
:return: PyArrow.Table
521528
"""
529+
check_arrow()
522530
settings = self._update_arrow_settings(settings, use_strings)
523531
return to_arrow(self.raw_query(query,
524532
parameters,
@@ -541,6 +549,7 @@ def query_arrow_stream(self,
541549
:param external_data ClickHouse "external data" to send with query
542550
:return: Generator that yields a PyArrow.Table for per block representing the result set
543551
"""
552+
check_arrow()
544553
settings = self._update_arrow_settings(settings, use_strings)
545554
return to_arrow_batches(self.raw_stream(query,
546555
parameters,
@@ -661,6 +670,7 @@ def insert_df(self, table: str = None,
661670
different data batches
662671
:return: QuerySummary with summary information, throws exception if insert fails
663672
"""
673+
check_pandas()
664674
if context is None:
665675
if column_names is None:
666676
column_names = df.columns
@@ -686,6 +696,7 @@ def insert_arrow(self, table: str,
686696
:param settings: Optional dictionary of ClickHouse settings (key/string values)
687697
:return: QuerySummary with summary information, throws exception if insert fails
688698
"""
699+
check_arrow()
689700
full_table = table if '.' in table or not database else f'{database}.{table}'
690701
compression = self.write_compression if self.write_compression in ('zstd', 'lz4') else None
691702
column_names, insert_block = arrow_buffer(arrow_table, compression)

clickhouse_connect/driver/httpclient.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -527,7 +527,7 @@ def ping(self):
527527
See BaseClient doc_string for this method
528528
"""
529529
try:
530-
response = self.http.request('GET', f'{self.url}/ping', timeout=3)
530+
response = self.http.request('GET', f'{self.url}/ping', timeout=3, preload_content=True)
531531
return 200 <= response.status < 300
532532
except HTTPError:
533533
logger.debug('ping failed', exc_info=True)

clickhouse_connect/driver/httputil.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -228,12 +228,13 @@ def buffered():
228228
read_gen = response.stream(chunk_size, decompress is None)
229229
while True:
230230
while not done:
231+
chunk = None
231232
try:
232233
chunk = next(read_gen, None) # Always try to read at least one chunk if there are any left
233234
except Exception: # pylint: disable=broad-except
234235
# By swallowing an unexpected exception reading the stream, we will let consumers decide how to
235236
# handle the unexpected end of stream
236-
pass
237+
logger.warning('unexpected failure to read next chunk', exc_info=True)
237238
if not chunk:
238239
done = True
239240
break

0 commit comments

Comments
 (0)