Skip to content

Commit 8335ba1

Browse files
authored
Fix(clickhouse)!: preserve EXTRACT(date_part FROM datetime) calls (#3729)
1 parent f4a2872 commit 8335ba1

File tree

3 files changed

+22
-6
lines changed

3 files changed

+22
-6
lines changed

sqlglot/dialects/clickhouse.py

+17-2
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,6 @@ class Parser(parser.Parser):
174174
"DATE_FORMAT": _build_date_format,
175175
"DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
176176
"DATESUB": build_date_delta(exp.DateSub, default_unit=None),
177-
"EXTRACT": exp.RegexpExtract.from_arg_list,
178177
"FORMATDATETIME": _build_date_format,
179178
"JSONEXTRACTSTRING": build_json_extract_path(
180179
exp.JSONExtractScalar, zero_based_indexing=False
@@ -347,7 +346,6 @@ class Parser(parser.Parser):
347346
"QUANTILE": lambda self: self._parse_quantile(),
348347
}
349348

350-
FUNCTION_PARSERS.pop("EXTRACT")
351349
FUNCTION_PARSERS.pop("MATCH")
352350

353351
NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
@@ -410,6 +408,23 @@ class Parser(parser.Parser):
410408
"INDEX",
411409
}
412410

411+
def _parse_extract(self) -> exp.Extract | exp.Anonymous:
412+
index = self._index
413+
this = self._parse_bitwise()
414+
if self._match(TokenType.FROM):
415+
self._retreat(index)
416+
return super()._parse_extract()
417+
418+
# We return Anonymous here because extract and regexpExtract have different semantics,
419+
# so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
420+
# `extract('foobar', 'b')` works, but CH crashes for `regexpExtract('foobar', 'b')`.
421+
#
422+
# TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
423+
self._match(TokenType.COMMA)
424+
return self.expression(
425+
exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
426+
)
427+
413428
def _parse_assignment(self) -> t.Optional[exp.Expression]:
414429
this = super()._parse_assignment()
415430

sqlglot/parser.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -1477,9 +1477,9 @@ def _parse_command(self) -> exp.Command:
14771477

14781478
def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
14791479
"""
1480-
Attemps to backtrack if a parse function that contains a try/catch internally raises an error. This behavior can
1481-
be different depending on the uset-set ErrorLevel, so _try_parse aims to solve this by setting & resetting
1482-
the parser state accordingly
1480+
Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
1481+
This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
1482+
solve this by setting & resetting the parser state accordingly
14831483
"""
14841484
index = self._index
14851485
error_level = self.error_level
@@ -5345,7 +5345,7 @@ def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
53455345
order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
53465346
)
53475347

5348-
def _parse_extract(self) -> exp.Extract:
5348+
def _parse_extract(self) -> exp.Extract | exp.Anonymous:
53495349
this = self._parse_function() or self._parse_var_or_string(upper=True)
53505350

53515351
if self._match(TokenType.FROM):

tests/dialects/test_clickhouse.py

+1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ def test_clickhouse(self):
2525
self.assertEqual(expr.sql(dialect="clickhouse"), "COUNT(x)")
2626
self.assertIsNone(expr._meta)
2727

28+
self.validate_identity("SELECT EXTRACT(YEAR FROM toDateTime('2023-02-01'))")
2829
self.validate_identity("extract(haystack, pattern)")
2930
self.validate_identity("SELECT * FROM x LIMIT 1 UNION ALL SELECT * FROM y")
3031
self.validate_identity("SELECT CAST(x AS Tuple(String, Array(Nullable(Float64))))")

0 commit comments

Comments
 (0)