@@ -174,7 +174,6 @@ class Parser(parser.Parser):
174
174
"DATE_FORMAT" : _build_date_format ,
175
175
"DATE_SUB" : build_date_delta (exp .DateSub , default_unit = None ),
176
176
"DATESUB" : build_date_delta (exp .DateSub , default_unit = None ),
177
- "EXTRACT" : exp .RegexpExtract .from_arg_list ,
178
177
"FORMATDATETIME" : _build_date_format ,
179
178
"JSONEXTRACTSTRING" : build_json_extract_path (
180
179
exp .JSONExtractScalar , zero_based_indexing = False
@@ -347,7 +346,6 @@ class Parser(parser.Parser):
347
346
"QUANTILE" : lambda self : self ._parse_quantile (),
348
347
}
349
348
350
- FUNCTION_PARSERS .pop ("EXTRACT" )
351
349
FUNCTION_PARSERS .pop ("MATCH" )
352
350
353
351
NO_PAREN_FUNCTION_PARSERS = parser .Parser .NO_PAREN_FUNCTION_PARSERS .copy ()
@@ -410,6 +408,23 @@ class Parser(parser.Parser):
410
408
"INDEX" ,
411
409
}
412
410
411
def _parse_extract(self) -> exp.Extract | exp.Anonymous:
    """Parse the arguments of an `extract(...)` call.

    The first argument is parsed speculatively. If a FROM token follows it,
    the parser rewinds to where it started and hands the whole construct to
    the parent implementation. Otherwise the call is kept as an opaque
    two-argument `extract` function.

    Returns:
        Whatever the parent `_parse_extract` produces for the FROM form, or
        an `exp.Anonymous` named "extract" holding both parsed arguments.
    """
    start = self._index
    first_arg = self._parse_bitwise()

    if not self._match(TokenType.FROM):
        # Anonymous is returned on purpose: extract and regexpExtract do not share
        # semantics, so turning extract(foo, bar) into RegexpExtract could break
        # queries. E.g., `extract('foobar', 'b')` works, whereas CH crashes for
        # `regexpExtract('foobar', 'b')`.
        #
        # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
        self._match(TokenType.COMMA)  # the separator is consumed if present, not enforced
        second_arg = self._parse_bitwise()
        return self.expression(
            exp.Anonymous, this="extract", expressions=[first_arg, second_arg]
        )

    # FROM form: undo the speculative parse so the parent starts from the first token.
    self._retreat(start)
    return super()._parse_extract()
427
+
413
428
def _parse_assignment (self ) -> t .Optional [exp .Expression ]:
414
429
this = super ()._parse_assignment ()
415
430
0 commit comments