feat(snowflake): regex extract, search and replace

cpcloud · cpcloud · commit 9c8217908caf · 2023-01-19T08:16:42.000-05:00
diff --git a/ibis/backends/snowflake/registry.py b/ibis/backends/snowflake/registry.py
@@ -226,6 +226,21 @@ def _map(_, op):
         ops.BitXor: reduction(sa.func.bitxor_agg),
         ops.DateFromYMD: fixed_arity(sa.func.date_from_parts, 3),
         ops.StringToTimestamp: fixed_arity(sa.func.to_timestamp_tz, 2),
+        # REGEXP_SUBSTR's third positional argument is the start position, not
+        # the capture group: the group number is the sixth argument, and "e"
+        # asks for sub-match extraction
+        ops.RegexExtract: fixed_arity(
+            lambda arg, pattern, index: sa.func.regexp_substr(
+                arg, pattern, 1, 1, "ce", index
+            ),
+            3,
+        ),
+        # the REGEXP operator implicitly anchors the pattern at both ends, so use
+        # REGEXP_INSTR (0 when there is no match) to get search semantics
+        ops.RegexSearch: fixed_arity(
+            lambda arg, pattern: sa.func.regexp_instr(arg, pattern) != 0, 2
+        ),
+        ops.RegexReplace: fixed_arity(sa.func.regexp_replace, 3),
     }
 )
 
@@ -243,9 +258,6 @@ def _map(_, op):
     ops.MultiQuantile,
     # ibis.expr.operations.strings
     ops.FindInSet,
-    ops.RegexExtract,
-    ops.RegexReplace,
-    ops.RegexSearch,
     # ibis.expr.operations.structs
     ops.StructField,
     # ibis.expr.operations.temporal
diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py
@@ -75,25 +75,25 @@ def test_string_col_is_unicode(alltypes, df):
             lambda t: t.string_col.re_search(r'\d+'),
             lambda t: t.string_col.str.contains(r'\d+'),
             id='re_search',
-            marks=pytest.mark.notimpl(["impala", "datafusion", "snowflake", "mssql"]),
+            marks=pytest.mark.notimpl(["impala", "datafusion", "mssql"]),
         ),
         param(
             lambda t: t.string_col.re_search(r'[[:digit:]]+'),
             lambda t: t.string_col.str.contains(r'\d+'),
             id='re_search_posix',
-            marks=pytest.mark.notimpl(["datafusion", "pyspark", "snowflake", "mssql"]),
+            marks=pytest.mark.notimpl(["datafusion", "pyspark", "mssql"]),
         ),
         param(
             lambda t: t.string_col.re_extract(r'(\d+)', 1),
             lambda t: t.string_col.str.extract(r'(\d+)', expand=False),
             id='re_extract',
-            marks=pytest.mark.notimpl(["impala", "mysql", "snowflake", "mssql"]),
+            marks=pytest.mark.notimpl(["impala", "mysql", "mssql"]),
         ),
         param(
             lambda t: t.string_col.re_extract(r'([[:digit:]]+)', 1),
             lambda t: t.string_col.str.extract(r'(\d+)', expand=False),
             id='re_extract_posix',
-            marks=pytest.mark.notimpl(["mysql", "pyspark", "snowflake", "mssql"]),
+            marks=pytest.mark.notimpl(["mysql", "pyspark", "mssql"]),
         ),
         param(
             lambda t: (t.string_col + "1").re_extract(r'\d(\d+)', 0),
@@ -105,17 +105,13 @@ def test_string_col_is_unicode(alltypes, df):
             lambda t: t.string_col.re_replace(r'[[:digit:]]+', 'a'),
             lambda t: t.string_col.str.replace(r'\d+', 'a', regex=True),
             id='re_replace_posix',
-            marks=pytest.mark.notimpl(
-                ['datafusion', "mysql", "pyspark", "snowflake", "mssql"]
-            ),
+            marks=pytest.mark.notimpl(['datafusion', "mysql", "pyspark", "mssql"]),
         ),
         param(
             lambda t: t.string_col.re_replace(r'\d+', 'a'),
             lambda t: t.string_col.str.replace(r'\d+', 'a', regex=True),
             id='re_replace',
-            marks=pytest.mark.notimpl(
-                ["impala", "datafusion", "mysql", "snowflake", "mssql"]
-            ),
+            marks=pytest.mark.notimpl(["impala", "datafusion", "mysql", "mssql"]),
         ),
         param(
             lambda t: t.string_col.repeat(2),
@@ -379,7 +375,7 @@ def test_string(backend, alltypes, df, result_func, expected_func):
     backend.assert_series_equal(result, expected)
 
 
-@pytest.mark.notimpl(["datafusion", "mysql", "snowflake", "mssql"])
+@pytest.mark.notimpl(["datafusion", "mysql", "mssql"])
 def test_re_replace_global(con):
     expr = ibis.literal("aba").re_replace("a", "c")
     result = con.execute(expr)