Skip to content

feat: support routines with ARRAY return type in read_gbq_function #1412

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged 5 commits on Feb 21, 2025
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions bigframes/core/compile/ibis_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,10 +463,19 @@ def ibis_array_output_type_from_python_type(t: type) -> ibis_dtypes.DataType:
return python_type_to_ibis_type(t)


def ibis_type_from_type_kind(tk: bigquery.StandardSqlTypeNames) -> ibis_dtypes.DataType:
def ibis_type_from_bigquery_type(
type_: bigquery.StandardSqlDataType,
) -> ibis_dtypes.DataType:
"""Convert bq type to ibis. Only to be used for remote functions, does not handle all types."""
if tk not in bigframes.dtypes.RF_SUPPORTED_IO_BIGQUERY_TYPEKINDS:
if type_.type_kind not in bigframes.dtypes.RF_SUPPORTED_IO_BIGQUERY_TYPEKINDS:
raise UnsupportedTypeError(
tk, bigframes.dtypes.RF_SUPPORTED_IO_BIGQUERY_TYPEKINDS
type_.type_kind, bigframes.dtypes.RF_SUPPORTED_IO_BIGQUERY_TYPEKINDS
)
elif type_.type_kind == "ARRAY":
return ibis_dtypes.Array(
value_type=ibis_type_from_bigquery_type(
typing.cast(bigquery.StandardSqlDataType, type_.array_element_type)
)
)
return third_party_ibis_bqtypes.BigQueryType.to_ibis(tk)
else:
return third_party_ibis_bqtypes.BigQueryType.to_ibis(type_.type_kind)
9 changes: 6 additions & 3 deletions bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -4088,9 +4088,12 @@ def apply(self, func, *, axis=0, args: typing.Tuple = (), **kwargs):
)
result_series.name = None

# if the output is an array, reconstruct it from the json serialized
# string form
if bigframes.dtypes.is_array_like(func.output_dtype):
# If the result type is string but the function output is intended
# to be an array, reconstruct the array from the string assuming it
# is a json serialized form of the array.
if bigframes.dtypes.is_string_like(
result_series.dtype
) and bigframes.dtypes.is_array_like(func.output_dtype):
import bigframes.bigquery as bbq

result_dtype = bigframes.dtypes.arrow_dtype_to_bigframes_dtype(
Expand Down
1 change: 1 addition & 0 deletions bigframes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -874,4 +874,5 @@ def lcd_type_or_throw(dtype1: Dtype, dtype2: Dtype) -> Dtype:
"INT64",
"INTEGER",
"STRING",
"ARRAY",
}
6 changes: 6 additions & 0 deletions bigframes/functions/_function_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,7 @@ def try_delattr(attr):
try_delattr("bigframes_remote_function")
try_delattr("input_dtypes")
try_delattr("output_dtype")
try_delattr("bigframes_bigquery_function_output_dtype")
try_delattr("is_row_processor")
try_delattr("ibis_node")

Expand Down Expand Up @@ -589,6 +590,11 @@ def try_delattr(attr):
ibis_signature.output_type
)
)
func.bigframes_bigquery_function_output_dtype = (
bigframes.core.compile.ibis_types.ibis_dtype_to_bigframes_dtype(
ibis_output_type_for_bqrf
)
)
func.is_row_processor = is_row_processor
func.ibis_node = node

Expand Down
12 changes: 8 additions & 4 deletions bigframes/functions/function.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,10 @@ class ReturnTypeMissingError(ValueError):
# TODO: Move this to compile folder
def ibis_signature_from_routine(routine: bigquery.Routine) -> _utils.IbisSignature:
if routine.return_type:
ibis_output_type = bigframes.core.compile.ibis_types.ibis_type_from_type_kind(
routine.return_type.type_kind
ibis_output_type = (
bigframes.core.compile.ibis_types.ibis_type_from_bigquery_type(
routine.return_type
)
)
else:
raise ReturnTypeMissingError
Expand All @@ -82,8 +84,8 @@ def ibis_signature_from_routine(routine: bigquery.Routine) -> _utils.IbisSignatu
return _utils.IbisSignature(
parameter_names=[arg.name for arg in routine.arguments],
input_types=[
bigframes.core.compile.ibis_types.ibis_type_from_type_kind(
arg.data_type.type_kind
bigframes.core.compile.ibis_types.ibis_type_from_bigquery_type(
arg.data_type
)
if arg.data_type
else None
Expand Down Expand Up @@ -233,6 +235,8 @@ def func(*bigframes_args, **bigframes_kwargs):
else ibis_signature.output_type
)

func.bigframes_bigquery_function_output_dtype = bigframes.core.compile.ibis_types.ibis_dtype_to_bigframes_dtype(ibis_signature.output_type) # type: ignore

func.is_row_processor = is_row_processor # type: ignore
func.ibis_node = node # type: ignore
return func
40 changes: 9 additions & 31 deletions bigframes/operations/remote_function_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import dataclasses
import typing

from bigframes import dtypes
from bigframes.operations import base_ops


Expand All @@ -31,17 +30,10 @@ def expensive(self) -> bool:

def output_type(self, *input_types):
# This property should be set to a valid Dtype by the @remote_function decorator or read_gbq_function method
if hasattr(self.func, "output_dtype"):
if dtypes.is_array_like(self.func.output_dtype):
# TODO(b/284515241): remove this special handling to support
# array output types once BQ remote functions support ARRAY.
# Until then, use json serialized strings at the remote function
# level, and parse that to the intended output type at the
# bigframes level.
return dtypes.STRING_DTYPE
return self.func.output_dtype
if hasattr(self.func, "bigframes_bigquery_function_output_dtype"):
return self.func.bigframes_bigquery_function_output_dtype
else:
raise AttributeError("output_dtype not defined")
raise AttributeError("bigframes_bigquery_function_output_dtype not defined")


@dataclasses.dataclass(frozen=True)
Expand All @@ -55,17 +47,10 @@ def expensive(self) -> bool:

def output_type(self, *input_types):
# This property should be set to a valid Dtype by the @remote_function decorator or read_gbq_function method
if hasattr(self.func, "output_dtype"):
if dtypes.is_array_like(self.func.output_dtype):
# TODO(b/284515241): remove this special handling to support
# array output types once BQ remote functions support ARRAY.
# Until then, use json serialized strings at the remote function
# level, and parse that to the intended output type at the
# bigframes level.
return dtypes.STRING_DTYPE
return self.func.output_dtype
if hasattr(self.func, "bigframes_bigquery_function_output_dtype"):
return self.func.bigframes_bigquery_function_output_dtype
else:
raise AttributeError("output_dtype not defined")
raise AttributeError("bigframes_bigquery_function_output_dtype not defined")


@dataclasses.dataclass(frozen=True)
Expand All @@ -79,14 +64,7 @@ def expensive(self) -> bool:

def output_type(self, *input_types):
# This property should be set to a valid Dtype by the @remote_function decorator or read_gbq_function method
if hasattr(self.func, "output_dtype"):
if dtypes.is_array_like(self.func.output_dtype):
# TODO(b/284515241): remove this special handling to support
# array output types once BQ remote functions support ARRAY.
# Until then, use json serialized strings at the remote function
# level, and parse that to the intended output type at the
# bigframes level.
return dtypes.STRING_DTYPE
return self.func.output_dtype
if hasattr(self.func, "bigframes_bigquery_function_output_dtype"):
return self.func.bigframes_bigquery_function_output_dtype
else:
raise AttributeError("output_dtype not defined")
raise AttributeError("bigframes_bigquery_function_output_dtype not defined")
18 changes: 12 additions & 6 deletions bigframes/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1545,9 +1545,12 @@ def apply(
ops.RemoteFunctionOp(func=func, apply_on_null=True)
)

# if the output is an array, reconstruct it from the json serialized
# string form
if bigframes.dtypes.is_array_like(func.output_dtype):
# If the result type is string but the function output is intended to
# be an array, reconstruct the array from the string assuming it is a
# json serialized form of the array.
if bigframes.dtypes.is_string_like(
result_series.dtype
) and bigframes.dtypes.is_array_like(func.output_dtype):
import bigframes.bigquery as bbq

result_dtype = bigframes.dtypes.arrow_dtype_to_bigframes_dtype(
Expand Down Expand Up @@ -1585,9 +1588,12 @@ def combine(
other, ops.BinaryRemoteFunctionOp(func=func)
)

# if the output is an array, reconstruct it from the json serialized
# string form
if bigframes.dtypes.is_array_like(func.output_dtype):
# If the result type is string but the function output is intended to
# be an array, reconstruct the array from the string assuming it is a
# json serialized form of the array.
if bigframes.dtypes.is_string_like(
result_series.dtype
) and bigframes.dtypes.is_array_like(func.output_dtype):
import bigframes.bigquery as bbq

result_dtype = bigframes.dtypes.arrow_dtype_to_bigframes_dtype(
Expand Down
5 changes: 5 additions & 0 deletions tests/system/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,11 @@ def table_id_unique(dataset_id: str):
return f"{dataset_id}.{prefixer.create_prefix()}"


@pytest.fixture(scope="function")
def routine_id_unique(dataset_id: str):
return f"{dataset_id}.{prefixer.create_prefix()}"


@pytest.fixture(scope="session")
def scalars_schema(bigquery_client: bigquery.Client):
# TODO(swast): Add missing scalar data types such as BIGNUMERIC.
Expand Down
4 changes: 4 additions & 0 deletions tests/system/large/functions/test_remote_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -2193,6 +2193,10 @@ def foo(x, y, z):
)
)
)
assert (
getattr(foo, "bigframes_bigquery_function_output_dtype")
== bigframes.dtypes.STRING_DTYPE
)

# Fails to apply on dataframe with incompatible number of columns
with pytest.raises(
Expand Down
Loading