@@ -982,61 +982,76 @@ def unpivot(
982
982
ArrayValue: The unpivoted ArrayValue
983
983
"""
984
984
table = self ._to_ibis_expr (ordering_mode = "offset_col" )
985
- sub_expressions = []
986
-
987
- # Use ibis memtable to infer type of rowlabels (if possible)
988
- # TODO: Allow caller to specify dtype
989
- labels_ibis_type = ibis .memtable ({"col" : row_labels })["col" ].type ()
990
- labels_dtype = bigframes .dtypes .ibis_dtype_to_bigframes_dtype (labels_ibis_type )
991
-
992
985
row_n = len (row_labels )
993
986
if not all (
994
987
len (source_columns ) == row_n for _ , source_columns in unpivot_columns
995
988
):
996
989
raise ValueError ("Columns and row labels must all be same length." )
997
990
998
- for i in range (row_n ):
999
- values = []
1000
- for j in range (len (unpivot_columns )):
1001
- result_col , source_cols = unpivot_columns [j ]
1002
- col_dtype = dtype [j ] if utils .is_list_like (dtype ) else dtype
1003
- if source_cols [i ] is not None :
1004
- values .append (
1005
- ops .AsTypeOp (col_dtype )
1006
- ._as_ibis (table [source_cols [i ]])
1007
- .name (result_col )
1008
- )
1009
- else :
1010
- values .append (
1011
- bigframes .dtypes .literal_to_ibis_scalar (
1012
- None , force_dtype = col_dtype
1013
- ).name (result_col )
1014
- )
1015
- offsets_value = (
1016
- ((table [ORDER_ID_COLUMN ] * row_n ) + i )
1017
- .cast (ibis_dtypes .int64 )
1018
- .name (ORDER_ID_COLUMN ),
991
+ unpivot_offset_id = bigframes .core .guid .generate_guid ("unpivot_offsets_" )
992
+ unpivot_table = table .cross_join (
993
+ ibis .memtable ({unpivot_offset_id : range (row_n )})
994
+ )
995
+ unpivot_offsets_value = (
996
+ (
997
+ (unpivot_table [ORDER_ID_COLUMN ] * row_n )
998
+ + unpivot_table [unpivot_offset_id ]
1019
999
)
1020
- sub_expr = table .select (
1021
- passthrough_columns ,
1000
+ .cast (ibis_dtypes .int64 )
1001
+ .name (ORDER_ID_COLUMN ),
1002
+ )
1003
+
1004
+ # Use ibis memtable to infer type of rowlabels (if possible)
1005
+ # TODO: Allow caller to specify dtype
1006
+ labels_ibis_type = ibis .memtable ({"col" : row_labels })["col" ].type ()
1007
+ labels_dtype = bigframes .dtypes .ibis_dtype_to_bigframes_dtype (labels_ibis_type )
1008
+ cases = [
1009
+ (
1010
+ i ,
1022
1011
bigframes .dtypes .literal_to_ibis_scalar (
1023
- row_labels [i ], force_dtype = labels_dtype # type:ignore
1024
- ).name (index_col_id ),
1025
- * values ,
1026
- offsets_value ,
1012
+ row_labels [i ], force_dtype = labels_dtype
1013
+ ),
1027
1014
)
1028
- sub_expressions .append (sub_expr )
1029
- rotated_table = ibis .union (* sub_expressions )
1015
+ for i in range (len (row_labels ))
1016
+ ]
1017
+ labels_value = (
1018
+ typing .cast (ibis_types .IntegerColumn , unpivot_table [unpivot_offset_id ])
1019
+ .cases (cases , default = None )
1020
+ .name (index_col_id )
1021
+ )
1022
+
1023
+ unpivot_values = []
1024
+ for j in range (len (unpivot_columns )):
1025
+ col_dtype = dtype [j ] if utils .is_list_like (dtype ) else dtype
1026
+ result_col , source_cols = unpivot_columns [j ]
1027
+ null_value = bigframes .dtypes .literal_to_ibis_scalar (
1028
+ None , force_dtype = col_dtype
1029
+ )
1030
+ ibis_values = [
1031
+ ops .AsTypeOp (col_dtype )._as_ibis (unpivot_table [col ])
1032
+ if col is not None
1033
+ else null_value
1034
+ for col in source_cols
1035
+ ]
1036
+ cases = [(i , ibis_values [i ]) for i in range (len (ibis_values ))]
1037
+ unpivot_value = typing .cast (
1038
+ ibis_types .IntegerColumn , unpivot_table [unpivot_offset_id ]
1039
+ ).cases (cases , default = null_value )
1040
+ unpivot_values .append (unpivot_value .name (result_col ))
1041
+
1042
+ unpivot_table = unpivot_table .select (
1043
+ passthrough_columns , labels_value , * unpivot_values , unpivot_offsets_value
1044
+ )
1030
1045
1031
1046
value_columns = [
1032
- rotated_table [value_col_id ] for value_col_id , _ in unpivot_columns
1047
+ unpivot_table [value_col_id ] for value_col_id , _ in unpivot_columns
1033
1048
]
1034
- passthrough_values = [rotated_table [col ] for col in passthrough_columns ]
1049
+ passthrough_values = [unpivot_table [col ] for col in passthrough_columns ]
1035
1050
return ArrayValue (
1036
1051
session = self ._session ,
1037
- table = rotated_table ,
1038
- columns = [rotated_table [index_col_id ], * value_columns , * passthrough_values ],
1039
- hidden_ordering_columns = [rotated_table [ORDER_ID_COLUMN ]],
1052
+ table = unpivot_table ,
1053
+ columns = [unpivot_table [index_col_id ], * value_columns , * passthrough_values ],
1054
+ hidden_ordering_columns = [unpivot_table [ORDER_ID_COLUMN ]],
1040
1055
ordering = ExpressionOrdering (
1041
1056
ordering_value_columns = [OrderingColumnReference (ORDER_ID_COLUMN )],
1042
1057
integer_encoding = IntegerEncoding (is_encoded = True , is_sequential = True ),
0 commit comments