@@ -144,21 +144,56 @@ def mem_expr_from_pandas(
144
144
"""
145
145
Builds an in-memory only (SQL only) expr from a pandas dataframe.
146
146
147
- Caution: If session is None, only a subset of expr functionality will be available (null Session is usually not supported).
147
+ Caution: If session is None, only a subset of expr functionality will
148
+ be available (null Session is usually not supported).
148
149
"""
149
- # must set non-null column labels. these are not the user-facing labels
150
- pd_df = pd_df . set_axis (
151
- [ column or bigframes . core . guid . generate_guid ( ) for column in pd_df .columns ],
152
- axis = "columns" ,
153
- )
150
+ # We can't include any hidden columns in the ArrayValue constructor, so
151
+ # grab the column names before we add the hidden ordering column.
152
+ column_names = [ str ( column ) for column in pd_df .columns ]
153
+ # Make sure column names are all strings.
154
+ pd_df = pd_df . set_axis ( column_names , axis = "columns" )
154
155
pd_df = pd_df .assign (** {ORDER_ID_COLUMN : range (len (pd_df ))})
156
+
155
157
# ibis memtable cannot handle NA, must convert to None
156
158
pd_df = pd_df .astype ("object" ) # type: ignore
157
159
pd_df = pd_df .where (pandas .notnull (pd_df ), None )
160
+
161
+ # NULL type isn't valid in BigQuery, so inspect the autodetected schema and always rebuild the memtable with an explicit schema of supported types.
158
162
keys_memtable = ibis .memtable (pd_df )
163
+ schema = keys_memtable .schema ()
164
+ new_schema = []
165
+ for column_index , column in enumerate (schema ):
166
+ if column == ORDER_ID_COLUMN :
167
+ new_type = ibis_dtypes .int64
168
+ else :
169
+ column_type = schema [column ]
170
+ # The autodetected type might not be one we can support, such
171
+ # as NULL type for empty rows, so convert to a type we do
172
+ # support.
173
+ new_type = bigframes .dtypes .bigframes_dtype_to_ibis_dtype (
174
+ bigframes .dtypes .ibis_dtype_to_bigframes_dtype (column_type )
175
+ )
176
+ # TODO(swast): Ibis memtable doesn't use backticks in struct
177
+ # field names, so spaces and other characters aren't allowed in
178
+ # the memtable context. Blocked by
179
+ # https://github.com/ibis-project/ibis/issues/7187
180
+ column = f"col_{ column_index } "
181
+ new_schema .append ((column , new_type ))
182
+
183
+ # Rename the columns to the names collected in new_schema so they match the explicit schema below. These are not the user-facing labels.
184
+ pd_df = pd_df .set_axis (
185
+ [column for column , _ in new_schema ],
186
+ axis = "columns" ,
187
+ )
188
+ keys_memtable = ibis .memtable (pd_df , schema = ibis .schema (new_schema ))
189
+
159
190
return cls (
160
191
session , # type: ignore # Session cannot normally be none, see "caution" above
161
192
keys_memtable ,
193
+ columns = [
194
+ keys_memtable [f"col_{ column_index } " ].name (column )
195
+ for column_index , column in enumerate (column_names )
196
+ ],
162
197
ordering = ExpressionOrdering (
163
198
ordering_value_columns = [OrderingColumnReference (ORDER_ID_COLUMN )],
164
199
total_ordering_columns = frozenset ([ORDER_ID_COLUMN ]),
@@ -426,11 +461,16 @@ def shape(self) -> typing.Tuple[int, int]:
426
461
width = len (self .columns )
427
462
count_expr = self ._to_ibis_expr (ordering_mode = "unordered" ).count ()
428
463
sql = self ._session .ibis_client .compile (count_expr )
429
- row_iterator , _ = self ._session ._start_query (
430
- sql = sql ,
431
- max_results = 1 ,
432
- )
433
- length = next (row_iterator )[0 ]
464
+
465
+ # Support in-memory engines for hermetic unit tests.
466
+ if not isinstance (sql , str ):
467
+ length = self ._session .ibis_client .execute (count_expr )
468
+ else :
469
+ row_iterator , _ = self ._session ._start_query (
470
+ sql = sql ,
471
+ max_results = 1 ,
472
+ )
473
+ length = next (row_iterator )[0 ]
434
474
return (length , width )
435
475
436
476
def concat (self , other : typing .Sequence [ArrayValue ]) -> ArrayValue :
0 commit comments