File tree: bigframes/session/_io/bigquery (6 files changed, +44 −8 lines)

@@ -31,11 +31,11 @@ repos:
     hooks:
       - id: black
   - repo: https://github.com/pycqa/flake8
-    rev: 6.1.0
+    rev: 7.1.2
     hooks:
       - id: flake8
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.10.0
+    rev: v1.15.0
     hooks:
       - id: mypy
        additional_dependencies: [types-requests, types-tabulate, pandas-stubs<=2.2.3.241126]
@@ -245,6 +245,8 @@ def start_query_with_client(
         location=location,
         project=project,
         api_timeout=timeout,
+        page_size=page_size,
+        max_results=max_results,
     )
     if metrics is not None:
         metrics.count_job_stats(query=sql)
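For readers unfamiliar with where these two keyword arguments end up, here is a minimal sketch of the usual forwarding path in google-cloud-bigquery. The helper name run_query_paged is hypothetical and not part of this change, but QueryJob.result() does accept page_size and max_results.

from google.cloud import bigquery

def run_query_paged(client: bigquery.Client, sql: str, page_size=None, max_results=None):
    # client.query() starts the job and returns a QueryJob.
    query_job = client.query(sql)
    # QueryJob.result() returns a RowIterator whose pages respect both limits.
    return query_job.result(page_size=page_size, max_results=max_results)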
 import nox.sessions

 BLACK_VERSION = "black==22.3.0"
+FLAKE8_VERSION = "flake8==7.1.2"
 ISORT_VERSION = "isort==5.12.0"
+MYPY_VERSION = "mypy==1.15.0"

 # TODO: switch to 3.13 once remote functions / cloud run adds a runtime for it (internal issue 333742751)
 LATEST_FULLY_SUPPORTED_PYTHON = "3.12"
@@ -135,7 +137,7 @@ def lint(session):
     Returns a failure if the linters find linting errors or sufficiently
     serious code quality issues.
     """
-    session.install("flake8", BLACK_VERSION, ISORT_VERSION)
+    session.install(FLAKE8_VERSION, BLACK_VERSION, ISORT_VERSION)
     session.run(
         "isort",
         "--check",
@@ -264,7 +266,7 @@ def mypy(session):
     deps = (
         set(
             [
-                "mypy",
+                MYPY_VERSION,
                 # TODO: update to latest pandas-stubs once we resolve bigframes issues.
                 "pandas-stubs<=2.2.3.241126",
                 "types-protobuf",
75
75
76
76
77
77
def test_to_pandas_batches_large_table ():
78
- df = bpd .read_gbq ("load_testing.scalars_1tb " )
78
+ df = bpd .read_gbq ("load_testing.scalars_100gb " )
79
79
_ , expected_column_count = df .shape
80
80
81
81
# download only a few batches, since 1tb would be too much
82
- iterable = df .to_pandas_batches (page_size = 500 , max_results = 1500 )
82
+ iterable = df .to_pandas_batches (
83
+ page_size = 500 , max_results = 1500 , allow_large_results = True
84
+ )
83
85
# use page size since client library doesn't support
84
86
# streaming only part of the dataframe via bqstorage
85
87
for pdf in iterable :
86
88
batch_row_count , batch_column_count = pdf .shape
87
89
assert batch_column_count == expected_column_count
88
- assert batch_row_count > 0
90
+ assert 0 < batch_row_count <= 500
89
91
90
92
91
93
@pytest .mark .skip (reason = "See if it caused kokoro build aborted." )
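For context, a minimal usage sketch of the batched-download pattern this test exercises; the table name below is a placeholder, and the keyword arguments mirror the ones shown in the hunk above.

import bigframes.pandas as bpd

df = bpd.read_gbq("some_dataset.some_table")  # placeholder table

# Stream the result in pages of at most 500 rows, stop after roughly 1,500
# rows, and allow results to be materialized through a destination table.
batches = df.to_pandas_batches(page_size=500, max_results=1500, allow_large_results=True)
for batch in batches:
    print(batch.shape)  # each batch is a regular pandas DataFrame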
 # limitations under the License.

 from concurrent.futures import ThreadPoolExecutor
+import time

 import google
 import google.api_core.exceptions
@@ -58,7 +59,11 @@ def test_bq_session_create_temp_table_clustered(bigquery_client: bigquery.Client

     session_resource_manager.close()
     with pytest.raises(google.api_core.exceptions.NotFound):
-        bigquery_client.get_table(session_table_ref)
+        # It may take time for the underlying tables to get cleaned up after
+        # closing the session, so wait at least 1 minute to check.
+        for _ in range(6):
+            bigquery_client.get_table(session_table_ref)
+            time.sleep(10)


 def test_bq_session_create_multi_temp_tables(bigquery_client: bigquery.Client):
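A note on how this retry works: the loop exits as soon as get_table() raises NotFound, which is exactly what pytest.raises expects, and the test only fails if the table is still visible after all attempts. A generic, hypothetical sketch of the same pattern:

import time

import google.api_core.exceptions
import pytest

def assert_eventually_deleted(lookup, attempts=6, delay=10):
    # Hypothetical helper: poll until the lookup raises NotFound, or fail.
    with pytest.raises(google.api_core.exceptions.NotFound):
        for _ in range(attempts):
            lookup()           # raises NotFound once cleanup has finished
            time.sleep(delay)  # still present; wait and retry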

 import bigframes
 import bigframes.dataframe
+import bigframes.enums
 import bigframes.features
 import bigframes.pandas as bpd

@@ -288,6 +289,30 @@ def test_to_pandas_batches_w_correct_dtypes(scalars_df_default_index):
     pd.testing.assert_series_equal(actual, expected)


+@pytest.mark.parametrize("allow_large_results", (True, False))
+def test_to_pandas_batches_w_page_size_and_max_results(session, allow_large_results):
+    """Verify to_pandas_batches() API returns the expected page size.
+
+    Regression test for b/407521010.
+    """
+    bf_df = session.read_gbq(
+        "bigquery-public-data.usa_names.usa_1910_2013",
+        index_col=bigframes.enums.DefaultIndexKind.NULL,
+    )
+    expected_column_count = len(bf_df.columns)
+
+    batch_count = 0
+    for pd_df in bf_df.to_pandas_batches(
+        page_size=42, allow_large_results=allow_large_results, max_results=42 * 3
+    ):
+        batch_row_count, batch_column_count = pd_df.shape
+        batch_count += 1
+        assert batch_column_count == expected_column_count
+        assert batch_row_count == 42
+
+    assert batch_count == 3
+
+
 @pytest.mark.parametrize(
     ("index",),
     [(True,), (False,)],
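With page_size=42 and max_results=42 * 3 = 126, the iterator should yield exactly 126 / 42 = 3 full batches, which is what the final assertions check for both allow_large_results settings.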