@@ -1385,7 +1385,10 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None):
1385
1385
raise
1386
1386
1387
1387
def _reload_query_results (
1388
- self , retry : "retries.Retry" = DEFAULT_RETRY , timeout : Optional [float ] = None
1388
+ self ,
1389
+ retry : "retries.Retry" = DEFAULT_RETRY ,
1390
+ timeout : Optional [float ] = None ,
1391
+ page_size : int = 0 ,
1389
1392
):
1390
1393
"""Refresh the cached query results unless already cached and complete.
1391
1394
@@ -1395,6 +1398,9 @@ def _reload_query_results(
1395
1398
timeout (Optional[float]):
1396
1399
The number of seconds to wait for the underlying HTTP transport
1397
1400
before using ``retry``.
1401
+ page_size (int):
1402
+ Maximum number of rows in a single response. See maxResults in
1403
+ the jobs.getQueryResults REST API.
1398
1404
"""
1399
1405
# Optimization: avoid a call to jobs.getQueryResults if it's already
1400
1406
# been fetched, e.g. from jobs.query first page of results.
@@ -1425,7 +1431,14 @@ def _reload_query_results(
1425
1431
1426
1432
# If an explicit timeout is not given, fall back to the transport timeout
1427
1433
# stored in _blocking_poll() in the process of polling for job completion.
1428
- transport_timeout = timeout if timeout is not None else self ._transport_timeout
1434
+ if timeout is not None :
1435
+ transport_timeout = timeout
1436
+ else :
1437
+ transport_timeout = self ._transport_timeout
1438
+
1439
+ # Handle PollingJob._DEFAULT_VALUE.
1440
+ if not isinstance (transport_timeout , (float , int )):
1441
+ transport_timeout = None
1429
1442
1430
1443
self ._query_results = self ._client ._get_query_results (
1431
1444
self .job_id ,
@@ -1434,6 +1447,7 @@ def _reload_query_results(
1434
1447
timeout_ms = timeout_ms ,
1435
1448
location = self .location ,
1436
1449
timeout = transport_timeout ,
1450
+ page_size = page_size ,
1437
1451
)
1438
1452
1439
1453
def result ( # type: ignore # (incompatible with supertype)
@@ -1515,11 +1529,25 @@ def result( # type: ignore # (incompatible with supertype)
1515
1529
# actually correspond to a finished query job.
1516
1530
)
1517
1531
1532
+ # Setting max_results should be equivalent to setting page_size with
1533
+ # regard to allowing the user to tune how many results to download
1534
+ # while we wait for the query to finish. See internal issue:
1535
+ # 344008814.
1536
+ if page_size is None and max_results is not None :
1537
+ page_size = max_results
1538
+
1518
1539
# When timeout has default sentinel value ``object()``, do not pass
1519
1540
# anything to invoke default timeouts in subsequent calls.
1520
- kwargs : Dict [str , Union [_helpers .TimeoutType , object ]] = {}
1541
+ done_kwargs : Dict [str , Union [_helpers .TimeoutType , object ]] = {}
1542
+ reload_query_results_kwargs : Dict [str , Union [_helpers .TimeoutType , object ]] = {}
1543
+ list_rows_kwargs : Dict [str , Union [_helpers .TimeoutType , object ]] = {}
1521
1544
if type (timeout ) is not object :
1522
- kwargs ["timeout" ] = timeout
1545
+ done_kwargs ["timeout" ] = timeout
1546
+ list_rows_kwargs ["timeout" ] = timeout
1547
+ reload_query_results_kwargs ["timeout" ] = timeout
1548
+
1549
+ if page_size is not None :
1550
+ reload_query_results_kwargs ["page_size" ] = page_size
1523
1551
1524
1552
try :
1525
1553
retry_do_query = getattr (self , "_retry_do_query" , None )
@@ -1562,7 +1590,7 @@ def is_job_done():
1562
1590
# rateLimitExceeded errors are ambiguous. We want to know if
1563
1591
# the query job failed and not just the call to
1564
1592
# jobs.getQueryResults.
1565
- if self .done (retry = retry , ** kwargs ):
1593
+ if self .done (retry = retry , ** done_kwargs ):
1566
1594
# If it's already failed, we might as well stop.
1567
1595
job_failed_exception = self .exception ()
1568
1596
if job_failed_exception is not None :
@@ -1599,14 +1627,16 @@ def is_job_done():
1599
1627
# response from the REST API. This ensures we aren't
1600
1628
# making any extra API calls if the previous loop
1601
1629
# iteration fetched the finished job.
1602
- self ._reload_query_results (retry = retry , ** kwargs )
1630
+ self ._reload_query_results (
1631
+ retry = retry , ** reload_query_results_kwargs
1632
+ )
1603
1633
return True
1604
1634
1605
1635
# Call jobs.getQueryResults (max results 0 unless page_size is set) just to
1606
1636
# wait for the query to finish. Unlike most methods,
1607
1637
# jobs.getQueryResults hangs as long as it can to ensure we
1608
1638
# know when the query has finished as soon as possible.
1609
- self ._reload_query_results (retry = retry , ** kwargs )
1639
+ self ._reload_query_results (retry = retry , ** reload_query_results_kwargs )
1610
1640
1611
1641
# Even if the query is finished now according to
1612
1642
# jobs.getQueryResults, we'll want to reload the job status if
@@ -1679,8 +1709,9 @@ def is_job_done():
1679
1709
# We know that there's at least 1 row, so only treat the response from
1680
1710
# jobs.getQueryResults / jobs.query as the first page of the
1681
1711
# RowIterator response if there are any rows in it. This prevents us
1682
- # from stopping the iteration early because we're missing rows and
1683
- # there's no next page token.
1712
+ # from stopping the iteration early in the cases where we set
1713
+ # maxResults=0. In that case, we're missing rows and there's no next
1714
+ # page token.
1684
1715
first_page_response = self ._query_results ._properties
1685
1716
if "rows" not in first_page_response :
1686
1717
first_page_response = None
@@ -1699,7 +1730,7 @@ def is_job_done():
1699
1730
query_id = self .query_id ,
1700
1731
first_page_response = first_page_response ,
1701
1732
num_dml_affected_rows = self ._query_results .num_dml_affected_rows ,
1702
- ** kwargs ,
1733
+ ** list_rows_kwargs ,
1703
1734
)
1704
1735
rows ._preserve_order = _contains_order_by (self .query )
1705
1736
return rows
0 commit comments