Skip to content

Commit 9fbad76

Browse files
Linchin and tswast authored
feat: add default timeout for Client.get_job() (#1935)
* feat: add default timeout for Client.get_job()
* change timeout type detection
* lint
* fix unit test and coverage
* add type hint
* fix type hint
* change import style and add comments
* remove sentinel value in client
* type hint
* typo
* add sentinel for query_and_wait()
* add unit tests
* fix unit test
* Update google/cloud/bigquery/job/query.py

  Co-authored-by: Tim Sweña (Swast) <[email protected]>
* Update google/cloud/bigquery/job/query.py

  Co-authored-by: Tim Sweña (Swast) <[email protected]>
* address comments
* typo
* type hint
* typos

---------

Co-authored-by: Tim Sweña (Swast) <[email protected]>
1 parent 94d61b0 commit 9fbad76

14 files changed

+421
-209
lines changed

google/cloud/bigquery/_helpers.py

+2
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,8 @@
3333
from google.auth import credentials as ga_credentials # type: ignore
3434
from google.api_core import client_options as client_options_lib
3535

36+
TimeoutType = Union[float, None]
37+
3638
_RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f"
3739
_TIMEONLY_WO_MICROS = "%H:%M:%S"
3840
_TIMEONLY_W_MICROS = "%H:%M:%S.%f"

google/cloud/bigquery/_job_helpers.py

+8-5
Original file line number | Diff line number | Diff line change
@@ -39,14 +39,15 @@
3939
import functools
4040
import os
4141
import uuid
42-
from typing import Any, Dict, TYPE_CHECKING, Optional
42+
from typing import Any, Dict, Optional, TYPE_CHECKING, Union
4343

4444
import google.api_core.exceptions as core_exceptions
4545
from google.api_core import retry as retries
4646

4747
from google.cloud.bigquery import job
4848
import google.cloud.bigquery.query
4949
from google.cloud.bigquery import table
50+
from google.cloud.bigquery.retry import POLLING_DEFAULT_VALUE
5051

5152
# Avoid circular imports
5253
if TYPE_CHECKING: # pragma: NO COVER
@@ -328,7 +329,7 @@ def query_and_wait(
328329
location: Optional[str],
329330
project: str,
330331
api_timeout: Optional[float] = None,
331-
wait_timeout: Optional[float] = None,
332+
wait_timeout: Optional[Union[float, object]] = POLLING_DEFAULT_VALUE,
332333
retry: Optional[retries.Retry],
333334
job_retry: Optional[retries.Retry],
334335
page_size: Optional[int] = None,
@@ -364,10 +365,12 @@ def query_and_wait(
364365
api_timeout (Optional[float]):
365366
The number of seconds to wait for the underlying HTTP transport
366367
before using ``retry``.
367-
wait_timeout (Optional[float]):
368+
wait_timeout (Optional[Union[float, object]]):
368369
The number of seconds to wait for the query to finish. If the
369370
query doesn't finish before this timeout, the client attempts
370-
to cancel the query.
371+
to cancel the query. If unset, the underlying Client.get_job() API
372+
call has a timeout, but we still wait indefinitely for the job to
373+
finish.
371374
retry (Optional[google.api_core.retry.Retry]):
372375
How to retry the RPC. This only applies to making RPC
373376
calls. It isn't used to retry failed jobs. This has
@@ -545,7 +548,7 @@ def _supported_by_jobs_query(request_body: Dict[str, Any]) -> bool:
545548
def _wait_or_cancel(
546549
job: job.QueryJob,
547550
api_timeout: Optional[float],
548-
wait_timeout: Optional[float],
551+
wait_timeout: Optional[Union[object, float]],
549552
retry: Optional[retries.Retry],
550553
page_size: Optional[int],
551554
max_results: Optional[int],

google/cloud/bigquery/client.py

+3-2
Original file line number | Diff line number | Diff line change
@@ -82,6 +82,7 @@
8282
from google.cloud.bigquery._helpers import _DEFAULT_UNIVERSE
8383
from google.cloud.bigquery._helpers import _validate_universe
8484
from google.cloud.bigquery._helpers import _get_client_universe
85+
from google.cloud.bigquery._helpers import TimeoutType
8586
from google.cloud.bigquery._job_helpers import make_job_id as _make_job_id
8687
from google.cloud.bigquery.dataset import Dataset
8788
from google.cloud.bigquery.dataset import DatasetListItem
@@ -107,6 +108,7 @@
107108
DEFAULT_JOB_RETRY,
108109
DEFAULT_RETRY,
109110
DEFAULT_TIMEOUT,
111+
DEFAULT_GET_JOB_TIMEOUT,
110112
)
111113
from google.cloud.bigquery.routine import Routine
112114
from google.cloud.bigquery.routine import RoutineReference
@@ -123,7 +125,6 @@
123125
_versions_helpers.PANDAS_VERSIONS.try_import()
124126
) # mypy check fails because pandas import is outside module, there are type: ignore comments related to this
125127

126-
TimeoutType = Union[float, None]
127128
ResumableTimeoutType = Union[
128129
None, float, Tuple[float, float]
129130
] # for resumable media methods
@@ -2139,7 +2140,7 @@ def get_job(
21392140
project: Optional[str] = None,
21402141
location: Optional[str] = None,
21412142
retry: retries.Retry = DEFAULT_RETRY,
2142-
timeout: TimeoutType = DEFAULT_TIMEOUT,
2143+
timeout: TimeoutType = DEFAULT_GET_JOB_TIMEOUT,
21432144
) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob, job.UnknownJob]:
21442145
"""Fetch a job for the project associated with this client.
21452146

google/cloud/bigquery/job/base.py

+12-17
Original file line number | Diff line number | Diff line change
@@ -26,8 +26,11 @@
2626
import google.api_core.future.polling
2727

2828
from google.cloud.bigquery import _helpers
29-
from google.cloud.bigquery.retry import DEFAULT_RETRY
3029
from google.cloud.bigquery._helpers import _int_or_none
30+
from google.cloud.bigquery.retry import (
31+
DEFAULT_GET_JOB_TIMEOUT,
32+
DEFAULT_RETRY,
33+
)
3134

3235

3336
_DONE_STATE = "DONE"
@@ -801,7 +804,7 @@ def reload(
801804
self,
802805
client=None,
803806
retry: "retries.Retry" = DEFAULT_RETRY,
804-
timeout: Optional[float] = None,
807+
timeout: Optional[float] = DEFAULT_GET_JOB_TIMEOUT,
805808
):
806809
"""API call: refresh job properties via a GET request.
807810
@@ -820,22 +823,14 @@ def reload(
820823
"""
821824
client = self._require_client(client)
822825

823-
extra_params = {}
824-
if self.location:
825-
extra_params["location"] = self.location
826-
span_attributes = {"path": self.path}
827-
828-
api_response = client._call_api(
829-
retry,
830-
span_name="BigQuery.job.reload",
831-
span_attributes=span_attributes,
832-
job_ref=self,
833-
method="GET",
834-
path=self.path,
835-
query_params=extra_params,
826+
got_job = client.get_job(
827+
self,
828+
project=self.project,
829+
location=self.location,
830+
retry=retry,
836831
timeout=timeout,
837832
)
838-
self._set_properties(api_response)
833+
self._set_properties(got_job._properties)
839834

840835
def cancel(
841836
self,
@@ -913,7 +908,7 @@ def _set_future_result(self):
913908
def done(
914909
self,
915910
retry: "retries.Retry" = DEFAULT_RETRY,
916-
timeout: Optional[float] = None,
911+
timeout: Optional[float] = DEFAULT_GET_JOB_TIMEOUT,
917912
reload: bool = True,
918913
) -> bool:
919914
"""Checks if the job is complete.

google/cloud/bigquery/job/query.py

+24-10
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,11 @@
4040
StructQueryParameter,
4141
UDFResource,
4242
)
43-
from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_JOB_RETRY
43+
from google.cloud.bigquery.retry import (
44+
DEFAULT_RETRY,
45+
DEFAULT_JOB_RETRY,
46+
POLLING_DEFAULT_VALUE,
47+
)
4448
from google.cloud.bigquery.routine import RoutineReference
4549
from google.cloud.bigquery.schema import SchemaField
4650
from google.cloud.bigquery.table import _EmptyRowIterator
@@ -1437,7 +1441,7 @@ def result( # type: ignore # (incompatible with supertype)
14371441
page_size: Optional[int] = None,
14381442
max_results: Optional[int] = None,
14391443
retry: Optional[retries.Retry] = DEFAULT_RETRY,
1440-
timeout: Optional[float] = None,
1444+
timeout: Optional[Union[float, object]] = POLLING_DEFAULT_VALUE,
14411445
start_index: Optional[int] = None,
14421446
job_retry: Optional[retries.Retry] = DEFAULT_JOB_RETRY,
14431447
) -> Union["RowIterator", _EmptyRowIterator]:
@@ -1457,11 +1461,14 @@ def result( # type: ignore # (incompatible with supertype)
14571461
is ``DONE``, retrying is aborted early even if the
14581462
results are not available, as this will not change
14591463
anymore.
1460-
timeout (Optional[float]):
1464+
timeout (Optional[Union[float, \
1465+
google.api_core.future.polling.PollingFuture._DEFAULT_VALUE, \
1466+
]]):
14611467
The number of seconds to wait for the underlying HTTP transport
1462-
before using ``retry``.
1463-
If multiple requests are made under the hood, ``timeout``
1464-
applies to each individual request.
1468+
before using ``retry``. If ``None``, wait indefinitely
1469+
unless an error is returned. If unset, only the
1470+
underlying API calls have their default timeouts, but we still
1471+
wait indefinitely for the job to finish.
14651472
start_index (Optional[int]):
14661473
The zero-based index of the starting row to read.
14671474
job_retry (Optional[google.api_core.retry.Retry]):
@@ -1507,6 +1514,13 @@ def result( # type: ignore # (incompatible with supertype)
15071514
# Intentionally omit job_id and query_id since this doesn't
15081515
# actually correspond to a finished query job.
15091516
)
1517+
1518+
# When timeout has default sentinel value ``object()``, do not pass
1519+
# anything to invoke default timeouts in subsequent calls.
1520+
kwargs: Dict[str, Union[_helpers.TimeoutType, object]] = {}
1521+
if type(timeout) is not object:
1522+
kwargs["timeout"] = timeout
1523+
15101524
try:
15111525
retry_do_query = getattr(self, "_retry_do_query", None)
15121526
if retry_do_query is not None:
@@ -1548,7 +1562,7 @@ def is_job_done():
15481562
# rateLimitExceeded errors are ambiguous. We want to know if
15491563
# the query job failed and not just the call to
15501564
# jobs.getQueryResults.
1551-
if self.done(retry=retry, timeout=timeout):
1565+
if self.done(retry=retry, **kwargs):
15521566
# If it's already failed, we might as well stop.
15531567
job_failed_exception = self.exception()
15541568
if job_failed_exception is not None:
@@ -1585,14 +1599,14 @@ def is_job_done():
15851599
# response from the REST API. This ensures we aren't
15861600
# making any extra API calls if the previous loop
15871601
# iteration fetched the finished job.
1588-
self._reload_query_results(retry=retry, timeout=timeout)
1602+
self._reload_query_results(retry=retry, **kwargs)
15891603
return True
15901604

15911605
# Call jobs.getQueryResults with max results set to 0 just to
15921606
# wait for the query to finish. Unlike most methods,
15931607
# jobs.getQueryResults hangs as long as it can to ensure we
15941608
# know when the query has finished as soon as possible.
1595-
self._reload_query_results(retry=retry, timeout=timeout)
1609+
self._reload_query_results(retry=retry, **kwargs)
15961610

15971611
# Even if the query is finished now according to
15981612
# jobs.getQueryResults, we'll want to reload the job status if
@@ -1682,10 +1696,10 @@ def is_job_done():
16821696
max_results=max_results,
16831697
start_index=start_index,
16841698
retry=retry,
1685-
timeout=timeout,
16861699
query_id=self.query_id,
16871700
first_page_response=first_page_response,
16881701
num_dml_affected_rows=self._query_results.num_dml_affected_rows,
1702+
**kwargs,
16891703
)
16901704
rows._preserve_order = _contains_order_by(self.query)
16911705
return rows

google/cloud/bigquery/retry.py

+11
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414

1515
from google.api_core import exceptions
1616
from google.api_core import retry
17+
import google.api_core.future.polling
1718
from google.auth import exceptions as auth_exceptions # type: ignore
1819
import requests.exceptions
1920

@@ -140,3 +141,13 @@ def _job_should_retry(exc):
140141
"""
141142
The default job retry object.
142143
"""
144+
145+
DEFAULT_GET_JOB_TIMEOUT = 128
146+
"""
147+
Default timeout for Client.get_job().
148+
"""
149+
150+
POLLING_DEFAULT_VALUE = google.api_core.future.polling.PollingFuture._DEFAULT_VALUE
151+
"""
152+
Default value defined in google.api_core.future.polling.PollingFuture.
153+
"""

0 commit comments

Comments
 (0)