12
12
# See the License for the specific language governing permissions and
13
13
# limitations under the License.
14
14
15
- """Helpers for interacting with the job REST APIs from the client."""
15
+ """Helpers for interacting with the job REST APIs from the client.
16
+
17
+ For queries, there are three cases to consider:
18
+
19
+ 1. jobs.insert: This always returns a job resource.
20
+ 2. jobs.query, jobCreationMode=JOB_CREATION_REQUIRED:
21
+ This sometimes can return the results inline, but always includes a job ID.
22
+ 3. jobs.query, jobCreationMode=JOB_CREATION_OPTIONAL:
23
+ This sometimes doesn't create a job at all, instead returning the results.
24
+
25
+ Client.query() calls either (1) or (2), depending on what the user provides
26
+ for the api_method parameter. query() always returns a QueryJob object, which
27
+ can retry the query when the query job fails for a retriable reason.
28
+
29
+ Client.query_and_wait() calls (3). This returns a RowIterator that may wrap
30
+ local results from the response or may wrap a query job containing multiple
31
+ pages of results. Even though query_and_wait() waits for the job to complete,
32
+ we still need a separate job_retry object because there are different
33
+ predicates where it is safe to generate a new query ID.
34
+ """
16
35
17
36
import copy
37
+ import os
18
38
import uuid
19
39
from typing import Any , Dict , TYPE_CHECKING , Optional
20
40
21
41
import google .api_core .exceptions as core_exceptions
22
42
from google .api_core import retry as retries
23
43
24
44
from google .cloud .bigquery import job
45
+ from google .cloud .bigquery import table
25
46
26
47
# Avoid circular imports
27
48
if TYPE_CHECKING : # pragma: NO COVER
@@ -122,7 +143,12 @@ def do_query():
122
143
return future
123
144
124
145
125
- def _to_query_request (job_config : Optional [job .QueryJobConfig ]) -> Dict [str , Any ]:
146
+ def _to_query_request (
147
+ query : str ,
148
+ job_config : Optional [job .QueryJobConfig ],
149
+ location : Optional [str ],
150
+ timeout : Optional [float ],
151
+ ) -> Dict [str , Any ]:
126
152
"""Transform from Job resource to QueryRequest resource.
127
153
128
154
Most of the keys in job.configuration.query are in common with
@@ -149,6 +175,12 @@ def _to_query_request(job_config: Optional[job.QueryJobConfig]) -> Dict[str, Any
149
175
request_body .setdefault ("formatOptions" , {})
150
176
request_body ["formatOptions" ]["useInt64Timestamp" ] = True # type: ignore
151
177
178
+ if timeout is not None :
179
+ # Subtract a buffer for context switching, network latency, etc.
180
+ request_body ["timeoutMs" ] = max (0 , int (1000 * timeout ) - _TIMEOUT_BUFFER_MILLIS )
181
+ request_body ["location" ] = location
182
+ request_body ["query" ] = query
183
+
152
184
return request_body
153
185
154
186
@@ -211,6 +243,10 @@ def _to_query_job(
211
243
return query_job
212
244
213
245
246
+ def _to_query_path (project : str ) -> str :
247
+ return f"/projects/{ project } /queries"
248
+
249
+
214
250
def query_jobs_query (
215
251
client : "Client" ,
216
252
query : str ,
@@ -221,18 +257,12 @@ def query_jobs_query(
221
257
timeout : Optional [float ],
222
258
job_retry : retries .Retry ,
223
259
) -> job .QueryJob :
224
- """Initiate a query using jobs.query.
260
+ """Initiate a query using jobs.query with jobCreationMode=JOB_CREATION_REQUIRED .
225
261
226
262
See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query
227
263
"""
228
- path = f"/projects/{ project } /queries"
229
- request_body = _to_query_request (job_config )
230
-
231
- if timeout is not None :
232
- # Subtract a buffer for context switching, network latency, etc.
233
- request_body ["timeoutMs" ] = max (0 , int (1000 * timeout ) - _TIMEOUT_BUFFER_MILLIS )
234
- request_body ["location" ] = location
235
- request_body ["query" ] = query
264
+ path = _to_query_path (project )
265
+ request_body = _to_query_request (query , job_config , location , timeout )
236
266
237
267
def do_query ():
238
268
request_body ["requestId" ] = make_job_id ()
@@ -257,3 +287,48 @@ def do_query():
257
287
future ._job_retry = job_retry
258
288
259
289
return future
290
+
291
+
292
def query_and_wait(
    client: "Client",
    query: str,
    job_config: Optional[job.QueryJobConfig],
    location: Optional[str],
    project: str,
    retry: retries.Retry,
    timeout: Optional[float],
    job_retry: retries.Retry,
) -> table.RowIterator:
    """Run a query using jobs.query with jobCreationMode=JOB_CREATION_OPTIONAL.

    The optional job creation mode is only requested when the
    ``QUERY_PREVIEW_ENABLED`` environment variable is set to ``true``
    (compared case-insensitively). When the variable is unset or has any
    other value, the request is sent without a ``jobCreationMode`` field.

    See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query

    Args:
        client: Client whose ``_call_api`` method is used to send the request.
        query: SQL text, forwarded to ``_to_query_request``.
        job_config: Optional query configuration, forwarded to
            ``_to_query_request``.
        location: Optional location, forwarded to ``_to_query_request``.
        project: Project ID used to build the request path.
        retry: Retry object handed to ``client._call_api`` for the HTTP call.
        timeout: Optional timeout in seconds. It is passed to
            ``client._call_api`` and to ``_to_query_request``.
        job_retry: Retry object that wraps the whole request, so each attempt
            is sent with a newly generated ``requestId``.

    Returns:
        Whatever ``client._call_api`` returns for the jobs.query request.
        NOTE(review): the annotation promises a ``table.RowIterator``, but
        nothing in this function builds one yet -- confirm and add the
        wrapping before callers rely on the annotated type.
    """
    path = _to_query_path(project)
    request_body = _to_query_request(query, job_config, location, timeout)

    # os.getenv() returns None for an unset variable; default to "" so the
    # opt-in check cannot raise AttributeError.
    if os.getenv("QUERY_PREVIEW_ENABLED", "").casefold() == "true":
        request_body["jobCreationMode"] = "JOB_CREATION_OPTIONAL"

    @job_retry
    def do_query():
        # Regenerated inside the retried function so that every attempt
        # carries its own request ID.
        request_body["requestId"] = make_job_id()
        span_attributes = {"path": path}
        return client._call_api(
            retry,
            span_name="BigQuery.query",
            span_attributes=span_attributes,
            method="POST",
            path=path,
            data=request_body,
            timeout=timeout,
        )

    # There is no job future in this code path: retries are already handled
    # by the ``job_retry`` decorator above, so nothing needs to be attached
    # for a later retry.
    # TODO: wrap the response (inline rows, or the job it references) in a
    # table.RowIterator.
    return do_query()
0 commit comments