12
12
# See the License for the specific language governing permissions and
13
13
# limitations under the License.
14
14
15
- """Helpers for interacting with the job REST APIs from the client."""
15
+ """Helpers for interacting with the job REST APIs from the client.
16
+
17
+ For queries, there are three cases to consider:
18
+
19
+ 1. jobs.insert: This always returns a job resource.
20
+ 2. jobs.query, jobCreationMode=JOB_CREATION_REQUIRED:
21
+ This can sometimes return the results inline, but always includes a job ID.
22
+ 3. jobs.query, jobCreationMode=JOB_CREATION_OPTIONAL:
23
+ This sometimes doesn't create a job at all, instead returning the results.
24
+ For better debugging, a query ID is included in the response (not always a
25
+ job ID).
26
+
27
+ Client.query() calls either (1) or (2), depending on what the user provides
28
+ for the api_method parameter. query() always returns a QueryJob object, which
29
+ can retry the query when the query job fails for a retriable reason.
30
+
31
+ Client.query_and_wait() calls (3). This returns a RowIterator that may wrap
32
+ local results from the response or may wrap a query job containing multiple
33
+ pages of results. Even though query_and_wait() waits for the job to complete,
34
+ we still need a separate job_retry object because there are different
35
+ predicates where it is safe to generate a new query ID.
36
+ """
16
37
17
38
import copy
39
+ import os
18
40
import uuid
19
41
from typing import Any , Dict , TYPE_CHECKING , Optional
20
42
23
45
24
46
from google .cloud .bigquery import job
25
47
import google .cloud .bigquery .query
48
+ from google .cloud .bigquery import table
26
49
27
50
# Avoid circular imports
28
51
if TYPE_CHECKING : # pragma: NO COVER
@@ -123,7 +146,12 @@ def do_query():
123
146
return future
124
147
125
148
126
- def _to_query_request (job_config : Optional [job .QueryJobConfig ]) -> Dict [str , Any ]:
149
+ def _to_query_request (
150
+ query : str ,
151
+ job_config : Optional [job .QueryJobConfig ],
152
+ location : Optional [str ],
153
+ timeout : Optional [float ],
154
+ ) -> Dict [str , Any ]:
127
155
"""Transform from Job resource to QueryRequest resource.
128
156
129
157
Most of the keys in job.configuration.query are in common with
@@ -150,6 +178,12 @@ def _to_query_request(job_config: Optional[job.QueryJobConfig]) -> Dict[str, Any
150
178
request_body .setdefault ("formatOptions" , {})
151
179
request_body ["formatOptions" ]["useInt64Timestamp" ] = True # type: ignore
152
180
181
+ if timeout is not None :
182
+ # Subtract a buffer for context switching, network latency, etc.
183
+ request_body ["timeoutMs" ] = max (0 , int (1000 * timeout ) - _TIMEOUT_BUFFER_MILLIS )
184
+ request_body ["location" ] = location
185
+ request_body ["query" ] = query
186
+
153
187
return request_body
154
188
155
189
@@ -207,6 +241,10 @@ def _to_query_job(
207
241
return query_job
208
242
209
243
244
+ def _to_query_path (project : str ) -> str :
245
+ return f"/projects/{ project } /queries"
246
+
247
+
210
248
def query_jobs_query (
211
249
client : "Client" ,
212
250
query : str ,
@@ -217,18 +255,12 @@ def query_jobs_query(
217
255
timeout : Optional [float ],
218
256
job_retry : retries .Retry ,
219
257
) -> job .QueryJob :
220
- """Initiate a query using jobs.query.
258
+ """Initiate a query using jobs.query with jobCreationMode=JOB_CREATION_REQUIRED .
221
259
222
260
See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query
223
261
"""
224
- path = f"/projects/{ project } /queries"
225
- request_body = _to_query_request (job_config )
226
-
227
- if timeout is not None :
228
- # Subtract a buffer for context switching, network latency, etc.
229
- request_body ["timeoutMs" ] = max (0 , int (1000 * timeout ) - _TIMEOUT_BUFFER_MILLIS )
230
- request_body ["location" ] = location
231
- request_body ["query" ] = query
262
+ path = _to_query_path (project )
263
+ request_body = _to_query_request (query , job_config , location , timeout )
232
264
233
265
def do_query ():
234
266
request_body ["requestId" ] = make_job_id ()
@@ -253,3 +285,84 @@ def do_query():
253
285
future ._job_retry = job_retry
254
286
255
287
return future
288
+
289
+
290
def query_and_wait(
    client: "Client",
    query: str,
    job_config: Optional[job.QueryJobConfig],
    location: Optional[str],
    project: str,
    retry: retries.Retry,
    timeout: Optional[float],
    job_retry: retries.Retry,
) -> table.RowIterator:
    """Initiate a query using jobs.query and wait for results.

    While ``jobCreationMode=JOB_CREATION_OPTIONAL`` is in preview, use the
    default ``jobCreationMode`` unless the environment variable
    ``QUERY_PREVIEW_ENABLED=true``. After ``jobCreationMode`` is GA, this
    method will always use ``jobCreationMode=JOB_CREATION_OPTIONAL``.

    See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query

    Args:
        client: Client used to issue the API request.
        query: SQL text to execute.
        job_config: Optional query configuration folded into the request body.
        location: Optional location sent with the request.
        project: Project whose ``queries`` endpoint receives the request.
        retry: Retry policy passed to ``client._call_api`` for the HTTP call.
        timeout: Optional timeout, in seconds, passed to the HTTP call and
            used to build the request body.
        job_retry: Retry policy wrapped around the whole query attempt; each
            attempt is sent with a freshly generated ``requestId``.

    Returns:
        A ``RowIterator`` built from the inline response when the query
        completed and all rows fit in the first page; otherwise the value of
        ``QueryJob.result()`` for the job described by the response.
    """
    path = _to_query_path(project)
    request_body = _to_query_request(query, job_config, location, timeout)

    # os.getenv returns None when the variable is unset, so supply an empty
    # default before calling casefold(); otherwise the common "not opted in"
    # case raises AttributeError.
    if os.getenv("QUERY_PREVIEW_ENABLED", "").casefold() == "true":
        request_body["jobCreationMode"] = "JOB_CREATION_OPTIONAL"

    @job_retry
    def do_query():
        # A new request ID per attempt, so a retried attempt is not treated
        # as a duplicate of the failed one.
        request_body["requestId"] = make_job_id()
        span_attributes = {"path": path}

        response = client._call_api(
            retry,
            span_name="BigQuery.query",
            span_attributes=span_attributes,
            method="POST",
            path=path,
            data=request_body,
            timeout=timeout,
        )

        # The query hasn't finished, so we expect there to be a job ID now.
        # Wait until the query finishes.
        if not response.get("jobComplete", False):
            return _to_query_job(client, query, job_config, response).result()

        # Even if we run with JOB_CREATION_OPTIONAL, if there are more pages
        # to fetch, there will be a job ID for jobs.getQueryResults.
        query_results = google.cloud.bigquery.query._QueryResults.from_api_repr(
            response
        )
        job_id = query_results.job_id
        # Named distinctly from the ``location`` parameter to avoid shadowing.
        response_location = query_results.location
        rows = query_results.rows
        total_rows = query_results.total_rows
        more_pages = (
            job_id is not None
            and response_location is not None
            and len(rows) < total_rows
        )

        if more_pages:
            # TODO(swast): Call client._list_rows_from_query_results directly
            # after updating RowIterator to fetch destination only if needed.
            return _to_query_job(client, query, job_config, response).result()

        return table.RowIterator(
            client=client,
            api_request=client._call_api,
            path=None,
            schema=query_results.schema,
            # TODO(swast): Support max_results
            max_results=None,
            total_rows=total_rows,
            first_page_response=response,
            location=response_location,
            job_id=job_id,
            query_id=query_results.query_id,
            project=project,
        )

    return do_query()
0 commit comments