"""Cursor for the Google BigQuery DB-API."""

+from __future__ import annotations
+
import collections
from collections import abc as collections_abc
-import copy
-import logging
import re
+from typing import Optional

try:
    from google.cloud.bigquery_storage import ArrowSerializationOptions

import google.cloud.exceptions  # type: ignore

-_LOGGER = logging.getLogger(__name__)
-
# Per PEP 249: A 7-item sequence containing information describing one result
# column. The first two items (name and type_code) are mandatory, the other
# five are optional and are set to None if no meaningful values can be
@@ -76,18 +75,31 @@ def __init__(self, connection):
        # most appropriate size.
        self.arraysize = None
        self._query_data = None
-        self._query_job = None
+        self._query_rows = None
        self._closed = False

    @property
-    def query_job(self):
-        """google.cloud.bigquery.job.query.QueryJob: The query job created by
-        the last ``execute*()`` call.
+    def query_job(self) -> Optional[job.QueryJob]:
+        """google.cloud.bigquery.job.query.QueryJob | None: The query job
+        created by the last ``execute*()`` call, if a query job was created.

        .. note::
            If the last ``execute*()`` call was ``executemany()``, this is the
            last job created by ``executemany()``."""
-        return self._query_job
+        rows = self._query_rows
+
+        if rows is None:
+            return None
+
+        job_id = rows.job_id
+        project = rows.project
+        location = rows.location
+        client = self.connection._client
+
+        if job_id is None:
+            return None
+
+        return client.get_job(job_id, location=location, project=project)

    def close(self):
        """Mark the cursor as closed, preventing its further use."""
@@ -117,8 +129,8 @@ def _set_description(self, schema):
            for field in schema
        )

-    def _set_rowcount(self, query_results):
-        """Set the rowcount from query results.
+    def _set_rowcount(self, rows):
+        """Set the rowcount from a RowIterator.

        Normally, this sets rowcount to the number of rows returned by the
        query, but if it was a DML statement, it sets rowcount to the number
@@ -129,10 +141,10 @@ def _set_rowcount(self, query_results):
                Results of a query.
        """
        total_rows = 0
-        num_dml_affected_rows = query_results.num_dml_affected_rows
+        num_dml_affected_rows = rows.num_dml_affected_rows

-        if query_results.total_rows is not None and query_results.total_rows > 0:
-            total_rows = query_results.total_rows
+        if rows.total_rows is not None and rows.total_rows > 0:
+            total_rows = rows.total_rows
        if num_dml_affected_rows is not None and num_dml_affected_rows > 0:
            total_rows = num_dml_affected_rows
        self.rowcount = total_rows
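A usage-level illustration of the rowcount rules encoded above; the dataset and table names are hypothetical:

```python
# DML: rowcount reflects num_dml_affected_rows.
cursor.execute("UPDATE my_dataset.my_table SET flag = TRUE WHERE flag = FALSE")
print(cursor.rowcount)  # number of rows modified

# SELECT: rowcount reflects total_rows of the result, when known.
cursor.execute("SELECT * FROM my_dataset.my_table")
print(cursor.rowcount)
```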
@@ -165,9 +177,10 @@ def execute(self, operation, parameters=None, job_id=None, job_config=None):
            parameters (Union[Mapping[str, Any], Sequence[Any]]):
                (Optional) dictionary or sequence of parameter values.

-            job_id (str):
-                (Optional) The job_id to use. If not set, a job ID
-                is generated at random.
+            job_id (str | None):
+                (Optional and discouraged) The job ID to use when creating
+                the query job. For best performance and reliability, manually
+                setting a job ID is discouraged.

            job_config (google.cloud.bigquery.job.QueryJobConfig):
                (Optional) Extra configuration options for the query job.
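Since the docstring refers to parameter values, a short reminder of the pyformat style `execute()` accepts per PEP 249 (table name hypothetical):

```python
# Named pyformat parameters are converted to BigQuery query parameters.
cursor.execute(
    "SELECT name FROM my_dataset.my_table WHERE num > %(min_num)s",
    {"min_num": 100},
)
```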
@@ -181,7 +194,7 @@ def _execute(
        self, formatted_operation, parameters, job_id, job_config, parameter_types
    ):
        self._query_data = None
-        self._query_job = None
+        self._query_rows = None
        client = self.connection._client

        # The DB-API uses the pyformat formatting, since the way BigQuery does
@@ -190,33 +203,35 @@ def _execute(
        # libraries.
        query_parameters = _helpers.to_query_parameters(parameters, parameter_types)

-        if client._default_query_job_config:
-            if job_config:
-                config = job_config._fill_from_default(client._default_query_job_config)
-            else:
-                config = copy.deepcopy(client._default_query_job_config)
-        else:
-            config = job_config or job.QueryJobConfig(use_legacy_sql=False)
-
+        config = job_config or job.QueryJobConfig()
        config.query_parameters = query_parameters
-        self._query_job = client.query(
-            formatted_operation, job_config=config, job_id=job_id
-        )

-        if self._query_job.dry_run:
-            self._set_description(schema=None)
-            self.rowcount = 0
-            return
-
-        # Wait for the query to finish.
+        # Start the query and wait for the query to finish.
        try:
-            self._query_job.result()
+            if job_id is not None:
+                rows = client.query(
+                    formatted_operation,
+                    job_config=config,
+                    job_id=job_id,
+                ).result(
+                    page_size=self.arraysize,
+                )
+            else:
+                rows = client.query_and_wait(
+                    formatted_operation,
+                    job_config=config,
+                    page_size=self.arraysize,
+                )
        except google.cloud.exceptions.GoogleCloudError as exc:
            raise exceptions.DatabaseError(exc)

-        query_results = self._query_job._query_results
-        self._set_rowcount(query_results)
-        self._set_description(query_results.schema)
+        self._query_rows = rows
+        self._set_description(rows.schema)
+
+        if config.dry_run:
+            self.rowcount = 0
+        else:
+            self._set_rowcount(rows)

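A condensed sketch of the two client calls used above, outside the cursor; the query strings and job ID are illustrative:

```python
from google.cloud import bigquery

client = bigquery.Client()

# Default path: the client may use the faster jobs.query API and can
# skip job creation entirely; a RowIterator is returned directly.
rows = client.query_and_wait("SELECT 1", page_size=100)

# Pinned-job-ID path: always creates a job via jobs.insert, then waits.
rows = client.query("SELECT 1", job_id="my-job-id").result(page_size=100)
```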
    def executemany(self, operation, seq_of_parameters):
        """Prepare and execute a database operation multiple times.
@@ -250,25 +265,26 @@ def _try_fetch(self, size=None):
        Mutates self to indicate that iteration has started.

        """
-        if self._query_job is None:
+        if self._query_data is not None:
+            # Already started fetching the data.
+            return
+
+        rows = self._query_rows
+        if rows is None:
            raise exceptions.InterfaceError(
                "No query results: execute() must be called before fetch."
            )

-        if self._query_job.dry_run:
-            self._query_data = iter([])
+        bqstorage_client = self.connection._bqstorage_client
+        if rows._should_use_bqstorage(
+            bqstorage_client,
+            create_bqstorage_client=False,
+        ):
+            rows_iterable = self._bqstorage_fetch(bqstorage_client)
+            self._query_data = _helpers.to_bq_table_rows(rows_iterable)
            return

-        if self._query_data is None:
-            bqstorage_client = self.connection._bqstorage_client
-
-            if bqstorage_client is not None:
-                rows_iterable = self._bqstorage_fetch(bqstorage_client)
-                self._query_data = _helpers.to_bq_table_rows(rows_iterable)
-                return
-
-            rows_iter = self._query_job.result(page_size=self.arraysize)
-            self._query_data = iter(rows_iter)
+        self._query_data = iter(rows)
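End to end, the fetch path above behaves like this from the caller's side; `process` is a hypothetical handler, and the public dataset name is real:

```python
cursor = connection.cursor()
cursor.arraysize = 1000  # used as page_size when paging over REST
cursor.execute("SELECT word FROM `bigquery-public-data.samples.shakespeare`")

# fetchmany() triggers _try_fetch() once; rows stream over the BigQuery
# Storage API only if a storage client is attached to the connection and
# the result is large enough per _should_use_bqstorage().
while True:
    batch = cursor.fetchmany(100)
    if not batch:
        break
    process(batch)  # hypothetical handler
```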

    def _bqstorage_fetch(self, bqstorage_client):
        """Start fetching data with the BigQuery Storage API.
@@ -290,7 +306,7 @@ def _bqstorage_fetch(self, bqstorage_client):
        # bigquery_storage can indeed be imported here without errors.
        from google.cloud import bigquery_storage

-        table_reference = self._query_job.destination
+        table_reference = self._query_rows._table

        requested_session = bigquery_storage.types.ReadSession(
            table=table_reference.to_bqstorage(),