Skip to content

Commit 730df17

Browse files
authored
perf: don't fetch rows when waiting for query to finish (#400)
When there are large result sets, fetching rows while waiting for the query to finish can cause the API to hang indefinitely. (This may be due to an interaction between connection timeout and API timeout.) This reverts commit 86f6a51 (#374). Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes googleapis/python-bigquery-pandas#343 Fixes #394 🦕
1 parent 673a9cb commit 730df17

File tree

6 files changed

+89
-80
lines changed

6 files changed

+89
-80
lines changed

google/cloud/bigquery/client.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -1534,7 +1534,7 @@ def _get_query_results(
15341534
A new ``_QueryResults`` instance.
15351535
"""
15361536

1537-
extra_params = {}
1537+
extra_params = {"maxResults": 0}
15381538

15391539
if project is None:
15401540
project = self.project
@@ -3187,7 +3187,6 @@ def _list_rows_from_query_results(
31873187
page_size=None,
31883188
retry=DEFAULT_RETRY,
31893189
timeout=None,
3190-
first_page_response=None,
31913190
):
31923191
"""List the rows of a completed query.
31933192
See
@@ -3248,7 +3247,6 @@ def _list_rows_from_query_results(
32483247
table=destination,
32493248
extra_params=params,
32503249
total_rows=total_rows,
3251-
first_page_response=first_page_response,
32523250
)
32533251
return row_iterator
32543252

google/cloud/bigquery/job/query.py

-5
Original file line numberDiff line numberDiff line change
@@ -1177,10 +1177,6 @@ def result(
11771177
if self._query_results.total_rows is None:
11781178
return _EmptyRowIterator()
11791179

1180-
first_page_response = None
1181-
if max_results is None and page_size is None and start_index is None:
1182-
first_page_response = self._query_results._properties
1183-
11841180
rows = self._client._list_rows_from_query_results(
11851181
self.job_id,
11861182
self.location,
@@ -1193,7 +1189,6 @@ def result(
11931189
start_index=start_index,
11941190
retry=retry,
11951191
timeout=timeout,
1196-
first_page_response=first_page_response,
11971192
)
11981193
rows._preserve_order = _contains_order_by(self.query)
11991194
return rows

tests/unit/job/test_query.py

+13-42
Original file line numberDiff line numberDiff line change
@@ -787,9 +787,7 @@ def test_result(self):
787787
"location": "EU",
788788
},
789789
"schema": {"fields": [{"name": "col1", "type": "STRING"}]},
790-
"totalRows": "3",
791-
"rows": [{"f": [{"v": "abc"}]}],
792-
"pageToken": "next-page",
790+
"totalRows": "2",
793791
}
794792
job_resource = self._make_resource(started=True, location="EU")
795793
job_resource_done = self._make_resource(started=True, ended=True, location="EU")
@@ -801,9 +799,9 @@ def test_result(self):
801799
query_page_resource = {
802800
# Explicitly set totalRows to be different from the initial
803801
# response to test update during iteration.
804-
"totalRows": "2",
802+
"totalRows": "1",
805803
"pageToken": None,
806-
"rows": [{"f": [{"v": "def"}]}],
804+
"rows": [{"f": [{"v": "abc"}]}],
807805
}
808806
conn = _make_connection(
809807
query_resource, query_resource_done, job_resource_done, query_page_resource
@@ -814,20 +812,19 @@ def test_result(self):
814812
result = job.result()
815813

816814
self.assertIsInstance(result, RowIterator)
817-
self.assertEqual(result.total_rows, 3)
815+
self.assertEqual(result.total_rows, 2)
818816
rows = list(result)
819-
self.assertEqual(len(rows), 2)
817+
self.assertEqual(len(rows), 1)
820818
self.assertEqual(rows[0].col1, "abc")
821-
self.assertEqual(rows[1].col1, "def")
822819
# Test that the total_rows property has changed during iteration, based
823820
# on the response from tabledata.list.
824-
self.assertEqual(result.total_rows, 2)
821+
self.assertEqual(result.total_rows, 1)
825822

826823
query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}"
827824
query_results_call = mock.call(
828825
method="GET",
829826
path=query_results_path,
830-
query_params={"location": "EU"},
827+
query_params={"maxResults": 0, "location": "EU"},
831828
timeout=None,
832829
)
833830
reload_call = mock.call(
@@ -842,7 +839,6 @@ def test_result(self):
842839
query_params={
843840
"fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS,
844841
"location": "EU",
845-
"pageToken": "next-page",
846842
},
847843
timeout=None,
848844
)
@@ -855,9 +851,7 @@ def test_result_with_done_job_calls_get_query_results(self):
855851
"jobComplete": True,
856852
"jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID},
857853
"schema": {"fields": [{"name": "col1", "type": "STRING"}]},
858-
"totalRows": "2",
859-
"rows": [{"f": [{"v": "abc"}]}],
860-
"pageToken": "next-page",
854+
"totalRows": "1",
861855
}
862856
job_resource = self._make_resource(started=True, ended=True, location="EU")
863857
job_resource["configuration"]["query"]["destinationTable"] = {
@@ -866,9 +860,9 @@ def test_result_with_done_job_calls_get_query_results(self):
866860
"tableId": "dest_table",
867861
}
868862
results_page_resource = {
869-
"totalRows": "2",
863+
"totalRows": "1",
870864
"pageToken": None,
871-
"rows": [{"f": [{"v": "def"}]}],
865+
"rows": [{"f": [{"v": "abc"}]}],
872866
}
873867
conn = _make_connection(query_resource_done, results_page_resource)
874868
client = _make_client(self.PROJECT, connection=conn)
@@ -877,15 +871,14 @@ def test_result_with_done_job_calls_get_query_results(self):
877871
result = job.result()
878872

879873
rows = list(result)
880-
self.assertEqual(len(rows), 2)
874+
self.assertEqual(len(rows), 1)
881875
self.assertEqual(rows[0].col1, "abc")
882-
self.assertEqual(rows[1].col1, "def")
883876

884877
query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}"
885878
query_results_call = mock.call(
886879
method="GET",
887880
path=query_results_path,
888-
query_params={"location": "EU"},
881+
query_params={"maxResults": 0, "location": "EU"},
889882
timeout=None,
890883
)
891884
query_results_page_call = mock.call(
@@ -894,7 +887,6 @@ def test_result_with_done_job_calls_get_query_results(self):
894887
query_params={
895888
"fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS,
896889
"location": "EU",
897-
"pageToken": "next-page",
898890
},
899891
timeout=None,
900892
)
@@ -908,12 +900,6 @@ def test_result_with_max_results(self):
908900
"jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID},
909901
"schema": {"fields": [{"name": "col1", "type": "STRING"}]},
910902
"totalRows": "5",
911-
# These rows are discarded because max_results is set.
912-
"rows": [
913-
{"f": [{"v": "xyz"}]},
914-
{"f": [{"v": "uvw"}]},
915-
{"f": [{"v": "rst"}]},
916-
],
917903
}
918904
query_page_resource = {
919905
"totalRows": "5",
@@ -939,7 +925,6 @@ def test_result_with_max_results(self):
939925
rows = list(result)
940926

941927
self.assertEqual(len(rows), 3)
942-
self.assertEqual(rows[0].col1, "abc")
943928
self.assertEqual(len(connection.api_request.call_args_list), 2)
944929
query_page_request = connection.api_request.call_args_list[1]
945930
self.assertEqual(
@@ -994,7 +979,7 @@ def test_result_w_retry(self):
994979
query_results_call = mock.call(
995980
method="GET",
996981
path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}",
997-
query_params={"location": "asia-northeast1"},
982+
query_params={"maxResults": 0, "location": "asia-northeast1"},
998983
timeout=None,
999984
)
1000985
reload_call = mock.call(
@@ -1094,12 +1079,6 @@ def test_result_w_page_size(self):
10941079
"jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID},
10951080
"schema": {"fields": [{"name": "col1", "type": "STRING"}]},
10961081
"totalRows": "4",
1097-
# These rows are discarded because page_size is set.
1098-
"rows": [
1099-
{"f": [{"v": "xyz"}]},
1100-
{"f": [{"v": "uvw"}]},
1101-
{"f": [{"v": "rst"}]},
1102-
],
11031082
}
11041083
job_resource = self._make_resource(started=True, ended=True, location="US")
11051084
q_config = job_resource["configuration"]["query"]
@@ -1130,7 +1109,6 @@ def test_result_w_page_size(self):
11301109
# Assert
11311110
actual_rows = list(result)
11321111
self.assertEqual(len(actual_rows), 4)
1133-
self.assertEqual(actual_rows[0].col1, "row1")
11341112

11351113
query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}"
11361114
query_page_1_call = mock.call(
@@ -1164,12 +1142,6 @@ def test_result_with_start_index(self):
11641142
"jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID},
11651143
"schema": {"fields": [{"name": "col1", "type": "STRING"}]},
11661144
"totalRows": "5",
1167-
# These rows are discarded because start_index is set.
1168-
"rows": [
1169-
{"f": [{"v": "xyz"}]},
1170-
{"f": [{"v": "uvw"}]},
1171-
{"f": [{"v": "rst"}]},
1172-
],
11731145
}
11741146
tabledata_resource = {
11751147
"totalRows": "5",
@@ -1196,7 +1168,6 @@ def test_result_with_start_index(self):
11961168
rows = list(result)
11971169

11981170
self.assertEqual(len(rows), 4)
1199-
self.assertEqual(rows[0].col1, "abc")
12001171
self.assertEqual(len(connection.api_request.call_args_list), 2)
12011172
tabledata_list_request = connection.api_request.call_args_list[1]
12021173
self.assertEqual(

tests/unit/job/test_query_pandas.py

+16-28
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,6 @@ def test_to_dataframe_bqstorage_preserve_order(query):
100100
]
101101
},
102102
"totalRows": "4",
103-
"pageToken": "next-page",
104103
}
105104
connection = _make_connection(get_query_results_resource, job_resource)
106105
client = _make_client(connection=connection)
@@ -135,16 +134,7 @@ def test_to_dataframe_bqstorage_preserve_order(query):
135134

136135

137136
@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`")
138-
@pytest.mark.parametrize(
139-
"method_kwargs",
140-
[
141-
{"create_bqstorage_client": False},
142-
# Since all rows are contained in the first page of results, the BigQuery
143-
# Storage API won't actually be used.
144-
{"create_bqstorage_client": True},
145-
],
146-
)
147-
def test_to_arrow(method_kwargs):
137+
def test_to_arrow():
148138
from google.cloud.bigquery.job import QueryJob as target_class
149139

150140
begun_resource = _make_job_resource(job_type="query")
@@ -172,6 +162,8 @@ def test_to_arrow(method_kwargs):
172162
},
173163
]
174164
},
165+
}
166+
tabledata_resource = {
175167
"rows": [
176168
{
177169
"f": [
@@ -185,15 +177,17 @@ def test_to_arrow(method_kwargs):
185177
{"v": {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}},
186178
]
187179
},
188-
],
180+
]
189181
}
190182
done_resource = copy.deepcopy(begun_resource)
191183
done_resource["status"] = {"state": "DONE"}
192-
connection = _make_connection(begun_resource, query_resource, done_resource)
184+
connection = _make_connection(
185+
begun_resource, query_resource, done_resource, tabledata_resource
186+
)
193187
client = _make_client(connection=connection)
194188
job = target_class.from_api_repr(begun_resource, client)
195189

196-
tbl = job.to_arrow(**method_kwargs)
190+
tbl = job.to_arrow(create_bqstorage_client=False)
197191

198192
assert isinstance(tbl, pyarrow.Table)
199193
assert tbl.num_rows == 2
@@ -375,16 +369,7 @@ def test_to_arrow_w_tqdm_wo_query_plan():
375369

376370

377371
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
378-
@pytest.mark.parametrize(
379-
"method_kwargs",
380-
[
381-
{"create_bqstorage_client": False},
382-
# Since all rows are contained in the first page of results, the BigQuery
383-
# Storage API won't actually be used.
384-
{"create_bqstorage_client": True},
385-
],
386-
)
387-
def test_to_dataframe(method_kwargs):
372+
def test_to_dataframe():
388373
from google.cloud.bigquery.job import QueryJob as target_class
389374

390375
begun_resource = _make_job_resource(job_type="query")
@@ -398,20 +383,24 @@ def test_to_dataframe(method_kwargs):
398383
{"name": "age", "type": "INTEGER", "mode": "NULLABLE"},
399384
]
400385
},
386+
}
387+
tabledata_resource = {
401388
"rows": [
402389
{"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
403390
{"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
404391
{"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]},
405392
{"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]},
406-
],
393+
]
407394
}
408395
done_resource = copy.deepcopy(begun_resource)
409396
done_resource["status"] = {"state": "DONE"}
410-
connection = _make_connection(begun_resource, query_resource, done_resource)
397+
connection = _make_connection(
398+
begun_resource, query_resource, done_resource, tabledata_resource
399+
)
411400
client = _make_client(connection=connection)
412401
job = target_class.from_api_repr(begun_resource, client)
413402

414-
df = job.to_dataframe(**method_kwargs)
403+
df = job.to_dataframe(create_bqstorage_client=False)
415404

416405
assert isinstance(df, pandas.DataFrame)
417406
assert len(df) == 4 # verify the number of rows
@@ -456,7 +445,6 @@ def test_to_dataframe_bqstorage():
456445
{"name": "age", "type": "INTEGER", "mode": "NULLABLE"},
457446
]
458447
},
459-
"pageToken": "next-page",
460448
}
461449
connection = _make_connection(query_resource)
462450
client = _make_client(connection=connection)

tests/unit/test_client.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ def test__get_query_results_miss_w_explicit_project_and_timeout(self):
319319
conn.api_request.assert_called_once_with(
320320
method="GET",
321321
path=path,
322-
query_params={"timeoutMs": 500, "location": self.LOCATION},
322+
query_params={"maxResults": 0, "timeoutMs": 500, "location": self.LOCATION},
323323
timeout=42,
324324
)
325325

@@ -336,7 +336,7 @@ def test__get_query_results_miss_w_client_location(self):
336336
conn.api_request.assert_called_once_with(
337337
method="GET",
338338
path="/projects/PROJECT/queries/nothere",
339-
query_params={"location": self.LOCATION},
339+
query_params={"maxResults": 0, "location": self.LOCATION},
340340
timeout=None,
341341
)
342342

0 commit comments

Comments (0)