Skip to content

Commit 0bccf6c

Browse files
authored
Make total_rows available on RowIterator before iteration (#7622)
* Make total_rows available on RowIterator before iteration. After running a query, the total number of rows is available from the call to the getQueryResults API. This commit plumbs the total rows through to the faux Table created in QueryJob.result and then on through to the RowIterator created by list_rows.
* Simplify RowIterator constructor. Add test comments. Use getattr instead of protecting with hasattr in the RowIterator constructor. Add comments about intentionally conflicting values for total_rows.
1 parent d8212f2 commit 0bccf6c

File tree

6 files changed

+105
-57
lines changed

6 files changed

+105
-57
lines changed

bigquery/docs/snippets.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -1908,8 +1908,7 @@ def test_client_query_total_rows(client, capsys):
19081908
location="US",
19091909
) # API request - starts the query
19101910

1911-
results = query_job.result() # Waits for query to complete.
1912-
next(iter(results)) # Fetch the first page of results, which contains total_rows.
1911+
results = query_job.result() # Wait for query to complete.
19131912
print("Got {} rows.".format(results.total_rows))
19141913
# [END bigquery_query_total_rows]
19151914

bigquery/google/cloud/bigquery/job.py

+1
Original file line numberDiff line numberDiff line change
@@ -2835,6 +2835,7 @@ def result(self, timeout=None, retry=DEFAULT_RETRY):
28352835
schema = self._query_results.schema
28362836
dest_table_ref = self.destination
28372837
dest_table = Table(dest_table_ref, schema=schema)
2838+
dest_table._properties["numRows"] = self._query_results.total_rows
28382839
return self._client.list_rows(dest_table, retry=retry)
28392840

28402841
def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=None):

bigquery/google/cloud/bigquery/table.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -1308,13 +1308,13 @@ def __init__(
13081308
page_start=_rows_page_start,
13091309
next_token="pageToken",
13101310
)
1311-
self._schema = schema
13121311
self._field_to_index = _helpers._field_to_index_mapping(schema)
1313-
self._total_rows = None
13141312
self._page_size = page_size
1315-
self._table = table
1316-
self._selected_fields = selected_fields
13171313
self._project = client.project
1314+
self._schema = schema
1315+
self._selected_fields = selected_fields
1316+
self._table = table
1317+
self._total_rows = getattr(table, "num_rows", None)
13181318

13191319
def _get_next_page_response(self):
13201320
"""Requests the next page from the path provided.

bigquery/tests/unit/test_client.py

+15-12
Original file line numberDiff line numberDiff line change
@@ -4359,18 +4359,21 @@ def test_list_rows_empty_table(self):
43594359
client._connection = _make_connection(response, response)
43604360

43614361
# Table that has no schema because it's an empty table.
4362-
rows = tuple(
4363-
client.list_rows(
4364-
# Test with using a string for the table ID.
4365-
"{}.{}.{}".format(
4366-
self.TABLE_REF.project,
4367-
self.TABLE_REF.dataset_id,
4368-
self.TABLE_REF.table_id,
4369-
),
4370-
selected_fields=[],
4371-
)
4362+
rows = client.list_rows(
4363+
# Test with using a string for the table ID.
4364+
"{}.{}.{}".format(
4365+
self.TABLE_REF.project,
4366+
self.TABLE_REF.dataset_id,
4367+
self.TABLE_REF.table_id,
4368+
),
4369+
selected_fields=[],
43724370
)
4373-
self.assertEqual(rows, ())
4371+
4372+
# When a table reference / string and selected_fields are provided,
4373+
# total_rows can't be populated until iteration starts.
4374+
self.assertIsNone(rows.total_rows)
4375+
self.assertEqual(tuple(rows), ())
4376+
self.assertEqual(rows.total_rows, 0)
43744377

43754378
def test_list_rows_query_params(self):
43764379
from google.cloud.bigquery.table import Table, SchemaField
@@ -4573,7 +4576,7 @@ def test_list_rows_with_missing_schema(self):
45734576

45744577
conn.api_request.assert_called_once_with(method="GET", path=table_path)
45754578
conn.api_request.reset_mock()
4576-
self.assertIsNone(row_iter.total_rows, msg=repr(table))
4579+
self.assertEqual(row_iter.total_rows, 2, msg=repr(table))
45774580

45784581
rows = list(row_iter)
45794582
conn.api_request.assert_called_once_with(

bigquery/tests/unit/test_job.py

+23-2
Original file line numberDiff line numberDiff line change
@@ -4024,21 +4024,41 @@ def test_estimated_bytes_processed(self):
40244024
self.assertEqual(job.estimated_bytes_processed, est_bytes)
40254025

40264026
def test_result(self):
4027+
from google.cloud.bigquery.table import RowIterator
4028+
40274029
query_resource = {
40284030
"jobComplete": True,
40294031
"jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID},
40304032
"schema": {"fields": [{"name": "col1", "type": "STRING"}]},
4033+
"totalRows": "2",
40314034
}
4032-
connection = _make_connection(query_resource, query_resource)
4035+
tabledata_resource = {
4036+
# Explicitly set totalRows to be different from the query response
4037+
# to test update during iteration.
4038+
"totalRows": "1",
4039+
"pageToken": None,
4040+
"rows": [{"f": [{"v": "abc"}]}],
4041+
}
4042+
connection = _make_connection(query_resource, tabledata_resource)
40334043
client = _make_client(self.PROJECT, connection=connection)
40344044
resource = self._make_resource(ended=True)
40354045
job = self._get_target_class().from_api_repr(resource, client)
40364046

40374047
result = job.result()
40384048

4039-
self.assertEqual(list(result), [])
4049+
self.assertIsInstance(result, RowIterator)
4050+
self.assertEqual(result.total_rows, 2)
4051+
4052+
rows = list(result)
4053+
self.assertEqual(len(rows), 1)
4054+
self.assertEqual(rows[0].col1, "abc")
4055+
# Test that the total_rows property has changed during iteration, based
4056+
# on the response from tabledata.list.
4057+
self.assertEqual(result.total_rows, 1)
40404058

40414059
def test_result_w_empty_schema(self):
4060+
from google.cloud.bigquery.table import _EmptyRowIterator
4061+
40424062
# Destination table may have no schema for some DDL and DML queries.
40434063
query_resource = {
40444064
"jobComplete": True,
@@ -4052,6 +4072,7 @@ def test_result_w_empty_schema(self):
40524072

40534073
result = job.result()
40544074

4075+
self.assertIsInstance(result, _EmptyRowIterator)
40554076
self.assertEqual(list(result), [])
40564077

40574078
def test_result_invokes_begins(self):

0 commit comments

Comments
 (0)