
Commit eaa1a52

perf: decrease the threshold in which we use the BQ Storage Read API (#1925)
* perf: decrease the threshold in which we use the BQ Storage Read API
* fix unit test
* update comment
1 parent 0dac714 commit eaa1a52

File tree: 2 files changed (+20, -2 lines)


google/cloud/bigquery/table.py

+11 -1
@@ -108,7 +108,17 @@
 
 # How many of the total rows need to be downloaded already for us to skip
 # calling the BQ Storage API?
-ALMOST_COMPLETELY_CACHED_RATIO = 0.333
+#
+# In microbenchmarks on 2024-05-21, I (tswast@) measured that at about 2 MB of
+# remaining results, it's faster to use the BQ Storage Read API to download
+# the results than to use jobs.getQueryResults. Since we don't have a good way
+# to know the remaining bytes, we estimate by the remaining number of rows.
+#
+# Except when the rows themselves are unusually large, I observe that a single
+# page of results is around 10 MB. Therefore, the proportion of rows already
+# downloaded should be 10 (first page) / 12 (all results) or more for it to be
+# worth it to finish the download with jobs.getQueryResults.
+ALMOST_COMPLETELY_CACHED_RATIO = 0.833333
 
 
 def _reference_getter(table):
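
To make the arithmetic in the new comment concrete, here is a minimal, self-contained sketch of how a cached-rows ratio like this one can gate the download path. The actual _is_almost_completely_cached() method is not part of this diff; should_skip_bqstorage below is a hypothetical helper for illustration, not the library's API.

# Hypothetical helper for illustration only; not the library's API.
# It mirrors the reasoning in the comment above: skip the BQ Storage Read
# API when the cached portion of the result set is already large enough
# that finishing over jobs.getQueryResults should be faster.

ALMOST_COMPLETELY_CACHED_RATIO = 0.833333  # value introduced by this commit


def should_skip_bqstorage(cached_rows, total_rows):
    """Return True when enough rows are cached to skip the BQ Storage API."""
    if total_rows == 0:
        return True  # empty result set; nothing left to download
    return cached_rows >= total_rows * ALMOST_COMPLETELY_CACHED_RATIO


# With a ~10 MB first page and the ~2 MB crossover from the microbenchmark,
# at least 10 of every 12 rows must already be cached:
assert should_skip_bqstorage(cached_rows=10, total_rows=12)
assert not should_skip_bqstorage(cached_rows=4, total_rows=12)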

tests/unit/test_table.py

+9 -1
@@ -2307,9 +2307,17 @@ def test__is_almost_completely_cached_returns_true_with_some_rows_remaining(self
         rows = [
             {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
             {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
+            {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]},
+            {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]},
+            {"f": [{"v": "Pebbles Phlyntstone"}, {"v": "4"}]},
+            {"f": [{"v": "Bamm-Bamm Rhubble"}, {"v": "5"}]},
+            {"f": [{"v": "Joseph Rockhead"}, {"v": "32"}]},
+            {"f": [{"v": "Perry Masonry"}, {"v": "33"}]},
         ]
         first_page = {"pageToken": "next-page", "rows": rows}
-        iterator = self._make_one(first_page_response=first_page, total_rows=6)
+        iterator = self._make_one(
+            first_page_response=first_page, total_rows=len(rows) + 1
+        )
         self.assertTrue(iterator._is_almost_completely_cached())
 
     def test__is_almost_completely_cached_returns_true_with_no_rows_remaining(self):
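
The fixture change tracks the new ratio: the old fixture cached 2 of 6 rows (about 0.33), which cleared the previous 0.333 threshold but would not clear 0.833333, so the test now caches 8 rows with total_rows = len(rows) + 1 = 9. A quick check of the numbers, assuming the ratio comparison sketched after the table.py diff above:

# Worked check of the test fixture's numbers (assumes the >= ratio
# comparison sketched above; the test itself only calls
# _is_almost_completely_cached()).
OLD_RATIO, NEW_RATIO = 0.333, 0.833333

assert 2 / 6 >= OLD_RATIO          # old fixture: passed the old threshold
assert not (2 / 6 >= NEW_RATIO)    # ...but would fail the new one
assert 8 / 9 >= NEW_RATIO          # new fixture: about 0.889, still passes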
