Skip to content

Commit d73cf49

Browse files
authored
fix: updates tests based on revised hacker_news tables (#1591)
This fixes four broken tests that failed due to an unexpected change in the Google public dataset Hacker News: the `comments` table was deleted, leaving only the `full` table. This edit updates the table name in all four tests, renames the selected columns to match the `full` table's schema (`time_ts` → `timestamp`, `author` → `by`), and updates the expected results for one of the tests. Fixes #1590 🦕
1 parent 41799b4 commit d73cf49

File tree

2 files changed

+25
-24
lines changed

2 files changed

+25
-24
lines changed

tests/system/test_client.py

+15-14
Original file line numberDiff line numberDiff line change
@@ -1706,8 +1706,8 @@ def test_dbapi_fetch_w_bqstorage_client_large_result_set(self):
17061706

17071707
cursor.execute(
17081708
"""
1709-
SELECT id, `by`, time_ts
1710-
FROM `bigquery-public-data.hacker_news.comments`
1709+
SELECT id, `by`, timestamp
1710+
FROM `bigquery-public-data.hacker_news.full`
17111711
ORDER BY `id` ASC
17121712
LIMIT 100000
17131713
"""
@@ -1717,27 +1717,28 @@ def test_dbapi_fetch_w_bqstorage_client_large_result_set(self):
17171717

17181718
field_name = operator.itemgetter(0)
17191719
fetched_data = [sorted(row.items(), key=field_name) for row in result_rows]
1720-
17211720
# Since DB API is not thread safe, only a single result stream should be
17221721
# requested by the BQ storage client, meaning that results should arrive
17231722
# in the sorted order.
1723+
17241724
expected_data = [
17251725
[
1726-
("by", "sama"),
1727-
("id", 15),
1728-
("time_ts", datetime.datetime(2006, 10, 9, 19, 51, 1, tzinfo=UTC)),
1726+
("by", "pg"),
1727+
("id", 1),
1728+
("timestamp", datetime.datetime(2006, 10, 9, 18, 21, 51, tzinfo=UTC)),
17291729
],
17301730
[
1731-
("by", "pg"),
1732-
("id", 17),
1733-
("time_ts", datetime.datetime(2006, 10, 9, 19, 52, 45, tzinfo=UTC)),
1731+
("by", "phyllis"),
1732+
("id", 2),
1733+
("timestamp", datetime.datetime(2006, 10, 9, 18, 30, 28, tzinfo=UTC)),
17341734
],
17351735
[
1736-
("by", "pg"),
1737-
("id", 22),
1738-
("time_ts", datetime.datetime(2006, 10, 10, 2, 18, 22, tzinfo=UTC)),
1736+
("by", "phyllis"),
1737+
("id", 3),
1738+
("timestamp", datetime.datetime(2006, 10, 9, 18, 40, 33, tzinfo=UTC)),
17391739
],
17401740
]
1741+
17411742
self.assertEqual(fetched_data, expected_data)
17421743

17431744
def test_dbapi_dry_run_query(self):
@@ -1769,8 +1770,8 @@ def test_dbapi_connection_does_not_leak_sockets(self):
17691770

17701771
cursor.execute(
17711772
"""
1772-
SELECT id, `by`, time_ts
1773-
FROM `bigquery-public-data.hacker_news.comments`
1773+
SELECT id, `by`, timestamp
1774+
FROM `bigquery-public-data.hacker_news.full`
17741775
ORDER BY `id` ASC
17751776
LIMIT 100000
17761777
"""

tests/system/test_pandas.py

+10-10
Original file line numberDiff line numberDiff line change
@@ -740,21 +740,21 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv_floats(
740740

741741
def test_query_results_to_dataframe(bigquery_client):
742742
QUERY = """
743-
SELECT id, author, time_ts, dead
744-
FROM `bigquery-public-data.hacker_news.comments`
743+
SELECT id, `by`, timestamp, dead
744+
FROM `bigquery-public-data.hacker_news.full`
745745
LIMIT 10
746746
"""
747747

748748
df = bigquery_client.query(QUERY).result().to_dataframe()
749749

750750
assert isinstance(df, pandas.DataFrame)
751751
assert len(df) == 10 # verify the number of rows
752-
column_names = ["id", "author", "time_ts", "dead"]
752+
column_names = ["id", "by", "timestamp", "dead"]
753753
assert list(df) == column_names # verify the column names
754754
exp_datatypes = {
755755
"id": int,
756-
"author": str,
757-
"time_ts": pandas.Timestamp,
756+
"by": str,
757+
"timestamp": pandas.Timestamp,
758758
"dead": bool,
759759
}
760760
for _, row in df.iterrows():
@@ -766,8 +766,8 @@ def test_query_results_to_dataframe(bigquery_client):
766766

767767
def test_query_results_to_dataframe_w_bqstorage(bigquery_client):
768768
query = """
769-
SELECT id, author, time_ts, dead
770-
FROM `bigquery-public-data.hacker_news.comments`
769+
SELECT id, `by`, timestamp, dead
770+
FROM `bigquery-public-data.hacker_news.full`
771771
LIMIT 10
772772
"""
773773

@@ -779,12 +779,12 @@ def test_query_results_to_dataframe_w_bqstorage(bigquery_client):
779779

780780
assert isinstance(df, pandas.DataFrame)
781781
assert len(df) == 10 # verify the number of rows
782-
column_names = ["id", "author", "time_ts", "dead"]
782+
column_names = ["id", "by", "timestamp", "dead"]
783783
assert list(df) == column_names
784784
exp_datatypes = {
785785
"id": int,
786-
"author": str,
787-
"time_ts": pandas.Timestamp,
786+
"by": str,
787+
"timestamp": pandas.Timestamp,
788788
"dead": bool,
789789
}
790790
for index, row in df.iterrows():

0 commit comments

Comments (0)