
build: update google-cloud-bigquery to 3.31.0 to support JSON when allow_large_results=False #1547

Merged: 1 commit, Mar 28, 2025.
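This dependency bump is what enables the test simplifications below: google-cloud-bigquery 3.31.0 (together with pandas-gbq 0.26.1) can download JSON query results over the default result path, so bigframes no longer has to force allow_large_results=True whenever a JSON column is involved. A minimal sketch of the pattern the updated tests rely on, assuming a configured bigframes session; the data values are illustrative:

import bigframes.pandas as bpd
import bigframes.dtypes as dtypes

# A JSON-dtype Series; before google-cloud-bigquery 3.31.0, fetching this
# with allow_large_results=False failed, so tests forced True.
s = bpd.Series(['{"a": 1}', '{"b": [1, 2]}', None], dtype=dtypes.JSON_DTYPE)

# With the upgraded client, the default (non-large-results) path works too.
result = s.to_pandas(allow_large_results=False)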
4 changes: 2 additions & 2 deletions setup.py
@@ -41,7 +41,7 @@
     "google-auth >=2.15.0,<3.0dev",
     "google-cloud-bigtable >=2.24.0",
     "google-cloud-pubsub >=2.21.4",
-    "google-cloud-bigquery[bqstorage,pandas] >=3.18.0",
+    "google-cloud-bigquery[bqstorage,pandas] >=3.31.0",
     "google-cloud-functions >=1.12.0",
     "google-cloud-bigquery-connection >=1.12.0",
     "google-cloud-iam >=2.12.1",
@@ -51,7 +51,7 @@
     "jellyfish >=0.8.9,<1.1.2",
     "numpy >=1.24.0",
     "pandas >=1.5.3",
-    "pandas-gbq >=0.26.0",
+    "pandas-gbq >=0.26.1",
     "pyarrow >=15.0.2",
     "pydata-google-auth >=1.8.2",
     "requests >=2.27.1",

4 changes: 2 additions & 2 deletions testing/constraints-3.9.txt
@@ -6,7 +6,7 @@ geopandas==0.12.2
 google-auth==2.15.0
 google-cloud-bigtable==2.24.0
 google-cloud-pubsub==2.21.4
-google-cloud-bigquery==3.18.0
+google-cloud-bigquery==3.31.0
 google-cloud-functions==1.12.0
 google-cloud-bigquery-connection==1.12.0
 google-cloud-iam==2.12.1
@@ -15,7 +15,7 @@ google-cloud-storage==2.0.0
 jellyfish==0.8.9
 numpy==1.24.0
 pandas==1.5.3
-pandas-gbq==0.26.0
+pandas-gbq==0.26.1
 pyarrow==15.0.2
 pydata-google-auth==1.8.2
 requests==2.27.1

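The pins in testing/constraints-3.9.txt mirror the minimum versions declared in setup.py, so the bumps above keep the oldest-dependency test run aligned with the new google-cloud-bigquery and pandas-gbq floors.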
66 changes: 11 additions & 55 deletions tests/system/small/bigquery/test_json.py
@@ -36,11 +36,7 @@ def test_json_set_at_json_path(json_path, expected_json):
     actual = bbq.json_set(s, json_path_value_pairs=[(json_path, 10)])
     expected = bpd.Series(expected_json, dtype=dtypes.JSON_DTYPE)
 
-    # TODO(b/401630655): JSON is not compatible with allow_large_results=False
-    pd.testing.assert_series_equal(
-        actual.to_pandas(allow_large_results=True),
-        expected.to_pandas(allow_large_results=True),
-    )
+    pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())
 
 
 @pytest.mark.parametrize(
@@ -60,11 +56,7 @@ def test_json_set_at_json_value_type(json_value, expected_json):
     actual = bbq.json_set(s, json_path_value_pairs=[("$.a.b", json_value)])
     expected = bpd.Series(expected_json, dtype=dtypes.JSON_DTYPE)
 
-    # TODO(b/401630655): JSON is not compatible with allow_large_results=False
-    pd.testing.assert_series_equal(
-        actual.to_pandas(allow_large_results=True),
-        expected.to_pandas(allow_large_results=True),
-    )
+    pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())
 
 
 def test_json_set_w_more_pairs():
@@ -77,11 +69,7 @@
     expected_json = ['{"a": 3, "b": 2}', '{"a": 4, "b": 2}', '{"a": 5, "b": 2, "c": 1}']
     expected = bpd.Series(expected_json, dtype=dtypes.JSON_DTYPE)
 
-    # TODO(b/401630655): JSON is not compatible with allow_large_results=False
-    pd.testing.assert_series_equal(
-        actual.to_pandas(allow_large_results=True),
-        expected.to_pandas(allow_large_results=True),
-    )
+    pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())
 
 
 def test_json_set_w_invalid_value_type():
@@ -114,11 +102,7 @@ def test_json_extract_from_json():
     actual = bbq.json_extract(s, "$.a.b")
     expected = bpd.Series(["[1, 2]", None, "0"], dtype=dtypes.JSON_DTYPE)
 
-    # TODO(b/401630655): JSON is not compatible with allow_large_results=False
-    pd.testing.assert_series_equal(
-        actual.to_pandas(allow_large_results=True),
-        expected.to_pandas(allow_large_results=True),
-    )
+    pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())
 
 
 def test_json_extract_from_string():
@@ -129,11 +113,7 @@
     actual = bbq.json_extract(s, "$.a.b")
     expected = bpd.Series(["[1,2]", None, "0"], dtype=pd.StringDtype(storage="pyarrow"))
 
-    # TODO(b/401630655): JSON is not compatible with allow_large_results=False
-    pd.testing.assert_series_equal(
-        actual.to_pandas(allow_large_results=True),
-        expected.to_pandas(allow_large_results=True),
-    )
+    pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())
 
 
 def test_json_extract_w_invalid_series_type():
@@ -165,11 +145,7 @@ def test_json_extract_array_from_json():
     expected.index.name = None
     expected.name = None
 
-    # TODO(b/401630655): JSON is not compatible with allow_large_results=False
-    pd.testing.assert_series_equal(
-        actual.to_pandas(allow_large_results=True),
-        expected.to_pandas(allow_large_results=True),
-    )
+    pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())
 
 
 def test_json_extract_array_from_json_strings():
@@ -183,11 +159,7 @@
         dtype=pd.ArrowDtype(pa.list_(pa.string())),
     )
 
-    # TODO(b/401630655): JSON is not compatible with allow_large_results=False
-    pd.testing.assert_series_equal(
-        actual.to_pandas(allow_large_results=True),
-        expected.to_pandas(allow_large_results=True),
-    )
+    pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())
 
 
 def test_json_extract_array_from_json_array_strings():
@@ -201,11 +173,7 @@
         dtype=pd.ArrowDtype(pa.list_(pa.string())),
     )
 
-    # TODO(b/401630655): JSON is not compatible with allow_large_results=False
-    pd.testing.assert_series_equal(
-        actual.to_pandas(allow_large_results=True),
-        expected.to_pandas(allow_large_results=True),
-    )
+    pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())
 
 
 def test_json_extract_array_w_invalid_series_type():
@@ -219,35 +187,23 @@ def test_json_extract_string_array_from_json_strings():
     actual = bbq.json_extract_string_array(s, "$.a")
     expected = bpd.Series([["ab", "2", "3 xy"], [], ["4", "5"]])
 
-    # TODO(b/401630655): JSON is not compatible with allow_large_results=False
-    pd.testing.assert_series_equal(
-        actual.to_pandas(allow_large_results=True),
-        expected.to_pandas(allow_large_results=True),
-    )
+    pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())
 
 
 def test_json_extract_string_array_from_array_strings():
     s = bpd.Series(["[1, 2, 3]", "[]", "[4,5]"])
     actual = bbq.json_extract_string_array(s)
     expected = bpd.Series([["1", "2", "3"], [], ["4", "5"]])
 
-    # TODO(b/401630655): JSON is not compatible with allow_large_results=False
-    pd.testing.assert_series_equal(
-        actual.to_pandas(allow_large_results=True),
-        expected.to_pandas(allow_large_results=True),
-    )
+    pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())
 
 
 def test_json_extract_string_array_as_float_array_from_array_strings():
     s = bpd.Series(["[1, 2.5, 3]", "[]", "[4,5]"])
     actual = bbq.json_extract_string_array(s, value_dtype=dtypes.FLOAT_DTYPE)
     expected = bpd.Series([[1, 2.5, 3], [], [4, 5]])
 
-    # TODO(b/401630655): JSON is not compatible with allow_large_results=False
-    pd.testing.assert_series_equal(
-        actual.to_pandas(allow_large_results=True),
-        expected.to_pandas(allow_large_results=True),
-    )
+    pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())
 
 
 def test_json_extract_string_array_w_invalid_series_type():

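Every block removed above carried the same TODO(b/401630655) workaround, so plain to_pandas() now suffices. For queries whose results genuinely need a destination table, the option the workaround used is still available; a usage sketch, where the table name is a hypothetical placeholder:

import bigframes
import bigframes.pandas as bpd

df = bpd.read_gbq("SELECT * FROM `my-project.my_dataset.big_table`")

with bigframes.option_context("bigquery.allow_large_results", True):
    # to_pandas() calls inside this block may route results through a
    # destination table instead of the default in-memory response path.
    pdf = df.to_pandas()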
5 changes: 1 addition & 4 deletions tests/system/small/blob/test_properties.py
@@ -55,10 +55,7 @@ def test_blob_version(images_mm_df: bpd.DataFrame):
 
 
 def test_blob_metadata(images_mm_df: bpd.DataFrame):
-    # allow_large_result=False incompatible with json b/401630655
-    with bigframes.option_context(
-        "bigquery.allow_large_results", True, "experiments.blob", True
-    ):
+    with bigframes.option_context("experiments.blob", True):
         actual = images_mm_df["blob_col"].blob.metadata().to_pandas()
         expected = pd.Series(
             [

7 changes: 0 additions & 7 deletions tests/system/small/ml/test_llm.py
@@ -24,13 +24,6 @@
 from tests.system import utils
 
 
-# Until b/401630655 is resolved, ML apis return json, not compatible with allow_large_results=False
-@pytest.fixture(scope="module", autouse=True)
-def always_create_table():
-    with bigframes.option_context("bigquery.allow_large_results", True):
-        yield
-
-
 @pytest.mark.parametrize(
     "model_name",
     ("text-embedding-005", "text-embedding-004", "text-multilingual-embedding-002"),

5 changes: 2 additions & 3 deletions tests/system/small/test_dataframe.py
@@ -4607,13 +4607,12 @@ def test_df_drop_duplicates(scalars_df_index, scalars_pandas_df_index, keep, sub
     ],
 )
 def test_df_drop_duplicates_w_json(json_df, keep):
-    bf_df = json_df.drop_duplicates(keep=keep).to_pandas(allow_large_results=True)
+    bf_df = json_df.drop_duplicates(keep=keep).to_pandas()
 
     # drop_duplicates relies on pa.compute.dictionary_encode, which is incompatible
     # with Arrow string extension types. Temporary conversion to standard Pandas
     # strings is required.
-    # allow_large_results=True for b/401630655
-    json_pandas_df = json_df.to_pandas(allow_large_results=True)
+    json_pandas_df = json_df.to_pandas()
     json_pandas_df["json_col"] = json_pandas_df["json_col"].astype(
         pd.StringDtype(storage="pyarrow")
     )

35 changes: 16 additions & 19 deletions tests/system/small/test_series.py
@@ -322,24 +322,22 @@ def test_series_construct_local_unordered_has_sequential_index(unordered_session
 
 
 def test_series_construct_w_dtype_for_json():
-    # Until b/401630655 is resolved, json, not compatible with allow_large_results=False
-    with bigframes.option_context("bigquery.allow_large_results", True):
-        data = [
-            "1",
-            '"str"',
-            "false",
-            '["a", {"b": 1}, null]',
-            None,
-            '{"a": {"b": [1, 2, 3], "c": true}}',
-        ]
-        s = bigframes.pandas.Series(data, dtype=dtypes.JSON_DTYPE)
+    data = [
+        "1",
+        '"str"',
+        "false",
+        '["a", {"b": 1}, null]',
+        None,
+        '{"a": {"b": [1, 2, 3], "c": true}}',
+    ]
+    s = bigframes.pandas.Series(data, dtype=dtypes.JSON_DTYPE)
 
-        assert s[0] == "1"
-        assert s[1] == '"str"'
-        assert s[2] == "false"
-        assert s[3] == '["a",{"b":1},null]'
-        assert pd.isna(s[4])
-        assert s[5] == '{"a":{"b":[1,2,3],"c":true}}'
+    assert s[0] == "1"
+    assert s[1] == '"str"'
+    assert s[2] == "false"
+    assert s[3] == '["a",{"b":1},null]'
+    assert pd.isna(s[4])
+    assert s[5] == '{"a":{"b":[1,2,3],"c":true}}'
 
 
 def test_series_keys(scalars_dfs):
@@ -402,8 +400,7 @@ def test_get_column(scalars_dfs, col_name, expected_dtype):
 
 def test_get_column_w_json(json_df, json_pandas_df):
     series = json_df["json_col"]
-    # Until b/401630655 is resolved, json not compatible with allow_large_results=False
-    series_pandas = series.to_pandas(allow_large_results=True)
+    series_pandas = series.to_pandas()
     assert series.dtype == pd.ArrowDtype(db_dtypes.JSONArrowType())
     assert series_pandas.shape[0] == json_pandas_df.shape[0]