Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add roundingmode enum, wiring, and tests #2121

Merged
merged 8 commits into from
Jan 31, 2025
42 changes: 41 additions & 1 deletion google/cloud/bigquery/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,11 @@ class KeyResultStatementKind:


class StandardSqlTypeNames(str, enum.Enum):
"""Enum of allowed SQL type names in schema.SchemaField.

Datatype used in GoogleSQL.
"""

def _generate_next_value_(name, start, count, last_values):
return name

Expand All @@ -267,6 +272,9 @@ def _generate_next_value_(name, start, count, last_values):
ARRAY = enum.auto()
STRUCT = enum.auto()
RANGE = enum.auto()
# NOTE: FOREIGN acts as a wrapper for data types
# not natively understood by BigQuery unless translated
FOREIGN = enum.auto()


class EntityTypes(str, enum.Enum):
Expand All @@ -285,7 +293,10 @@ class EntityTypes(str, enum.Enum):
# See also: https://cloud.google.com/bigquery/data-types#legacy_sql_data_types
# and https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
class SqlTypeNames(str, enum.Enum):
"""Enum of allowed SQL type names in schema.SchemaField."""
"""Enum of allowed SQL type names in schema.SchemaField.

Datatype used in Legacy SQL.
"""

STRING = "STRING"
BYTES = "BYTES"
Expand All @@ -306,6 +317,9 @@ class SqlTypeNames(str, enum.Enum):
DATETIME = "DATETIME"
INTERVAL = "INTERVAL" # NOTE: not available in legacy types
RANGE = "RANGE" # NOTE: not available in legacy types
# NOTE: FOREIGN acts as a wrapper for data types
# not natively understood by BigQuery unless translated
FOREIGN = "FOREIGN"


class WriteDisposition(object):
Expand Down Expand Up @@ -344,3 +358,29 @@ class DeterminismLevel:

NOT_DETERMINISTIC = "NOT_DETERMINISTIC"
"""The UDF is not deterministic."""


class RoundingMode(enum.Enum):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe if we do a class RoundingMode(str, enum.Enum): with enum.auto() values as above in StandardSqlTypeNames, this will give strings compatible with the BQ REST API (assuming these work like other enums in that regard).

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

"""Rounding mode options that can be used when storing NUMERIC or BIGNUMERIC
values.

ROUNDING_MODE_UNSPECIFIED: will default to using ROUND_HALF_AWAY_FROM_ZERO.

ROUND_HALF_AWAY_FROM_ZERO: rounds half values away from zero when applying
precision and scale upon writing of NUMERIC and BIGNUMERIC values.
For Scale: 0
* 1.1, 1.2, 1.3, 1.4 => 1
* 1.5, 1.6, 1.7, 1.8, 1.9 => 2

ROUND_HALF_EVEN: rounds half values to the nearest even value when applying
precision and scale upon writing of NUMERIC and BIGNUMERIC values.
For Scale: 0
* 1.1, 1.2, 1.3, 1.4 => 1
* 1.5 => 2
* 1.6, 1.7, 1.8, 1.9 => 2
* 2.5 => 2
"""

ROUNDING_MODE_UNSPECIFIED = 0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm curious, is this what the REST API uses? In past, the BQ enums mapped to strings, even if they start life in proto as integers.

Per https://protobuf.dev/programming-guides/json/, supposedly both should be accepted, but I think we should use whatever the JSON API returns so customers' equality checks function.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The REST API is as follows:

"roundingMode": {
          "description": "Optional. Specifies the rounding mode to be used when storing values of NUMERIC and BIGNUMERIC type.",
          "enum": [
            "ROUNDING_MODE_UNSPECIFIED",
            "ROUND_HALF_AWAY_FROM_ZERO",
            "ROUND_HALF_EVEN"
          ],
          "enumDescriptions": [
            "Unspecified will default to using ROUND_HALF_AWAY_FROM_ZERO.",
            "ROUND_HALF_AWAY_FROM_ZERO rounds half values away from zero when applying precision and scale upon writing of NUMERIC and BIGNUMERIC values. For Scale: 0 1.1, 1.2, 1.3, 1.4 =\u003e 1 1.5, 1.6, 1.7, 1.8, 1.9 =\u003e 2",
            "ROUND_HALF_EVEN rounds half values to the nearest even value when applying precision and scale upon writing of NUMERIC and BIGNUMERIC values. For Scale: 0 1.1, 1.2, 1.3, 1.4 =\u003e 1 1.5 =\u003e 2 1.6, 1.7, 1.8, 1.9 =\u003e 2 2.5 =\u003e 2"
          ],
          "type": "string"

ROUND_HALF_AWAY_FROM_ZERO = 1
ROUND_HALF_EVEN = 2
70 changes: 68 additions & 2 deletions google/cloud/bigquery/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,15 @@

from google.cloud.bigquery import _helpers
from google.cloud.bigquery import standard_sql
from google.cloud.bigquery import enums
from google.cloud.bigquery.enums import StandardSqlTypeNames


_STRUCT_TYPES = ("RECORD", "STRUCT")

# SQL types reference:
# https://cloud.google.com/bigquery/data-types#legacy_sql_data_types
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
# LEGACY SQL: https://cloud.google.com/bigquery/data-types#legacy_sql_data_types
# GoogleSQL: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
LEGACY_TO_STANDARD_TYPES = {
"STRING": StandardSqlTypeNames.STRING,
"BYTES": StandardSqlTypeNames.BYTES,
Expand All @@ -48,6 +49,7 @@
"DATE": StandardSqlTypeNames.DATE,
"TIME": StandardSqlTypeNames.TIME,
"DATETIME": StandardSqlTypeNames.DATETIME,
"FOREIGN": StandardSqlTypeNames.FOREIGN,
# no direct conversion from ARRAY, the latter is represented by mode="REPEATED"
}
"""String names of the legacy SQL types to integer codes of Standard SQL standard_sql."""
Expand Down Expand Up @@ -166,6 +168,35 @@ class SchemaField(object):
the type is RANGE, this field is required. Possible values for the
field element type of a RANGE include `DATE`, `DATETIME` and
`TIMESTAMP`.

rounding_mode: Union[enums.RoundingMode, str, None]
Specifies the rounding mode to be used when storing values of
NUMERIC and BIGNUMERIC type.

Unspecified will default to using ROUND_HALF_AWAY_FROM_ZERO.
ROUND_HALF_AWAY_FROM_ZERO rounds half values away from zero
when applying precision and scale upon writing of NUMERIC and BIGNUMERIC
values.

For Scale: 0
1.1, 1.2, 1.3, 1.4 => 1
1.5, 1.6, 1.7, 1.8, 1.9 => 2

ROUND_HALF_EVEN rounds half values to the nearest even value
when applying precision and scale upon writing of NUMERIC and BIGNUMERIC
values.

For Scale: 0
1.1, 1.2, 1.3, 1.4 => 1
1.5 => 2
1.6, 1.7, 1.8, 1.9 => 2
2.5 => 2

foreign_type_definition: Optional[str]
Definition of the foreign data type.

Only valid for top-level schema fields (not nested fields).
If the type is FOREIGN, this field is required.
"""

def __init__(
Expand All @@ -181,11 +212,14 @@ def __init__(
scale: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
max_length: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
range_element_type: Union[FieldElementType, str, None] = None,
rounding_mode: Union[enums.RoundingMode, str, None] = None,
foreign_type_definition: Optional[str] = None,
):
self._properties: Dict[str, Any] = {
"name": name,
"type": field_type,
}
self._properties["name"] = name
if mode is not None:
self._properties["mode"] = mode.upper()
if description is not _DEFAULT_VALUE:
Expand All @@ -206,6 +240,22 @@ def __init__(
self._properties["rangeElementType"] = {"type": range_element_type}
if isinstance(range_element_type, FieldElementType):
self._properties["rangeElementType"] = range_element_type.to_api_repr()
if rounding_mode is not None:
if isinstance(rounding_mode, enums.RoundingMode):
rounding_mode = rounding_mode.name
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think if we do the RoundingMode(str, enum.Enum) trick, this logic should be unnecessary.

Would be good to double check in some unit tests that json.dumps(field.to_api_repr()) works when passing in one of these enums if we do remove this if statement.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With the changes to the Rounding_Mode enum and our existing test suite, we do have a test that ensures given an enum representation, we get the correct JSON output when calling to_api_repr(). Specifically, see test_to_api_repr().

self._properties["roundingMode"] = rounding_mode
if isinstance(foreign_type_definition, str):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm curious, why not is not None here?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

self._properties["foreignTypeDefinition"] = foreign_type_definition

# The order of operations is important:
# If field_type is FOREIGN, then foreign_type_definition must be set.
if field_type != "FOREIGN":
self._properties["type"] = field_type
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks redundant with line 187/220 above ("type": field_type,) I don't think this is necessary.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

else:
if self._properties.get("foreignTypeDefinition") is None:
raise ValueError(
"If the 'field_type' is 'FOREIGN', then 'foreign_type_definition' is required."
)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm wary of putting client-side validation like this, since theoretically the server could choose to lift this restriction in future (imagine some foreign schema autodetect support or something).

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

if fields: # Don't set the property if it's not set.
self._properties["fields"] = [field.to_api_repr() for field in fields]

Expand Down Expand Up @@ -304,6 +354,22 @@ def range_element_type(self):
ret = self._properties.get("rangeElementType")
return FieldElementType.from_api_repr(ret)

@property
def rounding_mode(self):
"""Enum that specifies the rounding mode to be used when storing values of
NUMERIC and BIGNUMERIC type.
"""
return self._properties.get("roundingMode")

@property
def foreign_type_definition(self):
"""Definition of the foreign data type.

Only valid for top-level schema fields (not nested fields).
If the type is FOREIGN, this field is required.
"""
return self._properties.get("foreignTypeDefinition")

@property
def fields(self):
"""Optional[tuple]: Subfields contained in this field.
Expand Down
71 changes: 70 additions & 1 deletion tests/unit/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import pytest

from google.cloud import bigquery
from google.cloud.bigquery import enums
from google.cloud.bigquery.standard_sql import StandardSqlStructType
from google.cloud.bigquery import schema
from google.cloud.bigquery.schema import PolicyTagList
Expand Down Expand Up @@ -49,9 +50,12 @@ def test_constructor_defaults(self):
self.assertEqual(field.fields, ())
self.assertIsNone(field.policy_tags)
self.assertIsNone(field.default_value_expression)
self.assertEqual(field.rounding_mode, None)
self.assertEqual(field.foreign_type_definition, None)

def test_constructor_explicit(self):
FIELD_DEFAULT_VALUE_EXPRESSION = "This is the default value for this field"
ROUNDINGMODE = enums.RoundingMode.ROUNDING_MODE_UNSPECIFIED
field = self._make_one(
"test",
"STRING",
Expand All @@ -64,6 +68,8 @@ def test_constructor_explicit(self):
)
),
default_value_expression=FIELD_DEFAULT_VALUE_EXPRESSION,
rounding_mode=ROUNDINGMODE,
foreign_type_definition="INTEGER",
)
self.assertEqual(field.name, "test")
self.assertEqual(field.field_type, "STRING")
Expand All @@ -80,6 +86,8 @@ def test_constructor_explicit(self):
)
),
)
self.assertEqual(field.rounding_mode, ROUNDINGMODE.name)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: The string "ROUNDING_MODE_UNSPECIFIED" would be easier to convince myself that the code is correct. With .name, this is a bit of a "change detector test", since we've duplicated that logic from the implementation.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

self.assertEqual(field.foreign_type_definition, "INTEGER")

def test_constructor_explicit_none(self):
field = self._make_one("test", "STRING", description=None, policy_tags=None)
Expand Down Expand Up @@ -137,8 +145,16 @@ def test_to_api_repr(self):
{"names": ["foo", "bar"]},
)

ROUNDINGMODE = enums.RoundingMode.ROUNDING_MODE_UNSPECIFIED

field = self._make_one(
"foo", "INTEGER", "NULLABLE", description="hello world", policy_tags=policy
"foo",
"INTEGER",
"NULLABLE",
description="hello world",
policy_tags=policy,
rounding_mode=ROUNDINGMODE,
foreign_type_definition=None,
)
self.assertEqual(
field.to_api_repr(),
Expand All @@ -148,6 +164,7 @@ def test_to_api_repr(self):
"type": "INTEGER",
"description": "hello world",
"policyTags": {"names": ["foo", "bar"]},
"roundingMode": "ROUNDING_MODE_UNSPECIFIED",
},
)

Expand Down Expand Up @@ -181,6 +198,7 @@ def test_from_api_repr(self):
"description": "test_description",
"name": "foo",
"type": "record",
"roundingMode": "ROUNDING_MODE_UNSPECIFIED",
}
)
self.assertEqual(field.name, "foo")
Expand All @@ -192,6 +210,7 @@ def test_from_api_repr(self):
self.assertEqual(field.fields[0].field_type, "INTEGER")
self.assertEqual(field.fields[0].mode, "NULLABLE")
self.assertEqual(field.range_element_type, None)
self.assertEqual(field.rounding_mode, "ROUNDING_MODE_UNSPECIFIED")

def test_from_api_repr_policy(self):
field = self._get_target_class().from_api_repr(
Expand Down Expand Up @@ -283,6 +302,30 @@ def test_fields_property(self):
schema_field = self._make_one("boat", "RECORD", fields=fields)
self.assertEqual(schema_field.fields, fields)

def test_roundingmode_property_str(self):
# via init
ROUNDINGMODE = "ROUND_HALF_AWAY_FROM_ZERO"
schema_field = self._make_one("test", "STRING", rounding_mode=ROUNDINGMODE)
self.assertEqual(schema_field.rounding_mode, ROUNDINGMODE)

# via _properties
del schema_field
schema_field = self._make_one("test", "STRING")
schema_field._properties["roundingMode"] = ROUNDINGMODE
self.assertEqual(schema_field.rounding_mode, ROUNDINGMODE)

def test_foreign_type_definition_property_str(self):
FOREIGN_TYPE_DEFINITION = "INTEGER"
schema_field = self._make_one(
"test", "STRING", foreign_type_definition=FOREIGN_TYPE_DEFINITION
)
self.assertEqual(schema_field.foreign_type_definition, FOREIGN_TYPE_DEFINITION)

del schema_field
schema_field = self._make_one("test", "STRING")
schema_field._properties["foreignTypeDefinition"] = FOREIGN_TYPE_DEFINITION
self.assertEqual(schema_field.foreign_type_definition, FOREIGN_TYPE_DEFINITION)

def test_to_standard_sql_simple_type(self):
examples = (
# a few legacy types
Expand Down Expand Up @@ -457,6 +500,32 @@ def test_to_standard_sql_unknown_type(self):
bigquery.StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED,
)

def test_to_standard_sql_foreign_type_valid(self):
legacy_type = "FOREIGN"
standard_type = bigquery.StandardSqlTypeNames.FOREIGN
foreign_type_definition = "INTEGER"

field = self._make_one(
"some_field",
field_type=legacy_type,
foreign_type_definition=foreign_type_definition,
)
standard_field = field.to_standard_sql()
self.assertEqual(standard_field.name, "some_field")
self.assertEqual(standard_field.type.type_kind, standard_type)

def test_to_standard_sql_foreign_type_invalid(self):
legacy_type = "FOREIGN"
foreign_type_definition = None

with self.assertRaises(ValueError) as context:
self._make_one(
"some_field",
field_type=legacy_type,
foreign_type_definition=foreign_type_definition,
)
self.assertTrue("If the 'field_type'" in context.exception.args[0])

def test___eq___wrong_type(self):
field = self._make_one("test", "STRING")
other = object()
Expand Down