Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add roundingmode enum, wiring, and tests #2121

Merged
merged 8 commits into from
Jan 31, 2025
45 changes: 44 additions & 1 deletion google/cloud/bigquery/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,11 @@ class KeyResultStatementKind:


class StandardSqlTypeNames(str, enum.Enum):
"""Enum of allowed SQL type names in schema.SchemaField.

Datatype used in GoogleSQL.
"""

def _generate_next_value_(name, start, count, last_values):
return name

Expand All @@ -267,6 +272,9 @@ def _generate_next_value_(name, start, count, last_values):
ARRAY = enum.auto()
STRUCT = enum.auto()
RANGE = enum.auto()
# NOTE: FOREIGN acts as a wrapper for data types
# not natively understood by BigQuery unless translated
FOREIGN = enum.auto()


class EntityTypes(str, enum.Enum):
Expand All @@ -285,7 +293,10 @@ class EntityTypes(str, enum.Enum):
# See also: https://cloud.google.com/bigquery/data-types#legacy_sql_data_types
# and https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
class SqlTypeNames(str, enum.Enum):
"""Enum of allowed SQL type names in schema.SchemaField."""
"""Enum of allowed SQL type names in schema.SchemaField.

Datatype used in Legacy SQL.
"""

STRING = "STRING"
BYTES = "BYTES"
Expand All @@ -306,6 +317,9 @@ class SqlTypeNames(str, enum.Enum):
DATETIME = "DATETIME"
INTERVAL = "INTERVAL" # NOTE: not available in legacy types
RANGE = "RANGE" # NOTE: not available in legacy types
# NOTE: FOREIGN acts as a wrapper for data types
# not natively understood by BigQuery unless translated
FOREIGN = "FOREIGN"


class WriteDisposition(object):
Expand Down Expand Up @@ -344,3 +358,32 @@ class DeterminismLevel:

NOT_DETERMINISTIC = "NOT_DETERMINISTIC"
"""The UDF is not deterministic."""


class RoundingMode(str, enum.Enum):
"""Rounding mode options that can be used when storing NUMERIC or BIGNUMERIC
values.

ROUNDING_MODE_UNSPECIFIED: will default to using ROUND_HALF_AWAY_FROM_ZERO.

ROUND_HALF_AWAY_FROM_ZERO: rounds half values away from zero when applying
precision and scale upon writing of NUMERIC and BIGNUMERIC values.
For Scale: 0
* 1.1, 1.2, 1.3, 1.4 => 1
* 1.5, 1.6, 1.7, 1.8, 1.9 => 2

ROUND_HALF_EVEN: rounds half values to the nearest even value when applying
precision and scale upon writing of NUMERIC and BIGNUMERIC values.
For Scale: 0
* 1.1, 1.2, 1.3, 1.4 => 1
* 1.5 => 2
* 1.6, 1.7, 1.8, 1.9 => 2
* 2.5 => 2
"""

def _generate_next_value_(name, start, count, last_values):
return name

ROUNDING_MODE_UNSPECIFIED = enum.auto()
ROUND_HALF_AWAY_FROM_ZERO = enum.auto()
ROUND_HALF_EVEN = enum.auto()
59 changes: 57 additions & 2 deletions google/cloud/bigquery/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,15 @@

from google.cloud.bigquery import _helpers
from google.cloud.bigquery import standard_sql
from google.cloud.bigquery import enums
from google.cloud.bigquery.enums import StandardSqlTypeNames


_STRUCT_TYPES = ("RECORD", "STRUCT")

# SQL types reference:
# https://cloud.google.com/bigquery/data-types#legacy_sql_data_types
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
# LEGACY SQL: https://cloud.google.com/bigquery/data-types#legacy_sql_data_types
# GoogleSQL: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
LEGACY_TO_STANDARD_TYPES = {
"STRING": StandardSqlTypeNames.STRING,
"BYTES": StandardSqlTypeNames.BYTES,
Expand All @@ -48,6 +49,7 @@
"DATE": StandardSqlTypeNames.DATE,
"TIME": StandardSqlTypeNames.TIME,
"DATETIME": StandardSqlTypeNames.DATETIME,
"FOREIGN": StandardSqlTypeNames.FOREIGN,
# no direct conversion from ARRAY, the latter is represented by mode="REPEATED"
}
"""String names of the legacy SQL types to integer codes of Standard SQL standard_sql."""
Expand Down Expand Up @@ -166,6 +168,35 @@ class SchemaField(object):
the type is RANGE, this field is required. Possible values for the
field element type of a RANGE include `DATE`, `DATETIME` and
`TIMESTAMP`.

rounding_mode: Union[enums.RoundingMode, str, None]
Specifies the rounding mode to be used when storing values of
NUMERIC and BIGNUMERIC type.

Unspecified will default to using ROUND_HALF_AWAY_FROM_ZERO.
ROUND_HALF_AWAY_FROM_ZERO rounds half values away from zero
when applying precision and scale upon writing of NUMERIC and BIGNUMERIC
values.

For Scale: 0
1.1, 1.2, 1.3, 1.4 => 1
1.5, 1.6, 1.7, 1.8, 1.9 => 2

ROUND_HALF_EVEN rounds half values to the nearest even value
when applying precision and scale upon writing of NUMERIC and BIGNUMERIC
values.

For Scale: 0
1.1, 1.2, 1.3, 1.4 => 1
1.5 => 2
1.6, 1.7, 1.8, 1.9 => 2
2.5 => 2

foreign_type_definition: Optional[str]
Definition of the foreign data type.

Only valid for top-level schema fields (not nested fields).
If the type is FOREIGN, this field is required.
"""

def __init__(
Expand All @@ -181,11 +212,14 @@ def __init__(
scale: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
max_length: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
range_element_type: Union[FieldElementType, str, None] = None,
rounding_mode: Union[enums.RoundingMode, str, None] = None,
foreign_type_definition: Optional[str] = None,
):
self._properties: Dict[str, Any] = {
"name": name,
"type": field_type,
}
self._properties["name"] = name
if mode is not None:
self._properties["mode"] = mode.upper()
if description is not _DEFAULT_VALUE:
Expand All @@ -206,6 +240,11 @@ def __init__(
self._properties["rangeElementType"] = {"type": range_element_type}
if isinstance(range_element_type, FieldElementType):
self._properties["rangeElementType"] = range_element_type.to_api_repr()
if rounding_mode is not None:
self._properties["roundingMode"] = rounding_mode
if isinstance(foreign_type_definition, str):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm curious, why not is not None here?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

self._properties["foreignTypeDefinition"] = foreign_type_definition

if fields: # Don't set the property if it's not set.
self._properties["fields"] = [field.to_api_repr() for field in fields]

Expand Down Expand Up @@ -304,6 +343,22 @@ def range_element_type(self):
ret = self._properties.get("rangeElementType")
return FieldElementType.from_api_repr(ret)

@property
def rounding_mode(self):
"""Enum that specifies the rounding mode to be used when storing values of
NUMERIC and BIGNUMERIC type.
"""
return self._properties.get("roundingMode")

@property
def foreign_type_definition(self):
"""Definition of the foreign data type.

Only valid for top-level schema fields (not nested fields).
If the type is FOREIGN, this field is required.
"""
return self._properties.get("foreignTypeDefinition")

@property
def fields(self):
"""Optional[tuple]: Subfields contained in this field.
Expand Down
56 changes: 55 additions & 1 deletion tests/unit/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import pytest

from google.cloud import bigquery
from google.cloud.bigquery import enums
from google.cloud.bigquery.standard_sql import StandardSqlStructType
from google.cloud.bigquery import schema
from google.cloud.bigquery.schema import PolicyTagList
Expand Down Expand Up @@ -49,6 +50,8 @@ def test_constructor_defaults(self):
self.assertEqual(field.fields, ())
self.assertIsNone(field.policy_tags)
self.assertIsNone(field.default_value_expression)
self.assertEqual(field.rounding_mode, None)
self.assertEqual(field.foreign_type_definition, None)

def test_constructor_explicit(self):
FIELD_DEFAULT_VALUE_EXPRESSION = "This is the default value for this field"
Expand All @@ -64,6 +67,8 @@ def test_constructor_explicit(self):
)
),
default_value_expression=FIELD_DEFAULT_VALUE_EXPRESSION,
rounding_mode=enums.RoundingMode.ROUNDING_MODE_UNSPECIFIED,
foreign_type_definition="INTEGER",
)
self.assertEqual(field.name, "test")
self.assertEqual(field.field_type, "STRING")
Expand All @@ -80,6 +85,8 @@ def test_constructor_explicit(self):
)
),
)
self.assertEqual(field.rounding_mode, "ROUNDING_MODE_UNSPECIFIED")
self.assertEqual(field.foreign_type_definition, "INTEGER")

def test_constructor_explicit_none(self):
field = self._make_one("test", "STRING", description=None, policy_tags=None)
Expand Down Expand Up @@ -137,8 +144,16 @@ def test_to_api_repr(self):
{"names": ["foo", "bar"]},
)

ROUNDINGMODE = enums.RoundingMode.ROUNDING_MODE_UNSPECIFIED

field = self._make_one(
"foo", "INTEGER", "NULLABLE", description="hello world", policy_tags=policy
"foo",
"INTEGER",
"NULLABLE",
description="hello world",
policy_tags=policy,
rounding_mode=ROUNDINGMODE,
foreign_type_definition=None,
)
self.assertEqual(
field.to_api_repr(),
Expand All @@ -148,6 +163,7 @@ def test_to_api_repr(self):
"type": "INTEGER",
"description": "hello world",
"policyTags": {"names": ["foo", "bar"]},
"roundingMode": "ROUNDING_MODE_UNSPECIFIED",
},
)

Expand Down Expand Up @@ -181,6 +197,7 @@ def test_from_api_repr(self):
"description": "test_description",
"name": "foo",
"type": "record",
"roundingMode": "ROUNDING_MODE_UNSPECIFIED",
}
)
self.assertEqual(field.name, "foo")
Expand All @@ -192,6 +209,7 @@ def test_from_api_repr(self):
self.assertEqual(field.fields[0].field_type, "INTEGER")
self.assertEqual(field.fields[0].mode, "NULLABLE")
self.assertEqual(field.range_element_type, None)
self.assertEqual(field.rounding_mode, "ROUNDING_MODE_UNSPECIFIED")

def test_from_api_repr_policy(self):
field = self._get_target_class().from_api_repr(
Expand Down Expand Up @@ -283,6 +301,28 @@ def test_fields_property(self):
schema_field = self._make_one("boat", "RECORD", fields=fields)
self.assertEqual(schema_field.fields, fields)

def test_roundingmode_property_str(self):
ROUNDINGMODE = "ROUND_HALF_AWAY_FROM_ZERO"
schema_field = self._make_one("test", "STRING", rounding_mode=ROUNDINGMODE)
self.assertEqual(schema_field.rounding_mode, ROUNDINGMODE)

del schema_field
schema_field = self._make_one("test", "STRING")
schema_field._properties["roundingMode"] = ROUNDINGMODE
self.assertEqual(schema_field.rounding_mode, ROUNDINGMODE)

def test_foreign_type_definition_property_str(self):
FOREIGN_TYPE_DEFINITION = "INTEGER"
schema_field = self._make_one(
"test", "STRING", foreign_type_definition=FOREIGN_TYPE_DEFINITION
)
self.assertEqual(schema_field.foreign_type_definition, FOREIGN_TYPE_DEFINITION)

del schema_field
schema_field = self._make_one("test", "STRING")
schema_field._properties["foreignTypeDefinition"] = FOREIGN_TYPE_DEFINITION
self.assertEqual(schema_field.foreign_type_definition, FOREIGN_TYPE_DEFINITION)

def test_to_standard_sql_simple_type(self):
examples = (
# a few legacy types
Expand Down Expand Up @@ -457,6 +497,20 @@ def test_to_standard_sql_unknown_type(self):
bigquery.StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED,
)

def test_to_standard_sql_foreign_type_valid(self):
legacy_type = "FOREIGN"
standard_type = bigquery.StandardSqlTypeNames.FOREIGN
foreign_type_definition = "INTEGER"

field = self._make_one(
"some_field",
field_type=legacy_type,
foreign_type_definition=foreign_type_definition,
)
standard_field = field.to_standard_sql()
self.assertEqual(standard_field.name, "some_field")
self.assertEqual(standard_field.type.type_kind, standard_type)

def test___eq___wrong_type(self):
field = self._make_one("test", "STRING")
other = object()
Expand Down