-
Notifications
You must be signed in to change notification settings - Fork 313
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: add roundingmode enum, wiring, and tests #2121
Changes from 6 commits
6f86838
c66377e
afe0d9b
e440460
bdc5764
afcd892
eacf9fe
70b5fb1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -246,6 +246,11 @@ class KeyResultStatementKind: | |
|
||
|
||
class StandardSqlTypeNames(str, enum.Enum): | ||
"""Enum of allowed SQL type names in schema.SchemaField. | ||
|
||
Datatype used in GoogleSQL. | ||
""" | ||
|
||
def _generate_next_value_(name, start, count, last_values): | ||
return name | ||
|
||
|
@@ -267,6 +272,9 @@ def _generate_next_value_(name, start, count, last_values): | |
ARRAY = enum.auto() | ||
STRUCT = enum.auto() | ||
RANGE = enum.auto() | ||
# NOTE: FOREIGN acts as a wrapper for data types | ||
# not natively understood by BigQuery unless translated | ||
FOREIGN = enum.auto() | ||
|
||
|
||
class EntityTypes(str, enum.Enum): | ||
|
@@ -285,7 +293,10 @@ class EntityTypes(str, enum.Enum): | |
# See also: https://cloud.google.com/bigquery/data-types#legacy_sql_data_types | ||
# and https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types | ||
class SqlTypeNames(str, enum.Enum): | ||
"""Enum of allowed SQL type names in schema.SchemaField.""" | ||
"""Enum of allowed SQL type names in schema.SchemaField. | ||
|
||
Datatype used in Legacy SQL. | ||
""" | ||
|
||
STRING = "STRING" | ||
BYTES = "BYTES" | ||
|
@@ -306,6 +317,9 @@ class SqlTypeNames(str, enum.Enum): | |
DATETIME = "DATETIME" | ||
INTERVAL = "INTERVAL" # NOTE: not available in legacy types | ||
RANGE = "RANGE" # NOTE: not available in legacy types | ||
# NOTE: FOREIGN acts as a wrapper for data types | ||
# not natively understood by BigQuery unless translated | ||
FOREIGN = "FOREIGN" | ||
|
||
|
||
class WriteDisposition(object): | ||
|
@@ -344,3 +358,29 @@ class DeterminismLevel: | |
|
||
NOT_DETERMINISTIC = "NOT_DETERMINISTIC" | ||
"""The UDF is not deterministic.""" | ||
|
||
|
||
class RoundingMode(enum.Enum): | ||
"""Rounding mode options that can be used when storing NUMERIC or BIGNUMERIC | ||
values. | ||
|
||
ROUNDING_MODE_UNSPECIFIED: will default to using ROUND_HALF_AWAY_FROM_ZERO. | ||
|
||
ROUND_HALF_AWAY_FROM_ZERO: rounds half values away from zero when applying | ||
precision and scale upon writing of NUMERIC and BIGNUMERIC values. | ||
For Scale: 0 | ||
* 1.1, 1.2, 1.3, 1.4 => 1 | ||
* 1.5, 1.6, 1.7, 1.8, 1.9 => 2 | ||
|
||
ROUND_HALF_EVEN: rounds half values to the nearest even value when applying | ||
precision and scale upon writing of NUMERIC and BIGNUMERIC values. | ||
For Scale: 0 | ||
* 1.1, 1.2, 1.3, 1.4 => 1 | ||
* 1.5 => 2 | ||
* 1.6, 1.7, 1.8, 1.9 => 2 | ||
* 2.5 => 2 | ||
""" | ||
|
||
ROUNDING_MODE_UNSPECIFIED = 0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm curious, is this what the REST API uses? In past, the BQ enums mapped to strings, even if they start life in proto as integers. Per https://protobuf.dev/programming-guides/json/, supposedly both should be accepted, but I think we should use whatever the JSON API returns so customers' equality checks function. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The REST API is as follows:
|
||
ROUND_HALF_AWAY_FROM_ZERO = 1 | ||
ROUND_HALF_EVEN = 2 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,14 +22,15 @@ | |
|
||
from google.cloud.bigquery import _helpers | ||
from google.cloud.bigquery import standard_sql | ||
from google.cloud.bigquery import enums | ||
from google.cloud.bigquery.enums import StandardSqlTypeNames | ||
|
||
|
||
_STRUCT_TYPES = ("RECORD", "STRUCT") | ||
|
||
# SQL types reference: | ||
# https://cloud.google.com/bigquery/data-types#legacy_sql_data_types | ||
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types | ||
# LEGACY SQL: https://cloud.google.com/bigquery/data-types#legacy_sql_data_types | ||
# GoogleSQL: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types | ||
LEGACY_TO_STANDARD_TYPES = { | ||
"STRING": StandardSqlTypeNames.STRING, | ||
"BYTES": StandardSqlTypeNames.BYTES, | ||
|
@@ -48,6 +49,7 @@ | |
"DATE": StandardSqlTypeNames.DATE, | ||
"TIME": StandardSqlTypeNames.TIME, | ||
"DATETIME": StandardSqlTypeNames.DATETIME, | ||
"FOREIGN": StandardSqlTypeNames.FOREIGN, | ||
# no direct conversion from ARRAY, the latter is represented by mode="REPEATED" | ||
} | ||
"""String names of the legacy SQL types to integer codes of Standard SQL standard_sql.""" | ||
|
@@ -166,6 +168,35 @@ class SchemaField(object): | |
the type is RANGE, this field is required. Possible values for the | ||
field element type of a RANGE include `DATE`, `DATETIME` and | ||
`TIMESTAMP`. | ||
|
||
rounding_mode: Union[enums.RoundingMode, str, None] | ||
Specifies the rounding mode to be used when storing values of | ||
NUMERIC and BIGNUMERIC type. | ||
|
||
Unspecified will default to using ROUND_HALF_AWAY_FROM_ZERO. | ||
ROUND_HALF_AWAY_FROM_ZERO rounds half values away from zero | ||
when applying precision and scale upon writing of NUMERIC and BIGNUMERIC | ||
values. | ||
|
||
For Scale: 0 | ||
1.1, 1.2, 1.3, 1.4 => 1 | ||
1.5, 1.6, 1.7, 1.8, 1.9 => 2 | ||
|
||
ROUND_HALF_EVEN rounds half values to the nearest even value | ||
when applying precision and scale upon writing of NUMERIC and BIGNUMERIC | ||
values. | ||
|
||
For Scale: 0 | ||
1.1, 1.2, 1.3, 1.4 => 1 | ||
1.5 => 2 | ||
1.6, 1.7, 1.8, 1.9 => 2 | ||
2.5 => 2 | ||
|
||
foreign_type_definition: Optional[str] | ||
Definition of the foreign data type. | ||
|
||
Only valid for top-level schema fields (not nested fields). | ||
If the type is FOREIGN, this field is required. | ||
""" | ||
|
||
def __init__( | ||
|
@@ -181,11 +212,14 @@ def __init__( | |
scale: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, | ||
max_length: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, | ||
range_element_type: Union[FieldElementType, str, None] = None, | ||
rounding_mode: Union[enums.RoundingMode, str, None] = None, | ||
foreign_type_definition: Optional[str] = None, | ||
): | ||
self._properties: Dict[str, Any] = { | ||
"name": name, | ||
"type": field_type, | ||
} | ||
self._properties["name"] = name | ||
if mode is not None: | ||
self._properties["mode"] = mode.upper() | ||
if description is not _DEFAULT_VALUE: | ||
|
@@ -206,6 +240,22 @@ def __init__( | |
self._properties["rangeElementType"] = {"type": range_element_type} | ||
if isinstance(range_element_type, FieldElementType): | ||
self._properties["rangeElementType"] = range_element_type.to_api_repr() | ||
if rounding_mode is not None: | ||
if isinstance(rounding_mode, enums.RoundingMode): | ||
rounding_mode = rounding_mode.name | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think if we do the Would be good to double check in some unit tests that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. With the changes to the |
||
self._properties["roundingMode"] = rounding_mode | ||
if isinstance(foreign_type_definition, str): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm curious, why not There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
self._properties["foreignTypeDefinition"] = foreign_type_definition | ||
|
||
# The order of operations is important: | ||
# If field_type is FOREIGN, then foreign_type_definition must be set. | ||
if field_type != "FOREIGN": | ||
self._properties["type"] = field_type | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This looks redundant with line 187/220 above ( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
else: | ||
if self._properties.get("foreignTypeDefinition") is None: | ||
raise ValueError( | ||
"If the 'field_type' is 'FOREIGN', then 'foreign_type_definition' is required." | ||
) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm wary of putting client-side validation like this, since theoretically the server could choose to lift this restriction in future (imagine some foreign schema autodetect support or something). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
if fields: # Don't set the property if it's not set. | ||
self._properties["fields"] = [field.to_api_repr() for field in fields] | ||
|
||
|
@@ -304,6 +354,22 @@ def range_element_type(self): | |
ret = self._properties.get("rangeElementType") | ||
return FieldElementType.from_api_repr(ret) | ||
|
||
@property | ||
def rounding_mode(self): | ||
"""Enum that specifies the rounding mode to be used when storing values of | ||
NUMERIC and BIGNUMERIC type. | ||
""" | ||
return self._properties.get("roundingMode") | ||
|
||
@property | ||
def foreign_type_definition(self): | ||
"""Definition of the foreign data type. | ||
|
||
Only valid for top-level schema fields (not nested fields). | ||
If the type is FOREIGN, this field is required. | ||
""" | ||
return self._properties.get("foreignTypeDefinition") | ||
|
||
@property | ||
def fields(self): | ||
"""Optional[tuple]: Subfields contained in this field. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ | |
import pytest | ||
|
||
from google.cloud import bigquery | ||
from google.cloud.bigquery import enums | ||
from google.cloud.bigquery.standard_sql import StandardSqlStructType | ||
from google.cloud.bigquery import schema | ||
from google.cloud.bigquery.schema import PolicyTagList | ||
|
@@ -49,9 +50,12 @@ def test_constructor_defaults(self): | |
self.assertEqual(field.fields, ()) | ||
self.assertIsNone(field.policy_tags) | ||
self.assertIsNone(field.default_value_expression) | ||
self.assertEqual(field.rounding_mode, None) | ||
self.assertEqual(field.foreign_type_definition, None) | ||
|
||
def test_constructor_explicit(self): | ||
FIELD_DEFAULT_VALUE_EXPRESSION = "This is the default value for this field" | ||
ROUNDINGMODE = enums.RoundingMode.ROUNDING_MODE_UNSPECIFIED | ||
field = self._make_one( | ||
"test", | ||
"STRING", | ||
|
@@ -64,6 +68,8 @@ def test_constructor_explicit(self): | |
) | ||
), | ||
default_value_expression=FIELD_DEFAULT_VALUE_EXPRESSION, | ||
rounding_mode=ROUNDINGMODE, | ||
foreign_type_definition="INTEGER", | ||
) | ||
self.assertEqual(field.name, "test") | ||
self.assertEqual(field.field_type, "STRING") | ||
|
@@ -80,6 +86,8 @@ def test_constructor_explicit(self): | |
) | ||
), | ||
) | ||
self.assertEqual(field.rounding_mode, ROUNDINGMODE.name) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: The string There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
self.assertEqual(field.foreign_type_definition, "INTEGER") | ||
|
||
def test_constructor_explicit_none(self): | ||
field = self._make_one("test", "STRING", description=None, policy_tags=None) | ||
|
@@ -137,8 +145,16 @@ def test_to_api_repr(self): | |
{"names": ["foo", "bar"]}, | ||
) | ||
|
||
ROUNDINGMODE = enums.RoundingMode.ROUNDING_MODE_UNSPECIFIED | ||
|
||
field = self._make_one( | ||
"foo", "INTEGER", "NULLABLE", description="hello world", policy_tags=policy | ||
"foo", | ||
"INTEGER", | ||
"NULLABLE", | ||
description="hello world", | ||
policy_tags=policy, | ||
rounding_mode=ROUNDINGMODE, | ||
foreign_type_definition=None, | ||
) | ||
self.assertEqual( | ||
field.to_api_repr(), | ||
|
@@ -148,6 +164,7 @@ def test_to_api_repr(self): | |
"type": "INTEGER", | ||
"description": "hello world", | ||
"policyTags": {"names": ["foo", "bar"]}, | ||
"roundingMode": "ROUNDING_MODE_UNSPECIFIED", | ||
}, | ||
) | ||
|
||
|
@@ -181,6 +198,7 @@ def test_from_api_repr(self): | |
"description": "test_description", | ||
"name": "foo", | ||
"type": "record", | ||
"roundingMode": "ROUNDING_MODE_UNSPECIFIED", | ||
} | ||
) | ||
self.assertEqual(field.name, "foo") | ||
|
@@ -192,6 +210,7 @@ def test_from_api_repr(self): | |
self.assertEqual(field.fields[0].field_type, "INTEGER") | ||
self.assertEqual(field.fields[0].mode, "NULLABLE") | ||
self.assertEqual(field.range_element_type, None) | ||
self.assertEqual(field.rounding_mode, "ROUNDING_MODE_UNSPECIFIED") | ||
|
||
def test_from_api_repr_policy(self): | ||
field = self._get_target_class().from_api_repr( | ||
|
@@ -283,6 +302,30 @@ def test_fields_property(self): | |
schema_field = self._make_one("boat", "RECORD", fields=fields) | ||
self.assertEqual(schema_field.fields, fields) | ||
|
||
def test_roundingmode_property_str(self): | ||
# via init | ||
ROUNDINGMODE = "ROUND_HALF_AWAY_FROM_ZERO" | ||
schema_field = self._make_one("test", "STRING", rounding_mode=ROUNDINGMODE) | ||
self.assertEqual(schema_field.rounding_mode, ROUNDINGMODE) | ||
|
||
# via _properties | ||
del schema_field | ||
schema_field = self._make_one("test", "STRING") | ||
schema_field._properties["roundingMode"] = ROUNDINGMODE | ||
self.assertEqual(schema_field.rounding_mode, ROUNDINGMODE) | ||
|
||
def test_foreign_type_definition_property_str(self): | ||
FOREIGN_TYPE_DEFINITION = "INTEGER" | ||
schema_field = self._make_one( | ||
"test", "STRING", foreign_type_definition=FOREIGN_TYPE_DEFINITION | ||
) | ||
self.assertEqual(schema_field.foreign_type_definition, FOREIGN_TYPE_DEFINITION) | ||
|
||
del schema_field | ||
schema_field = self._make_one("test", "STRING") | ||
schema_field._properties["foreignTypeDefinition"] = FOREIGN_TYPE_DEFINITION | ||
self.assertEqual(schema_field.foreign_type_definition, FOREIGN_TYPE_DEFINITION) | ||
|
||
def test_to_standard_sql_simple_type(self): | ||
examples = ( | ||
# a few legacy types | ||
|
@@ -457,6 +500,32 @@ def test_to_standard_sql_unknown_type(self): | |
bigquery.StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED, | ||
) | ||
|
||
def test_to_standard_sql_foreign_type_valid(self): | ||
legacy_type = "FOREIGN" | ||
standard_type = bigquery.StandardSqlTypeNames.FOREIGN | ||
foreign_type_definition = "INTEGER" | ||
|
||
field = self._make_one( | ||
"some_field", | ||
field_type=legacy_type, | ||
foreign_type_definition=foreign_type_definition, | ||
) | ||
standard_field = field.to_standard_sql() | ||
self.assertEqual(standard_field.name, "some_field") | ||
self.assertEqual(standard_field.type.type_kind, standard_type) | ||
|
||
def test_to_standard_sql_foreign_type_invalid(self): | ||
legacy_type = "FOREIGN" | ||
foreign_type_definition = None | ||
|
||
with self.assertRaises(ValueError) as context: | ||
self._make_one( | ||
"some_field", | ||
field_type=legacy_type, | ||
foreign_type_definition=foreign_type_definition, | ||
) | ||
self.assertTrue("If the 'field_type'" in context.exception.args[0]) | ||
|
||
def test___eq___wrong_type(self): | ||
field = self._make_one("test", "STRING") | ||
other = object() | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe if we do a
class RoundingMode(str, enum.Enum):
withenum.auto()
values as above in StandardSqlTypeNames, this will give strings compatible with the BQ REST API (assuming these work like other enums in that regard).There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.