Skip to content

Commit 207aa50

Browse files
authored
feat: add default value expression (#1408)
* feat: Adds default_value_expression to SchemaField
1 parent 931285f commit 207aa50

File tree

5 files changed

+135
-16
lines changed

5 files changed

+135
-16
lines changed

google/cloud/bigquery/schema.py

+37-1
Original file line numberDiff line numberDiff line change
@@ -93,13 +93,38 @@ class SchemaField(object):
9393
Scale (digits after decimal) of fields with NUMERIC or BIGNUMERIC type.
9494
9595
max_length: Maximum length of fields with STRING or BYTES type.
96+
97+
default_value_expression: str, Optional
98+
Used to specify the default value of a field using a SQL expression. It can only be set for
99+
top level fields (columns).
100+
101+
You can use a struct or array expression to specify default value for the entire struct or
102+
array. The valid SQL expressions are:
103+
104+
- Literals for all data types, including STRUCT and ARRAY.
105+
106+
- The following functions:
107+
108+
`CURRENT_TIMESTAMP`
109+
`CURRENT_TIME`
110+
`CURRENT_DATE`
111+
`CURRENT_DATETIME`
112+
`GENERATE_UUID`
113+
`RAND`
114+
`SESSION_USER`
115+
`ST_GEOPOINT`
116+
117+
- Struct or array composed with the above allowed functions, for example:
118+
119+
"[CURRENT_DATE(), DATE '2020-01-01']"
96120
"""
97121

98122
def __init__(
99123
self,
100124
name: str,
101125
field_type: str,
102126
mode: str = "NULLABLE",
127+
default_value_expression: str = None,
103128
description: Union[str, _DefaultSentinel] = _DEFAULT_VALUE,
104129
fields: Iterable["SchemaField"] = (),
105130
policy_tags: Union["PolicyTagList", None, _DefaultSentinel] = _DEFAULT_VALUE,
@@ -115,6 +140,8 @@ def __init__(
115140
self._properties["mode"] = mode.upper()
116141
if description is not _DEFAULT_VALUE:
117142
self._properties["description"] = description
143+
if default_value_expression is not None:
144+
self._properties["defaultValueExpression"] = default_value_expression
118145
if precision is not _DEFAULT_VALUE:
119146
self._properties["precision"] = precision
120147
if scale is not _DEFAULT_VALUE:
@@ -154,13 +181,16 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField":
154181
fields = api_repr.get("fields", ())
155182
policy_tags = api_repr.get("policyTags", _DEFAULT_VALUE)
156183

184+
default_value_expression = api_repr.get("defaultValueExpression", None)
185+
157186
if policy_tags is not None and policy_tags is not _DEFAULT_VALUE:
158187
policy_tags = PolicyTagList.from_api_repr(policy_tags)
159188

160189
return cls(
161190
field_type=field_type,
162191
fields=[cls.from_api_repr(f) for f in fields],
163192
mode=mode.upper(),
193+
default_value_expression=default_value_expression,
164194
description=description,
165195
name=api_repr["name"],
166196
policy_tags=policy_tags,
@@ -197,6 +227,11 @@ def is_nullable(self):
197227
"""bool: whether 'mode' is 'nullable'."""
198228
return self.mode == "NULLABLE"
199229

230+
@property
231+
def default_value_expression(self):
232+
"""Optional[str]: default value of a field, using an SQL expression."""
233+
return self._properties.get("defaultValueExpression")
234+
200235
@property
201236
def description(self):
202237
"""Optional[str]: description for the field."""
@@ -260,7 +295,7 @@ def _key(self):
260295
field_type = self.field_type.upper() if self.field_type is not None else None
261296

262297
# Type can temporarily be set to None if the code needs a SchemaField instance,
263-
# but has npt determined the exact type of the field yet.
298+
# but has not determined the exact type of the field yet.
264299
if field_type is not None:
265300
if field_type == "STRING" or field_type == "BYTES":
266301
if self.max_length is not None:
@@ -281,6 +316,7 @@ def _key(self):
281316
field_type,
282317
# Mode is always str, if not given it defaults to a str value
283318
self.mode.upper(), # pytype: disable=attribute-error
319+
self.default_value_expression,
284320
self.description,
285321
self._fields,
286322
policy_tags,

google/cloud/bigquery/table.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1421,7 +1421,7 @@ def get(self, key: str, default: Any = None) -> Any:
14211421
>>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z')
14221422
None
14231423
1424-
The default value can be overrided with the ``default`` parameter.
1424+
The default value can be overridden with the ``default`` parameter.
14251425
14261426
>>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z', '')
14271427
''

tests/system/test_client.py

+62
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,68 @@ def test_create_table_with_real_custom_policy(self):
441441
list(table.schema[1].policy_tags.names), [child_policy_tag.name]
442442
)
443443

444+
def test_create_table_with_default_value_expression(self):
445+
dataset = self.temp_dataset(
446+
_make_dataset_id("create_table_with_default_value_expression")
447+
)
448+
449+
table_id = "test_table"
450+
timestamp_field_name = "timestamp_field_with_default_value_expression"
451+
452+
string_default_val_expression = "'FOO'"
453+
timestamp_default_val_expression = "CURRENT_TIMESTAMP"
454+
455+
schema = [
456+
bigquery.SchemaField(
457+
"username",
458+
"STRING",
459+
default_value_expression=string_default_val_expression,
460+
),
461+
bigquery.SchemaField(
462+
timestamp_field_name,
463+
"TIMESTAMP",
464+
default_value_expression=timestamp_default_val_expression,
465+
),
466+
]
467+
table_arg = Table(dataset.table(table_id), schema=schema)
468+
self.assertFalse(_table_exists(table_arg))
469+
470+
table = helpers.retry_403(Config.CLIENT.create_table)(table_arg)
471+
self.to_delete.insert(0, table)
472+
473+
self.assertTrue(_table_exists(table))
474+
475+
# Fetch the created table and its metadata to verify that the default
476+
# value expression is assigned to fields
477+
remote_table = Config.CLIENT.get_table(table)
478+
remote_schema = remote_table.schema
479+
self.assertEqual(remote_schema, schema)
480+
481+
for field in remote_schema:
482+
if field.name == string_default_val_expression:
483+
self.assertEqual("'FOO'", field.default_value_expression)
484+
if field.name == timestamp_default_val_expression:
485+
self.assertEqual("CURRENT_TIMESTAMP", field.default_value_expression)
486+
487+
# Insert rows into the created table to verify default values are populated
488+
# when value is not provided
489+
NOW_SECONDS = 1448911495.484366
490+
NOW = datetime.datetime.utcfromtimestamp(NOW_SECONDS).replace(tzinfo=UTC)
491+
492+
# Rows to insert. Row #1 will have default `TIMESTAMP` defaultValueExpression CURRENT_TIMESTAMP
493+
# Row #2 will have default `STRING` defaultValueExpression "'FOO'"
494+
ROWS = [{"username": "john_doe"}, {timestamp_field_name: NOW}]
495+
496+
errors = Config.CLIENT.insert_rows(table, ROWS)
497+
self.assertEqual(len(errors), 0)
498+
499+
# Get list of inserted rows
500+
row_1, row_2 = [row for row in list(Config.CLIENT.list_rows(table))]
501+
502+
# Assert that row values are populated with default value expression
503+
self.assertIsInstance(row_1.get(timestamp_field_name), datetime.datetime)
504+
self.assertEqual("FOO", row_2.get("username"))
505+
444506
def test_create_table_w_time_partitioning_w_clustering_fields(self):
445507
from google.cloud.bigquery.table import TimePartitioning
446508
from google.cloud.bigquery.table import TimePartitioningType

tests/unit/test_client.py

+28-12
Original file line numberDiff line numberDiff line change
@@ -8395,9 +8395,19 @@ def test_schema_from_json_with_file_path(self):
83958395
]"""
83968396

83978397
expected = [
8398-
SchemaField("qtr", "STRING", "REQUIRED", "quarter"),
8399-
SchemaField("rep", "STRING", "NULLABLE", "sales representative"),
8400-
SchemaField("sales", "FLOAT", "NULLABLE", "total sales"),
8398+
SchemaField("qtr", "STRING", "REQUIRED", description="quarter"),
8399+
SchemaField(
8400+
"rep",
8401+
"STRING",
8402+
"NULLABLE",
8403+
description="sales representative",
8404+
),
8405+
SchemaField(
8406+
"sales",
8407+
"FLOAT",
8408+
"NULLABLE",
8409+
description="total sales",
8410+
),
84018411
]
84028412

84038413
client = self._make_client()
@@ -8441,9 +8451,11 @@ def test_schema_from_json_with_file_object(self):
84418451
]"""
84428452

84438453
expected = [
8444-
SchemaField("qtr", "STRING", "REQUIRED", "quarter"),
8445-
SchemaField("rep", "STRING", "NULLABLE", "sales representative"),
8446-
SchemaField("sales", "FLOAT", "NULLABLE", "total sales"),
8454+
SchemaField("qtr", "STRING", "REQUIRED", description="quarter"),
8455+
SchemaField(
8456+
"rep", "STRING", "NULLABLE", description="sales representative"
8457+
),
8458+
SchemaField("sales", "FLOAT", "NULLABLE", description="total sales"),
84478459
]
84488460

84498461
client = self._make_client()
@@ -8477,9 +8489,11 @@ def test_schema_to_json_with_file_path(self):
84778489
]
84788490

84798491
schema_list = [
8480-
SchemaField("qtr", "STRING", "REQUIRED", "quarter"),
8481-
SchemaField("rep", "STRING", "NULLABLE", "sales representative"),
8482-
SchemaField("sales", "FLOAT", "NULLABLE", "total sales"),
8492+
SchemaField("qtr", "STRING", "REQUIRED", description="quarter"),
8493+
SchemaField(
8494+
"rep", "STRING", "NULLABLE", description="sales representative"
8495+
),
8496+
SchemaField("sales", "FLOAT", "NULLABLE", description="total sales"),
84838497
]
84848498

84858499
client = self._make_client()
@@ -8521,9 +8535,11 @@ def test_schema_to_json_with_file_object(self):
85218535
]
85228536

85238537
schema_list = [
8524-
SchemaField("qtr", "STRING", "REQUIRED", "quarter"),
8525-
SchemaField("rep", "STRING", "NULLABLE", "sales representative"),
8526-
SchemaField("sales", "FLOAT", "NULLABLE", "total sales"),
8538+
SchemaField("qtr", "STRING", "REQUIRED", description="quarter"),
8539+
SchemaField(
8540+
"rep", "STRING", "NULLABLE", description="sales representative"
8541+
),
8542+
SchemaField("sales", "FLOAT", "NULLABLE", description="total sales"),
85278543
]
85288544

85298545
fake_file = io.StringIO()

tests/unit/test_schema.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,10 @@ def test_constructor_defaults(self):
4545
self.assertIsNone(field.description)
4646
self.assertEqual(field.fields, ())
4747
self.assertIsNone(field.policy_tags)
48+
self.assertIsNone(field.default_value_expression)
4849

4950
def test_constructor_explicit(self):
51+
FIELD_DEFAULT_VALUE_EXPRESSION = "This is the default value for this field"
5052
field = self._make_one(
5153
"test",
5254
"STRING",
@@ -58,10 +60,12 @@ def test_constructor_explicit(self):
5860
"projects/f/locations/g/taxonomies/h/policyTags/i",
5961
)
6062
),
63+
default_value_expression=FIELD_DEFAULT_VALUE_EXPRESSION,
6164
)
6265
self.assertEqual(field.name, "test")
6366
self.assertEqual(field.field_type, "STRING")
6467
self.assertEqual(field.mode, "REQUIRED")
68+
self.assertEqual(field.default_value_expression, FIELD_DEFAULT_VALUE_EXPRESSION)
6569
self.assertEqual(field.description, "Testing")
6670
self.assertEqual(field.fields, ())
6771
self.assertEqual(
@@ -182,6 +186,7 @@ def test_from_api_repr_defaults(self):
182186
self.assertEqual(field.field_type, "RECORD")
183187
self.assertEqual(field.mode, "NULLABLE")
184188
self.assertEqual(len(field.fields), 0)
189+
self.assertEqual(field.default_value_expression, None)
185190

186191
# Keys not present in API representation shouldn't be included in
187192
# _properties.
@@ -527,12 +532,12 @@ def test___hash__not_equals(self):
527532

528533
def test___repr__(self):
529534
field1 = self._make_one("field1", "STRING")
530-
expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, (), None)"
535+
expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, None, (), None)"
531536
self.assertEqual(repr(field1), expected)
532537

533538
def test___repr__type_not_set(self):
534539
field1 = self._make_one("field1", field_type=None)
535-
expected = "SchemaField('field1', None, 'NULLABLE', None, (), None)"
540+
expected = "SchemaField('field1', None, 'NULLABLE', None, None, (), None)"
536541
self.assertEqual(repr(field1), expected)
537542

538543
def test___repr__evaluable_no_policy_tags(self):

0 commit comments

Comments
 (0)