Skip to content

Commit ad9c802

Browse files
grimmerk, tswast, tseaver
authored
fix: make unicode characters work well in load_table_from_json (#865)
Co-authored-by: Tim Swast <[email protected]> Co-authored-by: Tres Seaver <[email protected]>
1 parent 519d99c commit ad9c802

File tree

2 files changed

+37
-1
lines changed

2 files changed

+37
-1
lines changed

google/cloud/bigquery/client.py

+1 -1
Original file line numberDiff line numberDiff line change
@@ -2762,7 +2762,7 @@ def load_table_from_json(
27622762

27632763
destination = _table_arg_to_table_ref(destination, default_project=self.project)
27642764

2765-
data_str = "\n".join(json.dumps(item) for item in json_rows)
2765+
data_str = "\n".join(json.dumps(item, ensure_ascii=False) for item in json_rows)
27662766
encoded_str = data_str.encode()
27672767
data_file = io.BytesIO(encoded_str)
27682768
return self.load_table_from_file(

tests/unit/test_client.py

+36
Original file line numberDiff line numberDiff line change
@@ -7775,6 +7775,42 @@ def test_load_table_from_json_w_invalid_job_config(self):
77757775
err_msg = str(exc.value)
77767776
assert "Expected an instance of LoadJobConfig" in err_msg
77777777

7778+
def test_load_table_from_json_unicode_emoji_data_case(self):
7779+
from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
7780+
7781+
client = self._make_client()
7782+
7783+
emoji = "\U0001F3E6"
7784+
json_row = {"emoji": emoji}
7785+
json_rows = [json_row]
7786+
7787+
load_patch = mock.patch(
7788+
"google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
7789+
)
7790+
7791+
with load_patch as load_table_from_file:
7792+
client.load_table_from_json(json_rows, self.TABLE_REF)
7793+
7794+
load_table_from_file.assert_called_once_with(
7795+
client,
7796+
mock.ANY,
7797+
self.TABLE_REF,
7798+
size=mock.ANY,
7799+
num_retries=_DEFAULT_NUM_RETRIES,
7800+
job_id=mock.ANY,
7801+
job_id_prefix=None,
7802+
location=client.location,
7803+
project=client.project,
7804+
job_config=mock.ANY,
7805+
timeout=None,
7806+
)
7807+
7808+
sent_data_file = load_table_from_file.mock_calls[0][1][1]
7809+
7810+
# make sure json_row's unicode characters are only encoded one time
7811+
expected_bytes = b'{"emoji": "' + emoji.encode("utf8") + b'"}'
7812+
assert sent_data_file.getvalue() == expected_bytes
7813+
77787814
# Low-level tests
77797815

77807816
@classmethod

0 commit comments

Comments
 (0)