diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 00de968..c4bfa98 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -1,6 +1,10 @@ name: Tests on: [push, pull_request] +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: lint: runs-on: ubuntu-latest @@ -9,6 +13,7 @@ jobs: toxenv: - "pep8" - "black" + - "mypy" steps: - uses: actions/checkout@v2 diff --git a/MANIFEST.in b/MANIFEST.in index 0ac1524..63f6c5b 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,5 +3,6 @@ include *.py include *.md include LICENSE include tox.ini +include pyproject.toml prune .travis prune debian diff --git a/canonicaljson.py b/canonicaljson.py index 7841247..4f52317 100644 --- a/canonicaljson.py +++ b/canonicaljson.py @@ -16,9 +16,14 @@ # limitations under the License. import platform -from typing import Optional, Type +from typing import Any, Generator, Optional, Type -frozendict_type: Optional[Type] +try: + from typing import Protocol +except ImportError: # pragma: no cover + from typing_extensions import Protocol # type: ignore[misc] + +frozendict_type: Optional[Type[Any]] try: from frozendict import frozendict as frozendict_type except ImportError: @@ -27,22 +32,37 @@ __version__ = "1.6.0" -def _default(obj): # pragma: no cover +def _default(obj: object) -> object: # pragma: no cover if type(obj) is frozendict_type: # If frozendict is available and used, cast `obj` into a dict - return dict(obj) + return dict(obj) # type: ignore[call-overload] raise TypeError( "Object of type %s is not JSON serializable" % obj.__class__.__name__ ) +class Encoder(Protocol): # pragma: no cover + def encode(self, data: object) -> str: + pass + + def iterencode(self, data: object) -> Generator[str, None, None]: + pass + + def __call__(self, *args: Any, **kwargs: Any) -> "Encoder": + pass + + +class JsonLibrary(Protocol): + JSONEncoder: Encoder + + # Declare these in the module scope, but they get configured in # set_json_library. -_canonical_encoder = None -_pretty_encoder = None +_canonical_encoder: Encoder = None # type: ignore[assignment] +_pretty_encoder: Encoder = None # type: ignore[assignment] -def set_json_library(json_lib): +def set_json_library(json_lib: JsonLibrary) -> None: """ Set the underlying JSON library that canonicaljson uses to json_lib. @@ -69,55 +89,44 @@ def set_json_library(json_lib): ) -def encode_canonical_json(json_object): - """Encodes the shortest UTF-8 JSON encoding with dictionary keys - lexicographically sorted by unicode code point. +def encode_canonical_json(data: object) -> bytes: + """Encodes the given `data` as a UTF-8 canonical JSON bytestring. - Args: - json_object (dict): The JSON object to encode. - - Returns: - bytes encoding the JSON object""" - s = _canonical_encoder.encode(json_object) - return s.encode("utf-8") - - -def iterencode_canonical_json(json_object): - """Encodes the shortest UTF-8 JSON encoding with dictionary keys + This encoding is the shortest possible. Dictionary keys are lexicographically sorted by unicode code point. + """ + s = _canonical_encoder.encode(data) + return s.encode("utf-8") - Args: - json_object (dict): The JSON object to encode. - Returns: - generator which yields bytes encoding the JSON object""" - for chunk in _canonical_encoder.iterencode(json_object): - yield chunk.encode("utf-8") +def iterencode_canonical_json(data: object) -> Generator[bytes, None, None]: + """Iteratively encodes the given `data` as a UTF-8 canonical JSON bytestring. + This yields one or more bytestrings; concatenating them all together yields the + full encoding of `data`. Building up the encoding gradually in this way allows us to + encode large pieces of `data` without blocking other tasks. -def encode_pretty_printed_json(json_object): + This encoding is the shortest possible. Dictionary keys are + lexicographically sorted by unicode code point. """ - Encodes the JSON object dict as human readable UTF-8 bytes. - - Args: - json_object (dict): The JSON object to encode. - - Returns: - bytes encoding the JSON object""" + for chunk in _canonical_encoder.iterencode(data): + yield chunk.encode("utf-8") - return _pretty_encoder.encode(json_object).encode("utf-8") +def encode_pretty_printed_json(data: object) -> bytes: + """Encodes the given `data` as a UTF-8 human-readable JSON bytestring.""" -def iterencode_pretty_printed_json(json_object): - """Encodes the JSON object dict as human readable UTF-8 bytes. + return _pretty_encoder.encode(data).encode("utf-8") - Args: - json_object (dict): The JSON object to encode. - Returns: - generator which yields bytes encoding the JSON object""" +def iterencode_pretty_printed_json(data: object) -> Generator[bytes, None, None]: + """Iteratively encodes the given `data` as a UTF-8 human-readable JSON bytestring. - for chunk in _pretty_encoder.iterencode(json_object): + This yields one or more bytestrings; concatenating them all together yields the + full encoding of `data`. Building up the encoding gradually in this way allows us to + encode large pieces of `data` without blocking other tasks. + """ + for chunk in _pretty_encoder.iterencode(data): yield chunk.encode("utf-8") @@ -132,7 +141,7 @@ def iterencode_pretty_printed_json(json_object): # # Note that it seems performance is on par or better using json from the # standard library as of Python 3.7. - import simplejson as json + import simplejson as json # type: ignore[no-redef] # Set the JSON library to the backwards compatible version. set_json_library(json) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..bd1d867 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,6 @@ +[tool.mypy] +show_error_codes = true +strict = true + +files = ["."] +exclude = "setup.py" diff --git a/setup.py b/setup.py index ed59aca..fec43db 100755 --- a/setup.py +++ b/setup.py @@ -49,6 +49,8 @@ def exec_file(path_segments, name): # simplerjson versions before 3.14.0 had a bug with some characters # (e.g. \u2028) if ensure_ascii was set to false. "simplejson>=3.14.0", + # typing.Protocol was only added to the stdlib in Python 3.8 + "typing_extensions>=4.0.0; python_version < '3.8'", ], extras_require={ # frozendict support can be enabled using the `canonicaljson[frozendict]` syntax diff --git a/test_canonicaljson.py b/test_canonicaljson.py index 0e081c0..fc19922 100644 --- a/test_canonicaljson.py +++ b/test_canonicaljson.py @@ -31,7 +31,7 @@ class TestCanonicalJson(unittest.TestCase): - def test_encode_canonical(self): + def test_encode_canonical(self) -> None: self.assertEqual(encode_canonical_json({}), b"{}") # ctrl-chars should be encoded. @@ -68,7 +68,7 @@ def test_encode_canonical(self): # Iteratively encoding should work. self.assertEqual(list(iterencode_canonical_json({})), [b"{}"]) - def test_ascii(self): + def test_ascii(self) -> None: """ Ensure the proper ASCII characters are escaped. @@ -95,10 +95,10 @@ def test_ascii(self): # And other characters are passed unescaped. unescaped = [0x20, 0x21] + list(range(0x23, 0x5C)) + list(range(0x5D, 0x7E)) for c in unescaped: - c = chr(c) - self.assertEqual(encode_canonical_json(c), b'"' + c.encode("ascii") + b'"') + s = chr(c) + self.assertEqual(encode_canonical_json(s), b'"' + s.encode("ascii") + b'"') - def test_encode_pretty_printed(self): + def test_encode_pretty_printed(self) -> None: self.assertEqual(encode_pretty_printed_json({}), b"{}") self.assertEqual(list(iterencode_pretty_printed_json({})), [b"{}"]) @@ -112,7 +112,9 @@ def test_encode_pretty_printed(self): frozendict_type is None, "If `frozendict` is not available, skip test", ) - def test_frozen_dict(self): + def test_frozen_dict(self) -> None: + # For mypy's benefit: + assert frozendict_type is not None self.assertEqual( encode_canonical_json(frozendict_type({"a": 1})), b'{"a":1}', @@ -122,7 +124,7 @@ def test_frozen_dict(self): b'{\n "a": 1\n}', ) - def test_unknown_type(self): + def test_unknown_type(self) -> None: class Unknown(object): pass @@ -133,7 +135,7 @@ class Unknown(object): with self.assertRaises(Exception): encode_pretty_printed_json(unknown_object) - def test_invalid_float_values(self): + def test_invalid_float_values(self) -> None: """Infinity/-Infinity/NaN are not allowed in canonicaljson.""" with self.assertRaises(ValueError): @@ -154,7 +156,7 @@ def test_invalid_float_values(self): with self.assertRaises(ValueError): encode_pretty_printed_json(nan) - def test_set_json(self): + def test_set_json(self) -> None: """Ensure that changing the underlying JSON implementation works.""" mock_json = mock.Mock(spec=["JSONEncoder"]) mock_json.JSONEncoder.return_value.encode.return_value = "sentinel" @@ -163,6 +165,6 @@ def test_set_json(self): self.assertEqual(encode_canonical_json({}), b"sentinel") finally: # Reset the JSON library to whatever was originally set. - from canonicaljson import json + from canonicaljson import json # type: ignore[attr-defined] set_json_library(json) diff --git a/tox.ini b/tox.ini index 2895be7..00ae34c 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = packaging, pep8, py37, py38, py39, py310, pypy3 +envlist = packaging, pep8, black, py37, py38, py39, py310, pypy3 [testenv] deps = @@ -25,4 +25,14 @@ commands = flake8 . basepython = python3.7 deps = black==21.9b0 + # Workaround black+click incompatability, see https://github.com/psf/black/issues/2964 + click==8.0.4 commands = python -m black --check --diff . + +[testenv:mypy] +deps = + mypy==0.942 + types-frozendict==2.0.8 + types-simplejson==3.17.5 + types-setuptools==57.4.14 +commands = mypy