Skip to content

feat(python-sources): add unit integration testing utilities for simplification #43338

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 28 commits into from
Aug 14, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
562be14
Draft basic read integration test
Aug 6, 2024
927f0f6
Add tests for WooCommerce simple streams
Aug 7, 2024
a298a11
Generalize test structure, test all streams
Aug 7, 2024
39714a6
Complete test suite, remove unused code
Aug 7, 2024
6d16578
Merge branch 'master' into strosek/test_utils
Aug 8, 2024
5cc3e6a
Fix matcher in multiple pages test
Aug 8, 2024
d5a79b1
Reduce code duplication, apply format code
Aug 9, 2024
f30ddcb
Add docstrings to test utils
Aug 9, 2024
f9dd62f
Merge branch 'master' into strosek/test_utils
strosek Aug 9, 2024
c9bb9ae
Make registered mock method configurable, address comments
Aug 9, 2024
f04513a
Add missing http_mocking module
Aug 9, 2024
476f75b
Add missing http_mocking module
Aug 9, 2024
b1ecee5
Apply formatting
Aug 9, 2024
840871b
Merge branch 'master' into strosek/test_utils
strosek Aug 12, 2024
d22df31
Rename test data loading function
Aug 12, 2024
d485355
restore unit tests to split PR in two
Aug 12, 2024
330bf1d
Update JSON reference in read_resource_file_contents
Aug 12, 2024
00fc44e
Refactor get_unit_tests_folder to be public and reused
Aug 12, 2024
a158d09
Fix file path type annotation
Aug 12, 2024
58d40b1
Merge branch 'master' into strosek/test_utils
strosek Aug 12, 2024
ea2dda0
Revert back to using private get_unit_test_folder
Aug 12, 2024
315d2ea
Merge branch 'master' into strosek/test_utils
strosek Aug 12, 2024
2efec5d
Merge branch 'master' into strosek/test_utils
strosek Aug 12, 2024
3888783
Fix formatting
Aug 12, 2024
2747636
Merge branch 'master' into strosek/test_utils
strosek Aug 13, 2024
8fd4435
Move get_unit_test_folder to utils/data
Aug 13, 2024
c779bf0
Merge branch 'master' into strosek/test_utils
strosek Aug 13, 2024
82ea347
Merge branch 'master' into strosek/test_utils
strosek Aug 14, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
23 changes: 23 additions & 0 deletions airbyte-cdk/python/airbyte_cdk/test/utils/assertions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import re

from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput


def is_in_logs(pattern: str, output: EntrypointOutput) -> bool:
return any(re.search(pattern, entry.log.message, flags=re.IGNORECASE) for entry in output.logs)


def is_not_in_logs(pattern: str, output: EntrypointOutput) -> bool:
return not is_in_logs(pattern, output)


def assert_good_read(output: EntrypointOutput, expected_records: int) -> None:
# checks there are no errors in output, plus the amount of records is correct.
assert len(output.records) == expected_records
assert is_not_in_logs("error|exception", output)


def assert_bad_read(output: EntrypointOutput, expected_records: int) -> None:
# checks there are errors in output, plus the amount of records expected amount of records.
assert len(output.records) == expected_records
assert is_in_logs("error|exception", output)
17 changes: 17 additions & 0 deletions airbyte-cdk/python/airbyte_cdk/test/utils/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from pathlib import Path as FilePath


def _get_unit_test_folder(execution_folder: str) -> FilePath:
path = FilePath(execution_folder)
while path.name != "unit_tests":
if path.name == path.root or path.name == path.drive:
raise ValueError(f"Could not find `unit_tests` folder as a parent of {execution_folder}")
path = path.parent
return path


def get_json_contents(resource: str, test_location: FilePath) -> str:
json_path = str(_get_unit_test_folder(test_location) / "resource" / "http" / "response" / f"{resource}")
with open(json_path) as f:
response = f.read()
return response
22 changes: 22 additions & 0 deletions airbyte-cdk/python/airbyte_cdk/test/utils/reading.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from typing import Optional, List, Mapping, Any

from airbyte_cdk import AbstractSource
from airbyte_cdk.test.catalog_builder import CatalogBuilder
from airbyte_protocol.models import AirbyteStateMessage, ConfiguredAirbyteCatalog, SyncMode
from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput, read


def catalog(stream_name: str, sync_mode: SyncMode) -> ConfiguredAirbyteCatalog:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It makes a ton of sense to me to bring these two methods in the CDK since they're reimplemented by all connectors!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am personally skeptic about this one because of the "brittleness" I mentioned here. I feel like the builder is as readable as this method and I don't know what is the benefit of this method

Copy link
Contributor

@girarda girarda Aug 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd be fine with a builder. what I like about the current approach is we get to remove the duplicated _read static methods which is maybe not related to this specific function, so my bad

Copy link
Contributor Author

@strosek strosek Aug 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These functions are what many tests use, I just moved them. These are provided in a common location for people to use them when they don't need the full builder or an __init__() with default arguments. It's difficult to generalize everything, but these little functions come handy in many cases, if they are not handy, people can fall back to the builder.

return CatalogBuilder().with_stream(stream_name, sync_mode).build()


def read_records(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I find this interface brittle. If at some point we need more than just the stream_name, sync_mode to describe the catalog, we will have to update this interface. Why not pass the catalog directly? We have a builder that should ease the process of instantiating it

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as the other comment above in reading.py

source: AbstractSource,
config: Mapping[str, Any],
stream_name: str,
sync_mode: SyncMode,
state: Optional[List[AirbyteStateMessage]] = None,
expecting_exception: Optional[bool] = False,
) -> EntrypointOutput:
_catalog = catalog(stream_name, sync_mode)
return read(source, config, _catalog, state, expecting_exception)
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from typing import MutableMapping, Any

import source_woocommerce
from airbyte_cdk.connector_builder.connector_builder_handler import resolve_manifest
from airbyte_cdk import AbstractSource


def config() -> MutableMapping[str, Any]:
return {
"api_key": "test_api_key",
"api_secret": "test_api_secret",
"shop": "airbyte.store",
"start_date": "2017-01-01",
}


def source() -> AbstractSource:
return source_woocommerce.SourceWoocommerce()


def common_params():
return "orderby=id&order=asc&dates_are_gmt=true&per_page=100"


def endpoint_url(stream_name: str, path_parameter: str = None) -> str:
if path_parameter:
return f"{url_base()}/{path_parameter}/{stream_name}"

return f"{url_base()}/{stream_name}"


def url_base() -> str:
url = resolve_manifest(source()).record.data["manifest"]["definitions"]["requester"]["url_base"]
url = url.replace("{{ config['shop'] }}", config()["shop"])
return url
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.

import pytest
import requests_mock

from airbyte_cdk.test.utils.assertions import assert_good_read
from airbyte_cdk.test.utils.data import get_json_contents
from airbyte_protocol.models import SyncMode
from freezegun import freeze_time


from airbyte_cdk.test.utils.reading import read_records
from .common import common_params, config, source, endpoint_url


def modified_after() -> str:
return "2017-01-01T00:00:00"


def modified_before() -> str:
return "2017-01-30T23:59:59"


def date_range() -> str:
# Create and encode date range
return f"modified_after={modified_after()}&modified_before={modified_before()}".replace(":", "%3A")


def build_url(stream_name: str) -> str:
return f"{endpoint_url(stream_name)}?{common_params()}&{date_range()}"


@freeze_time(modified_before())
@requests_mock.Mocker(kw="mock")
@pytest.mark.parametrize(
"stream_name, json_response_file, num_records",
[
("coupons", "coupons.json", 2),
("orders", "orders.json", 2),
# ("customers", "customers.json", 2),
("products", "products.json", 2),
],
)
def test_read_simple_endpoints_successfully(stream_name, json_response_file, num_records, **kwargs) -> None:
"""Test basic read for all streams that use date ranges and don't have parent streams."""

# Register mock response
kwargs["mock"].get(
build_url(stream_name),
text=get_json_contents(json_response_file, __file__)
)

# Read records
output = read_records(source(), config(), stream_name, SyncMode.full_refresh)

# Check read was successful
assert_good_read(output, num_records)
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.

from pathlib import Path as FilePath
from typing import List, Optional
from typing import List, Optional, MutableMapping, Any

from airbyte_cdk.test.catalog_builder import CatalogBuilder
from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput, read
Expand All @@ -16,12 +16,13 @@ def catalog(stream_name: str, sync_mode: SyncMode) -> ConfiguredAirbyteCatalog:
return CatalogBuilder().with_stream(stream_name, sync_mode).build()


def config() -> ConfigBuilder:
return ConfigBuilder()


def source() -> SourceWoocommerce:
return SourceWoocommerce()
def config() -> MutableMapping[str, Any]:
return {
"api_key": "test_api_key",
"api_secret": "test_api_secret",
"shop": "airbyte.store",
"start_date": "2017-01-01",
}


def read_output(
Expand All @@ -33,20 +34,4 @@ def read_output(
) -> EntrypointOutput:
_catalog = catalog(stream_name, sync_mode)
_config = config_builder.build()
return read(source(), _config, _catalog, state, expecting_exception)


def _get_unit_test_folder(execution_folder: str) -> FilePath:
path = FilePath(execution_folder)
while path.name != "unit_tests":
if path.name == path.root or path.name == path.drive:
raise ValueError(f"Could not find `unit_tests` folder as a parent of {execution_folder}")
path = path.parent
return path


def get_json_http_response(resource: str, status_code: int) -> HttpResponse:
json_path = str(_get_unit_test_folder(__file__) / "resource" / "http" / "response" / f"{resource}")
with open(json_path) as f:
response = f.read()
return HttpResponse(response, status_code)
return read(SourceWoocommerce(), _config, _catalog, state, expecting_exception)
Loading