Skip to content

feat(python-sources): add unit integration testing utilities for simplification #43338

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 28 commits into from
Aug 14, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
562be14
Draft basic read integration test
Aug 6, 2024
927f0f6
Add tests for WooCommerce simple streams
Aug 7, 2024
a298a11
Generalize test structure, test all streams
Aug 7, 2024
39714a6
Complete test suite, remove unused code
Aug 7, 2024
6d16578
Merge branch 'master' into strosek/test_utils
Aug 8, 2024
5cc3e6a
Fix matcher in multiple pages test
Aug 8, 2024
d5a79b1
Reduce code duplication, apply format code
Aug 9, 2024
f30ddcb
Add docstrings to test utils
Aug 9, 2024
f9dd62f
Merge branch 'master' into strosek/test_utils
strosek Aug 9, 2024
c9bb9ae
Make registered mock method configurable, address comments
Aug 9, 2024
f04513a
Add missing http_mocking module
Aug 9, 2024
476f75b
Add missing http_mocking module
Aug 9, 2024
b1ecee5
Apply formatting
Aug 9, 2024
840871b
Merge branch 'master' into strosek/test_utils
strosek Aug 12, 2024
d22df31
Rename test data loading function
Aug 12, 2024
d485355
restore unit tests to split PR in two
Aug 12, 2024
330bf1d
Update JSON reference in read_resource_file_contents
Aug 12, 2024
00fc44e
Refactor get_unit_tests_folder to be public and reused
Aug 12, 2024
a158d09
Fix file path type annotation
Aug 12, 2024
58d40b1
Merge branch 'master' into strosek/test_utils
strosek Aug 12, 2024
ea2dda0
Revert back to using private get_unit_test_folder
Aug 12, 2024
315d2ea
Merge branch 'master' into strosek/test_utils
strosek Aug 12, 2024
2efec5d
Merge branch 'master' into strosek/test_utils
strosek Aug 12, 2024
3888783
Fix formatting
Aug 12, 2024
2747636
Merge branch 'master' into strosek/test_utils
strosek Aug 13, 2024
8fd4435
Move get_unit_test_folder to utils/data
Aug 13, 2024
c779bf0
Merge branch 'master' into strosek/test_utils
strosek Aug 13, 2024
82ea347
Merge branch 'master' into strosek/test_utils
strosek Aug 14, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions airbyte-cdk/python/airbyte_cdk/test/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
28 changes: 28 additions & 0 deletions airbyte-cdk/python/airbyte_cdk/test/utils/assertions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.

import re

from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput


def is_in_logs(pattern: str, output: EntrypointOutput) -> bool:
    """Return True when at least one log message matches `pattern`, ignoring case.

    `pattern` is applied with `re.search`, so it may match anywhere in the message.
    """
    for entry in output.logs:
        if re.search(pattern, entry.log.message, flags=re.IGNORECASE):
            return True
    return False


def is_not_in_logs(pattern: str, output: EntrypointOutput) -> bool:
    """Return True when none of the log messages matches `pattern` (negation of `is_in_logs`)."""
    found = is_in_logs(pattern, output)
    return not found


def assert_good_read(output: EntrypointOutput, expected_record_count: int) -> None:
    """Assert that a read succeeded.

    A good read has no errors, exactly `expected_record_count` records, and no
    log message matching "error" or "exception".
    """
    error_count = len(output.errors)
    assert error_count == 0
    record_count = len(output.records)
    assert record_count == expected_record_count
    assert is_not_in_logs("error|exception", output)


def assert_bad_read(output: EntrypointOutput, expected_record_count: int) -> None:
    """Assert that a read failed.

    A bad read still yields exactly `expected_record_count` records, and at
    least one log message matches "error" or "exception".
    """
    record_count = len(output.records)
    assert record_count == expected_record_count
    assert is_in_logs("error|exception", output)
20 changes: 20 additions & 0 deletions airbyte-cdk/python/airbyte_cdk/test/utils/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.

from pathlib import Path as FilePath


def _get_resource_root_folder(execution_folder: FilePath) -> FilePath:
    """Return the closest `unit_tests` directory at or above `execution_folder`.

    Args:
        execution_folder: Path (file or directory) located inside a `unit_tests` tree.

    Returns:
        The path whose final component is `unit_tests`.

    Raises:
        ValueError: If no component of `execution_folder` is named `unit_tests`.
    """
    path = FilePath(execution_folder)
    # `parents` is a finite sequence ending at the anchor (or "." for relative
    # paths), so the search always terminates. Comparing `name` with
    # `root`/`drive` never matches at a Windows drive root such as "C:\\",
    # whose parent is itself.
    for candidate in (path, *path.parents):
        if candidate.name == "unit_tests":
            return candidate
    raise ValueError(f"Could not find `unit_tests` folder as a parent of {execution_folder}")


def read_json_contents(resource: str, test_location: FilePath) -> str:
    """Read the contents of a json file from the test resource folder.

    Args:
        resource: File name, relative to `<unit_tests>/resource/http/response`.
        test_location: Path inside the `unit_tests` tree, used to locate its root.

    Returns:
        The raw text of the file.

    Raises:
        ValueError: If `test_location` is not inside a `unit_tests` folder.
        OSError: If the resource file cannot be opened.
    """
    json_path = _get_resource_root_folder(test_location) / "resource" / "http" / "response" / f"{resource}"
    # JSON fixtures are read as UTF-8 explicitly so the result does not depend
    # on the platform's locale encoding.
    with open(json_path, encoding="utf-8") as f:
        return f.read()
26 changes: 26 additions & 0 deletions airbyte-cdk/python/airbyte_cdk/test/utils/reading.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.

from typing import Any, List, Mapping, Optional

from airbyte_cdk import AbstractSource
from airbyte_cdk.test.catalog_builder import CatalogBuilder
from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput, read
from airbyte_protocol.models import AirbyteStateMessage, ConfiguredAirbyteCatalog, SyncMode


def catalog(stream_name: str, sync_mode: SyncMode) -> ConfiguredAirbyteCatalog:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It makes a ton of sense to me to bring these two methods in the CDK since they're reimplemented by all connectors!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am personally skeptical about this one because of the "brittleness" I mentioned here. I feel like the builder is as readable as this method, and I don't know what the benefit of this method is.

Copy link
Contributor

@girarda girarda Aug 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd be fine with a builder. What I like about the current approach is that we get to remove the duplicated _read static methods, which is maybe not related to this specific function, so my bad.

Copy link
Contributor Author

@strosek strosek Aug 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These functions are what many tests use; I just moved them. They are provided in a common location for people to use when they don't need the full builder or an __init__() with default arguments. It's difficult to generalize everything, but these little functions come in handy in many cases; if they are not handy, people can fall back to the builder.

"""Create a catalog with a single stream."""
return CatalogBuilder().with_stream(stream_name, sync_mode).build()


def read_records(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I find this interface brittle. If at some point we need more than just the stream_name, sync_mode to describe the catalog, we will have to update this interface. Why not pass the catalog directly? We have a builder that should ease the process of instantiating it

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as the other comment above in reading.py

source: AbstractSource,
config: Mapping[str, Any],
stream_name: str,
sync_mode: SyncMode,
state: Optional[List[AirbyteStateMessage]] = None,
expecting_exception: bool = False,
) -> EntrypointOutput:
"""Read records from a stream."""
_catalog = catalog(stream_name, sync_mode)
return read(source, config, _catalog, state, expecting_exception)
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.

from typing import Any, MutableMapping, Optional

import source_woocommerce
from airbyte_cdk import AbstractSource
from airbyte_cdk.connector_builder.connector_builder_handler import resolve_manifest


def config() -> MutableMapping[str, Any]:
    """Return a fresh connector configuration with fixed test values."""
    settings: MutableMapping[str, Any] = {
        "api_key": "test_api_key",
        "api_secret": "test_api_secret",
        "shop": "airbyte.store",
        "start_date": "2017-01-01",
    }
    return settings


def source() -> AbstractSource:
    """Instantiate a new `SourceWoocommerce` connector."""
    connector = source_woocommerce.SourceWoocommerce()
    return connector


def url_base() -> str:
    """Return the requester `url_base` from the resolved manifest with the test shop filled in.

    The `{{ config['shop'] }}` placeholder is replaced by the `shop` value
    from `config()`.
    """
    manifest = resolve_manifest(source()).record.data["manifest"]
    template = manifest["definitions"]["requester"]["url_base"]
    shop = config()["shop"]
    return template.replace("{{ config['shop'] }}", shop)


def common_params():
    """Return the query string appended to every URL built by `build_url`."""
    query = "orderby=id&order=asc&dates_are_gmt=true&per_page=100"
    return query


def build_url(
    resource_path: str,
    is_regex: bool = False,
    modified_after: Optional[str] = None,
    modified_before: Optional[str] = None,
) -> str:
    """Build a URL for a WooCommerce API endpoint.

    Args:
        resource_path: Endpoint path appended to the base URL.
        is_regex: When True, "." is used instead of "?" between path and query.
        modified_after: Optional value for the `modified_after` query parameter.
        modified_before: Optional value for the `modified_before` query parameter.

    Returns:
        The base URL, resource path, common parameters and any date filters.
        Falsy date filters (None or "") are omitted.
    """
    # NOTE(review): presumably "." stands in for "?" because a literal "?" would
    # act as a quantifier when the URL is used as a regex - confirm with the matcher.
    separator = "." if is_regex else "?"
    parts = [f"{url_base()}/{resource_path}{separator}{common_params()}"]
    if modified_after:
        parts.append(f"modified_after={modified_after}")
    if modified_before:
        parts.append(f"modified_before={modified_before}")
    return "&".join(parts)
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.

from airbyte_cdk.test.utils.data import read_json_contents

from .common import build_url


def request_response_mapping(stream_name, custom_resource=None, is_regex=False, modified_after=None, modified_before=None,
                             custom_json_filename=None, status_code=200, response=None):
    """Create an HTTP request-response mapping for a stream.

    The request URL targets `custom_resource` when given, otherwise `stream_name`.
    When `response` is None, the response body is read from `custom_json_filename`
    (default `<stream_name>.json`) and paired with `status_code`. Otherwise
    `response` is used as-is and `status_code` is ignored.
    """
    if response is not None:
        response_map = response
    else:
        json_filename = custom_json_filename if custom_json_filename is not None else f"{stream_name}.json"
        response_map = {"text": read_json_contents(json_filename, __file__), "status_code": status_code}

    resource = custom_resource if custom_resource is not None else stream_name
    request = {
        "url": build_url(resource, modified_after=modified_after, modified_before=modified_before, is_regex=is_regex),
        "is_regex": is_regex,
    }
    return {"request": request, "response": response_map}


def orders_http_calls():
    """Return the single request-response mapping for `orders`, with the URL matched as a regex."""
    mapping = request_response_mapping("orders", is_regex=True)
    return [mapping]


def coupons_http_calls():
    """Return the single request-response mapping for `coupons`, with the URL matched as a regex."""
    mapping = request_response_mapping("coupons", is_regex=True)
    return [mapping]


def customers_http_calls():
    """Return the single request-response mapping for `customers`."""
    mapping = request_response_mapping("customers")
    return [mapping]


def payment_gateways_http_calls():
    """Return the single request-response mapping for `payment_gateways`."""
    mapping = request_response_mapping("payment_gateways")
    return [mapping]


def product_attributes_http_calls():
    """Return the single mapping for `product_attributes`, requested at `products/attributes`."""
    mapping = request_response_mapping("product_attributes", custom_resource="products/attributes")
    return [mapping]


def product_categories_http_calls():
    """Return the single mapping for `product_categories`, requested at `products/categories`."""
    mapping = request_response_mapping("product_categories", custom_resource="products/categories")
    return [mapping]


def product_reviews_http_calls():
    """Return the single mapping for `product_reviews`, requested at `products/reviews`."""
    mapping = request_response_mapping("product_reviews", custom_resource="products/reviews")
    return [mapping]


def products_http_calls():
    """Return the single request-response mapping for `products`, with the URL matched as a regex."""
    mapping = request_response_mapping("products", is_regex=True)
    return [mapping]


def product_shipping_classes_http_calls():
    """Return the single mapping for `product_shipping_classes`, requested at `products/shipping_classes`."""
    mapping = request_response_mapping("product_shipping_classes", custom_resource="products/shipping_classes")
    return [mapping]


def product_tags_http_calls():
    """Return the single mapping for `product_tags`, requested at `products/tags`."""
    mapping = request_response_mapping("product_tags", custom_resource="products/tags")
    return [mapping]


def shipping_methods_http_calls():
    """Return the single mapping for `shipping_methods`, requested at `shipping_methods`."""
    mapping = request_response_mapping("shipping_methods", custom_resource="shipping_methods")
    return [mapping]


def shipping_zones_http_calls():
    """Return the single mapping for `shipping_zones`, requested at `shipping/zones`."""
    mapping = request_response_mapping("shipping_zones", custom_resource="shipping/zones")
    return [mapping]


def system_status_tools_http_calls():
    """Return the single mapping for `system_status_tools`, requested at `system_status/tools`."""
    mapping = request_response_mapping("system_status_tools", custom_resource="system_status/tools")
    return [mapping]


def order_notes_http_calls():
    """Return the mappings for `order_notes`: a date-filtered `orders` call, then the notes calls.

    Both URLs are matched as regexes; the notes URL covers order ids 727 and 723.
    """
    orders_call = request_response_mapping("orders", modified_after="2017-01-01.+", modified_before="2017-01-29.+", is_regex=True)
    notes_call = request_response_mapping("order_notes", custom_resource="orders/(727|723)/notes", is_regex=True)
    return [orders_call, notes_call]


def product_attribute_terms_http_calls():
    """Return the mappings for `product_attribute_terms`: the attributes call, then the terms calls.

    Both URLs are matched as regexes; the terms URL accepts any attribute id (`.+`).
    """
    attributes_call = request_response_mapping("product_attributes", custom_resource="products/attributes", is_regex=True)
    terms_call = request_response_mapping("product_attribute_terms", custom_resource="products/attributes/.+/terms", is_regex=True)
    return [attributes_call, terms_call]


def product_variations_http_calls():
    """Return the mappings for `product_variations`: the `products` call, then the variations calls.

    The variations URL is a regex covering product ids 799 and 794.
    """
    products_call = request_response_mapping("products")
    variations_call = request_response_mapping("product_variations", custom_resource="products/(799|794)/variations", is_regex=True)
    return [products_call, variations_call]


def refunds_http_calls():
    """Return the mappings for `refunds`: the `orders` call, then the refunds calls.

    The refunds URL is a regex covering order ids 727 and 723.
    """
    orders_call = request_response_mapping("orders")
    refunds_call = request_response_mapping("refunds", custom_resource="orders/(727|723)/refunds", is_regex=True)
    return [orders_call, refunds_call]


def shipping_zone_locations_http_calls():
    """Return the mappings for `shipping_zone_locations`: the zones call, then the locations calls.

    The locations URL is a regex covering zone ids 0 and 5.
    """
    zones_call = request_response_mapping("shipping_zones", custom_resource="shipping/zones")
    locations_call = request_response_mapping("shipping_zone_locations", custom_resource="shipping/zones/(0|5)/locations", is_regex=True)
    return [zones_call, locations_call]


def shipping_zone_methods_http_calls():
    """Return the mappings for `shipping_zone_methods`: the zones call, then the methods calls.

    The methods URL is a regex covering zone ids 0 and 5.
    """
    zones_call = request_response_mapping("shipping_zones", custom_resource="shipping/zones")
    methods_call = request_response_mapping("shipping_zone_methods", custom_resource="shipping/zones/(0|5)/methods", is_regex=True)
    return [zones_call, methods_call]


def tax_classes_http_calls():
    """Return the single mapping for `tax_classes`, requested at `taxes/classes`."""
    mapping = request_response_mapping("tax_classes", custom_resource="taxes/classes")
    return [mapping]


def tax_rates_http_calls():
    """Return the single mapping for `tax_rates`, requested at `taxes`."""
    mapping = request_response_mapping("tax_rates", custom_resource="taxes")
    return [mapping]


def orders_empty_last_page():
    """Return two regex-matched `orders` mappings, the second with an empty JSON list as its body.

    The first mapping matches `modified_before=2017-01-30.+` and serves the
    `orders.json` fixture. The second matches `modified_before=2017-02-10.+`
    and responds with the text "[]".
    """
    populated_page = request_response_mapping("orders", is_regex=True, modified_after=".+", modified_before="2017-01-30.+")
    empty_page = request_response_mapping("orders", is_regex=True, modified_after=".+", modified_before="2017-02-10.+", response={"text": "[]"})
    return [populated_page, empty_page]
Loading
Loading