-
Notifications
You must be signed in to change notification settings - Fork 4.6k
feat(python-sources): add unit integration testing utilities for simplification #43338
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
562be14
927f0f6
a298a11
39714a6
6d16578
5cc3e6a
d5a79b1
f30ddcb
f9dd62f
c9bb9ae
f04513a
476f75b
b1ecee5
840871b
d22df31
d485355
330bf1d
00fc44e
a158d09
58d40b1
ea2dda0
315d2ea
2efec5d
3888783
2747636
8fd4435
c779bf0
82ea347
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
import re | ||
|
||
from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput | ||
|
||
|
||
def is_in_logs(pattern: str, output: EntrypointOutput) -> bool: | ||
return any(re.search(pattern, entry.log.message, flags=re.IGNORECASE) for entry in output.logs) | ||
|
||
|
||
def is_not_in_logs(pattern: str, output: EntrypointOutput) -> bool: | ||
return not is_in_logs(pattern, output) | ||
|
||
|
||
def assert_good_read(output: EntrypointOutput, expected_records: int) -> None: | ||
# checks there are no errors in output, plus the amount of records is correct. | ||
assert len(output.records) == expected_records | ||
assert is_not_in_logs("error|exception", output) | ||
|
||
|
||
def assert_bad_read(output: EntrypointOutput, expected_records: int) -> None: | ||
# checks there are errors in output, plus the amount of records expected amount of records. | ||
assert len(output.records) == expected_records | ||
assert is_in_logs("error|exception", output) | ||
strosek marked this conversation as resolved.
Show resolved
Hide resolved
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
from pathlib import Path as FilePath | ||
|
||
|
||
def _get_unit_test_folder(execution_folder: str) -> FilePath: | ||
path = FilePath(execution_folder) | ||
while path.name != "unit_tests": | ||
if path.name == path.root or path.name == path.drive: | ||
raise ValueError(f"Could not find `unit_tests` folder as a parent of {execution_folder}") | ||
path = path.parent | ||
return path | ||
|
||
|
||
def get_json_contents(resource: str, test_location: FilePath) -> str: | ||
json_path = str(_get_unit_test_folder(test_location) / "resource" / "http" / "response" / f"{resource}") | ||
with open(json_path) as f: | ||
response = f.read() | ||
return response |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
from typing import Optional, List, Mapping, Any | ||
|
||
from airbyte_cdk import AbstractSource | ||
from airbyte_cdk.test.catalog_builder import CatalogBuilder | ||
from airbyte_protocol.models import AirbyteStateMessage, ConfiguredAirbyteCatalog, SyncMode | ||
from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput, read | ||
|
||
|
||
def catalog(stream_name: str, sync_mode: SyncMode) -> ConfiguredAirbyteCatalog: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It makes a ton of sense to me to bring these two methods in the CDK since they're reimplemented by all connectors! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am personally skeptic about this one because of the "brittleness" I mentioned here. I feel like the builder is as readable as this method and I don't know what is the benefit of this method There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd be fine with a builder. what I like about the current approach is we get to remove the duplicated There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These functions are what many tests use, I just moved them. These are provided in a common location for people to use them when they don't need the full builder or an |
||
return CatalogBuilder().with_stream(stream_name, sync_mode).build() | ||
|
||
|
||
def read_records( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I find this interface brittle. If at some point we need more than just the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as the other comment above in |
||
source: AbstractSource, | ||
config: Mapping[str, Any], | ||
stream_name: str, | ||
sync_mode: SyncMode, | ||
state: Optional[List[AirbyteStateMessage]] = None, | ||
expecting_exception: Optional[bool] = False, | ||
) -> EntrypointOutput: | ||
_catalog = catalog(stream_name, sync_mode) | ||
return read(source, config, _catalog, state, expecting_exception) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
from typing import MutableMapping, Any | ||
|
||
import source_woocommerce | ||
from airbyte_cdk.connector_builder.connector_builder_handler import resolve_manifest | ||
from airbyte_cdk import AbstractSource | ||
|
||
|
||
def config() -> MutableMapping[str, Any]: | ||
return { | ||
"api_key": "test_api_key", | ||
"api_secret": "test_api_secret", | ||
"shop": "airbyte.store", | ||
"start_date": "2017-01-01", | ||
} | ||
|
||
|
||
def source() -> AbstractSource: | ||
return source_woocommerce.SourceWoocommerce() | ||
|
||
|
||
def common_params(): | ||
return "orderby=id&order=asc&dates_are_gmt=true&per_page=100" | ||
|
||
|
||
def endpoint_url(stream_name: str, path_parameter: str = None) -> str: | ||
if path_parameter: | ||
return f"{url_base()}/{path_parameter}/{stream_name}" | ||
|
||
return f"{url_base()}/{stream_name}" | ||
|
||
|
||
def url_base() -> str: | ||
url = resolve_manifest(source()).record.data["manifest"]["definitions"]["requester"]["url_base"] | ||
url = url.replace("{{ config['shop'] }}", config()["shop"]) | ||
return url |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
# Copyright (c) 2024 Airbyte, Inc., all rights reserved. | ||
|
||
import pytest | ||
import requests_mock | ||
|
||
from airbyte_cdk.test.utils.assertions import assert_good_read | ||
from airbyte_cdk.test.utils.data import get_json_contents | ||
from airbyte_protocol.models import SyncMode | ||
from freezegun import freeze_time | ||
|
||
|
||
from airbyte_cdk.test.utils.reading import read_records | ||
from .common import common_params, config, source, endpoint_url | ||
|
||
|
||
def modified_after() -> str: | ||
return "2017-01-01T00:00:00" | ||
|
||
|
||
def modified_before() -> str: | ||
return "2017-01-30T23:59:59" | ||
|
||
|
||
def date_range() -> str: | ||
# Create and encode date range | ||
return f"modified_after={modified_after()}&modified_before={modified_before()}".replace(":", "%3A") | ||
|
||
|
||
def build_url(stream_name: str) -> str: | ||
return f"{endpoint_url(stream_name)}?{common_params()}&{date_range()}" | ||
|
||
|
||
@freeze_time(modified_before()) | ||
@requests_mock.Mocker(kw="mock") | ||
@pytest.mark.parametrize( | ||
"stream_name, json_response_file, num_records", | ||
[ | ||
("coupons", "coupons.json", 2), | ||
("orders", "orders.json", 2), | ||
# ("customers", "customers.json", 2), | ||
("products", "products.json", 2), | ||
], | ||
) | ||
def test_read_simple_endpoints_successfully(stream_name, json_response_file, num_records, **kwargs) -> None: | ||
"""Test basic read for all streams that use date ranges and don't have parent streams.""" | ||
|
||
# Register mock response | ||
kwargs["mock"].get( | ||
strosek marked this conversation as resolved.
Show resolved
Hide resolved
|
||
build_url(stream_name), | ||
text=get_json_contents(json_response_file, __file__) | ||
) | ||
|
||
# Read records | ||
output = read_records(source(), config(), stream_name, SyncMode.full_refresh) | ||
|
||
# Check read was successful | ||
assert_good_read(output, num_records) |
Uh oh!
There was an error while loading. Please reload this page.