Skip to content

feat(python-sources): add unit integration testing utilities for simplification #43338

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 28 commits into from
Aug 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
562be14
Draft basic read integration test
Aug 6, 2024
927f0f6
Add tests for WooCommerce simple streams
Aug 7, 2024
a298a11
Generalize test structure, test all streams
Aug 7, 2024
39714a6
Complete test suite, remove unused code
Aug 7, 2024
6d16578
Merge branch 'master' into strosek/test_utils
Aug 8, 2024
5cc3e6a
Fix matcher in multiple pages test
Aug 8, 2024
d5a79b1
Reduce code duplication, apply format code
Aug 9, 2024
f30ddcb
Add docstrings to test utils
Aug 9, 2024
f9dd62f
Merge branch 'master' into strosek/test_utils
strosek Aug 9, 2024
c9bb9ae
Make registered mock method configurable, address comments
Aug 9, 2024
f04513a
Add missing http_mocking module
Aug 9, 2024
476f75b
Add missing http_mocking module
Aug 9, 2024
b1ecee5
Apply formatting
Aug 9, 2024
840871b
Merge branch 'master' into strosek/test_utils
strosek Aug 12, 2024
d22df31
Rename test data loading function
Aug 12, 2024
d485355
restore unit tests to split PR in two
Aug 12, 2024
330bf1d
Update JSON reference in read_resource_file_contents
Aug 12, 2024
00fc44e
Refactor get_unit_tests_folder to be public and reused
Aug 12, 2024
a158d09
Fix file path type annotation
Aug 12, 2024
58d40b1
Merge branch 'master' into strosek/test_utils
strosek Aug 12, 2024
ea2dda0
Revert back to using private get_unit_test_folder
Aug 12, 2024
315d2ea
Merge branch 'master' into strosek/test_utils
strosek Aug 12, 2024
2efec5d
Merge branch 'master' into strosek/test_utils
strosek Aug 12, 2024
3888783
Fix formatting
Aug 12, 2024
2747636
Merge branch 'master' into strosek/test_utils
strosek Aug 13, 2024
8fd4435
Move get_unit_test_folder to utils/data
Aug 13, 2024
c779bf0
Merge branch 'master' into strosek/test_utils
strosek Aug 13, 2024
82ea347
Merge branch 'master' into strosek/test_utils
strosek Aug 14, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions airbyte-cdk/python/airbyte_cdk/test/entrypoint_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import json
import logging
import re
import tempfile
import traceback
from io import StringIO
Expand Down Expand Up @@ -115,6 +116,14 @@ def _get_message_by_types(self, message_types: List[Type]) -> List[AirbyteMessag
def _get_trace_message_by_trace_type(self, trace_type: TraceType) -> List[AirbyteMessage]:
return [message for message in self._get_message_by_types([Type.TRACE]) if message.trace.type == trace_type]

def is_in_logs(self, pattern: str) -> bool:
"""Check if any log message case-insensitive matches the pattern."""
return any(re.search(pattern, entry.log.message, flags=re.IGNORECASE) for entry in self.logs)

def is_not_in_logs(self, pattern: str) -> bool:
"""Check if no log message matches the case-insensitive pattern."""
return not self.is_in_logs(pattern)


def _run_command(source: Source, args: List[str], expecting_exception: bool = False) -> EntrypointOutput:
log_capture_buffer = StringIO()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import Any, Dict, List, Optional, Union

from airbyte_cdk.test.mock_http import HttpResponse
from airbyte_cdk.test.utils.data import get_unit_test_folder


def _extract(path: List[str], response_template: Dict[str, Any]) -> Any:
Expand Down Expand Up @@ -169,16 +170,12 @@ def build(self) -> HttpResponse:


def _get_unit_test_folder(execution_folder: str) -> FilePath:
path = FilePath(execution_folder)
while path.name != "unit_tests":
if path.name == path.root or path.name == path.drive:
raise ValueError(f"Could not find `unit_tests` folder as a parent of {execution_folder}")
path = path.parent
return path
# FIXME: This function should be removed after the next CDK release to avoid breaking amazon-seller-partner test code.
return get_unit_test_folder(execution_folder)


def find_template(resource: str, execution_folder: str) -> Dict[str, Any]:
response_template_filepath = str(_get_unit_test_folder(execution_folder) / "resource" / "http" / "response" / f"{resource}.json")
response_template_filepath = str(get_unit_test_folder(execution_folder) / "resource" / "http" / "response" / f"{resource}.json")
with open(response_template_filepath, "r") as template_file:
return json.load(template_file) # type: ignore # we assume the dev correctly set up the resource file

Expand Down
1 change: 1 addition & 0 deletions airbyte-cdk/python/airbyte_cdk/test/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
20 changes: 20 additions & 0 deletions airbyte-cdk/python/airbyte_cdk/test/utils/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.

from pydantic import FilePath


def get_unit_test_folder(execution_folder: str) -> FilePath:
path = FilePath(execution_folder)
while path.name != "unit_tests":
if path.name == path.root or path.name == path.drive:
raise ValueError(f"Could not find `unit_tests` folder as a parent of {execution_folder}")
path = path.parent
return path


def read_resource_file_contents(resource: str, test_location: str) -> str:
"""Read the contents of a test data file from the test resource folder."""
file_path = str(get_unit_test_folder(test_location) / "resource" / "http" / "response" / f"{resource}")
with open(file_path) as f:
response = f.read()
return response
14 changes: 14 additions & 0 deletions airbyte-cdk/python/airbyte_cdk/test/utils/http_mocking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.

import re
from typing import Any, Mapping

from requests_mock import Mocker


def register_mock_responses(mocker: Mocker, http_calls: list[Mapping[str, Mapping[str, Any]]]) -> None:
"""Register a list of HTTP request-response pairs."""
for call in http_calls:
request, response = call["request"], call["response"]
matcher = re.compile(request["url"]) if request["is_regex"] else request["url"]
mocker.register_uri(request["method"], matcher, **response)
26 changes: 26 additions & 0 deletions airbyte-cdk/python/airbyte_cdk/test/utils/reading.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.

from typing import Any, List, Mapping, Optional

from airbyte_cdk import AbstractSource
from airbyte_cdk.test.catalog_builder import CatalogBuilder
from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput, read
from airbyte_protocol.models import AirbyteStateMessage, ConfiguredAirbyteCatalog, SyncMode


def catalog(stream_name: str, sync_mode: SyncMode) -> ConfiguredAirbyteCatalog:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It makes a ton of sense to me to bring these two methods in the CDK since they're reimplemented by all connectors!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am personally skeptic about this one because of the "brittleness" I mentioned here. I feel like the builder is as readable as this method and I don't know what is the benefit of this method

Copy link
Contributor

@girarda girarda Aug 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd be fine with a builder. what I like about the current approach is we get to remove the duplicated _read static methods which is maybe not related to this specific function, so my bad

Copy link
Contributor Author

@strosek strosek Aug 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These functions are what many tests use, I just moved them. These are provided in a common location for people to use them when they don't need the full builder or an __init__() with default arguments. It's difficult to generalize everything, but these little functions come handy in many cases, if they are not handy, people can fall back to the builder.

"""Create a catalog with a single stream."""
return CatalogBuilder().with_stream(stream_name, sync_mode).build()


def read_records(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I find this interface brittle. If at some point we need more than just the stream_name, sync_mode to describe the catalog, we will have to update this interface. Why not pass the catalog directly? We have a builder that should ease the process of instantiating it

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as the other comment above in reading.py

source: AbstractSource,
config: Mapping[str, Any],
stream_name: str,
sync_mode: SyncMode,
state: Optional[List[AirbyteStateMessage]] = None,
expecting_exception: bool = False,
) -> EntrypointOutput:
"""Read records from a stream."""
_catalog = catalog(stream_name, sync_mode)
return read(source, config, _catalog, state, expecting_exception)
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def get_stream_by_name(stream_name: str, config_: Mapping[str, Any]) -> Stream:

def find_template(resource: str, execution_folder: str, template_format: Optional[str] = "csv") -> str:
response_template_filepath = str(
# FIXME: the below function should be replaced with the public version after next CDK release
_get_unit_test_folder(execution_folder) / "resource" / "http" / "response" / f"{resource}.{template_format}"
)
with open(response_template_filepath, "r") as template_file:
Expand Down
Loading