diff --git a/airbyte-cdk/python/.gitignore b/airbyte-cdk/python/.gitignore index 27824851c9122..5996dfdc51e57 100644 --- a/airbyte-cdk/python/.gitignore +++ b/airbyte-cdk/python/.gitignore @@ -1,4 +1,2 @@ .coverage -# TODO: these are tmp files generated by unit tests. They should go to the /tmp directory. -cache_http_stream*.yml diff --git a/airbyte-cdk/python/CHANGELOG.md b/airbyte-cdk/python/CHANGELOG.md index d1690cdf748b8..adf9cb6c5b552 100644 --- a/airbyte-cdk/python/CHANGELOG.md +++ b/airbyte-cdk/python/CHANGELOG.md @@ -1,4 +1,6 @@ # Changelog +## 0.9.5 +Allow repeated cache removals & clean up unit test cache files ## 0.9.3 Low-code: Avoid duplicate HTTP query in `simple_retriever` diff --git a/airbyte-cdk/python/airbyte_cdk/sources/streams/http/http.py b/airbyte-cdk/python/airbyte_cdk/sources/streams/http/http.py index 28ea3f506606a..3968241d74ac0 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/streams/http/http.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/streams/http/http.py @@ -68,11 +68,9 @@ def clear_cache(self): """ remove cache file only once """ - STREAM_CACHE_FILES = globals().setdefault("STREAM_CACHE_FILES", set()) - if self.cache_filename not in STREAM_CACHE_FILES: - with suppress(FileNotFoundError): - os.remove(self.cache_filename) - STREAM_CACHE_FILES.add(self.cache_filename) + with suppress(FileNotFoundError): + os.remove(self.cache_filename) + print(f"Removed {self.cache_filename}") @property @abstractmethod diff --git a/airbyte-cdk/python/setup.py b/airbyte-cdk/python/setup.py index eb85d48753287..11222c1020ab1 100644 --- a/airbyte-cdk/python/setup.py +++ b/airbyte-cdk/python/setup.py @@ -15,7 +15,7 @@ setup( name="airbyte-cdk", - version="0.9.3", + version="0.9.5", description="A framework for writing Airbyte Connectors.", long_description=README, long_description_content_type="text/markdown", diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py b/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py index 9f8059cb8e4fe..a8f6e255cfd55 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py @@ -7,6 +7,7 @@ import airbyte_cdk.sources.declarative.requesters.error_handlers.response_status as response_status import pytest import requests +import tempfile from airbyte_cdk.models import AirbyteLogMessage, Level, SyncMode from airbyte_cdk.sources.declarative.exceptions import ReadException from airbyte_cdk.sources.declarative.requesters.error_handlers.response_action import ResponseAction @@ -29,7 +30,7 @@ @patch.object(HttpStream, "_read_pages", return_value=[]) def test_simple_retriever_full(mock_http_stream): - requester = MagicMock() + requester = MagicMock(use_cache=False) request_params = {"param": "value"} requester.get_request_params.return_value = request_params @@ -69,7 +70,7 @@ def test_simple_retriever_full(mock_http_stream): requester.get_request_body_json.return_value = request_body_json request_kwargs = {"kwarg": "value"} requester.request_kwargs.return_value = request_kwargs - cache_filename = "cache" + cache_filename = tempfile.NamedTemporaryFile().name requester.cache_filename = cache_filename use_cache = True requester.use_cache = use_cache @@ -114,7 +115,7 @@ def test_simple_retriever_full(mock_http_stream): @patch.object(HttpStream, "_read_pages", return_value=[*request_response_logs, *records]) def test_simple_retriever_with_request_response_logs(mock_http_stream): - requester = MagicMock() + requester = MagicMock(use_cache=False) paginator = MagicMock() record_selector = MagicMock() iterator = DatetimeStreamSlicer( @@ -143,7 +144,7 @@ def test_simple_retriever_with_request_response_logs(mock_http_stream): @patch.object(HttpStream, "_read_pages", return_value=[]) def test_simple_retriever_with_request_response_log_last_records(mock_http_stream): - requester = MagicMock() + requester = MagicMock(use_cache=False) paginator = MagicMock() record_selector = MagicMock() record_selector.select_records.return_value = request_response_logs diff --git a/airbyte-cdk/python/unit_tests/sources/streams/http/test_http.py b/airbyte-cdk/python/unit_tests/sources/streams/http/test_http.py index ca2348bd22b0e..1df89abb1c863 100644 --- a/airbyte-cdk/python/unit_tests/sources/streams/http/test_http.py +++ b/airbyte-cdk/python/unit_tests/sources/streams/http/test_http.py @@ -5,6 +5,7 @@ import json from http import HTTPStatus +import tempfile from typing import Any, Iterable, Mapping, Optional from unittest.mock import ANY, MagicMock, patch @@ -352,7 +353,12 @@ def test_body_for_all_methods(self, mocker, requests_mock): class CacheHttpStream(StubBasicReadHttpStream): use_cache = True + + def __enter__(self): + return self + def __exit__(self, *args): + self.clear_cache() class CacheHttpSubStream(HttpSubStream): url_base = "https://example.com" @@ -372,32 +378,28 @@ def path(self, **kwargs) -> str: def test_caching_filename(): - stream = CacheHttpStream() - assert stream.cache_filename == f"{stream.name}.sqlite" + with CacheHttpStream() as stream: + assert stream.cache_filename == f"{stream.name}.sqlite" def test_caching_sessions_are_different(): - stream_1 = CacheHttpStream() - stream_2 = CacheHttpStream() - - assert stream_1._session != stream_2._session - assert stream_1.cache_filename == stream_2.cache_filename + with CacheHttpStream() as stream_1, CacheHttpStream() as stream_2: + assert stream_1._session != stream_2._session + assert stream_1.cache_filename == stream_2.cache_filename def test_parent_attribute_exist(): - parent_stream = CacheHttpStream() - child_stream = CacheHttpSubStream(parent=parent_stream) - - assert child_stream.parent == parent_stream + with CacheHttpStream() as parent_stream: + child_stream = CacheHttpSubStream(parent=parent_stream) + assert child_stream.parent == parent_stream def test_cache_response(mocker): - stream = CacheHttpStream() - mocker.patch.object(stream, "url_base", "https://google.com/") - list(stream.read_records(sync_mode=SyncMode.full_refresh)) - - with open(stream.cache_filename, "rb") as f: - assert f.read() + with CacheHttpStream() as stream: + mocker.patch.object(stream, "url_base", "https://google.com/") + list(stream.read_records(sync_mode=SyncMode.full_refresh)) + with open(stream.cache_filename, "rb") as f: + assert f.read() class CacheHttpStreamWithSlices(CacheHttpStream): @@ -419,27 +421,27 @@ def test_using_cache(mocker, requests_mock): requests_mock.register_uri("GET", "https://google.com/", text="text") requests_mock.register_uri("GET", "https://google.com/search", text="text") - parent_stream = CacheHttpStreamWithSlices() - mocker.patch.object(parent_stream, "url_base", "https://google.com/") + with CacheHttpStreamWithSlices() as parent_stream: + mocker.patch.object(parent_stream, "url_base", "https://google.com/") - assert requests_mock.call_count == 0 - assert parent_stream._session.cache.response_count() == 0 + assert requests_mock.call_count == 0 + assert parent_stream._session.cache.response_count() == 0 - for _slice in parent_stream.stream_slices(): - list(parent_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=_slice)) + for _slice in parent_stream.stream_slices(): + list(parent_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=_slice)) - assert requests_mock.call_count == 2 - assert parent_stream._session.cache.response_count() == 2 + assert requests_mock.call_count == 2 + assert parent_stream._session.cache.response_count() == 2 - child_stream = CacheHttpSubStream(parent=parent_stream) + child_stream = CacheHttpSubStream(parent=parent_stream) - for _slice in child_stream.stream_slices(sync_mode=SyncMode.full_refresh): - pass + for _slice in child_stream.stream_slices(sync_mode=SyncMode.full_refresh): + pass - assert requests_mock.call_count == 2 - assert parent_stream._session.cache.response_count() == 2 - assert parent_stream._session.cache.has_url("https://google.com/") - assert parent_stream._session.cache.has_url("https://google.com/search") + assert requests_mock.call_count == 2 + assert parent_stream._session.cache.response_count() == 2 + assert parent_stream._session.cache.has_url("https://google.com/") + assert parent_stream._session.cache.has_url("https://google.com/search") class AutoFailTrueHttpStream(StubBasicReadHttpStream):