Skip to content

File-based CDK: avoid error on empty stream when running discover #38230

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 21, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
from airbyte_cdk.utils.analytics_message import create_analytics_message
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
from airbyte_protocol.models import AirbyteCatalog
from pydantic.error_wrappers import ValidationError

DEFAULT_CONCURRENCY = 100
Expand Down Expand Up @@ -278,3 +279,7 @@ def _validate_and_get_validation_policy(self, stream_config: FileBasedStreamConf
def _validate_input_schema(self, stream_config: FileBasedStreamConfig) -> None:
    """Reject a stream configuration that sets both `schemaless` and `input_schema`.

    Args:
        stream_config: The per-stream configuration to check. Only its
            `schemaless` and `input_schema` attributes are read.

    Raises:
        ValidationError: If both `schemaless` and `input_schema` are truthy.
    """
    # Guard clauses: the configuration is acceptable as soon as either option is unset.
    if not stream_config.schemaless:
        return
    if not stream_config.input_schema:
        return
    raise ValidationError("`input_schema` and `schemaless` options cannot both be set", model=FileBasedStreamConfig)

def discover(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteCatalog:
    """Build the catalog from the configured streams, omitting streams with an empty schema.

    Args:
        logger: Accepted as part of the method signature; not used in this body.
        config: Source configuration, forwarded to `self.streams`.

    Returns:
        An `AirbyteCatalog` containing `as_airbyte_stream()` for every stream
        whose `get_json_schema()` result is truthy.
    """
    discovered = []
    for candidate in self.streams(config=config):
        # A falsy schema (for example an empty dict) keeps the stream out of the catalog.
        if not candidate.get_json_schema():
            continue
        discovered.append(candidate.as_airbyte_stream())
    return AirbyteCatalog(streams=discovered)
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,10 @@ def get_json_schema(self) -> JsonSchema:
}
try:
schema = self._get_raw_json_schema()
except (InvalidSchemaError, NoFilesMatchingError) as config_exception:
except NoFilesMatchingError:
self.logger.warning(f"No files were identified in the stream {self.name}. Set empty schema for the stream.")
return {}
except InvalidSchemaError as config_exception:
self.logger.exception(FileBasedSourceError.SCHEMA_INFERENCE_ERROR.value, exc_info=config_exception)
raise AirbyteTracedException(
internal_message="Please check the logged errors for more information.",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3029,29 +3029,7 @@
.set_file_type("csv")
)
.set_expected_check_status("FAILED")
.set_expected_check_error(AirbyteTracedException, FileBasedSourceError.EMPTY_STREAM.value)
.set_expected_catalog(
{
"streams": [
{
"default_cursor_field": ["_ab_source_file_last_modified"],
"json_schema": {
"type": "object",
"properties": {
"col1": {"type": "string"},
"col2": {"type": "string"},
"_ab_source_file_last_modified": {"type": "string"},
"_ab_source_file_url": {"type": "string"},
},
},
"name": "stream1",
"source_defined_cursor": True,
"supported_sync_modes": ["full_refresh", "incremental"],
}
]
}
)
.set_expected_discover_error(AirbyteTracedException, FileBasedSourceError.SCHEMA_INFERENCE_ERROR.value)
.set_expected_catalog({"streams": []})
).build()

csv_no_records_scenario: TestScenario[InMemoryFilesSource] = (
Expand Down
Loading