Skip to content

chore(source-s3): Update CDK to v5 (old) #45199

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 17 commits into from
Closed
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
{
"documentationUrl": "https://docs.airbyte.com/integrations/sources/s3",
"supportsNormalization": false,
"supportsDBT": false,
"connectionSpecification": {
"title": "Config",
"description": "NOTE: When this Spec is changed, legacy_config_transformer.py must also be modified to uptake the changes\nbecause it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.",
Expand Down Expand Up @@ -338,6 +340,19 @@
},
"description": "Extract text from document formats (.pdf, .docx, .md, .pptx) and emit as one record per file.",
"required": ["filetype"]
},
{
"title": "Excel Format",
"type": "object",
"properties": {
"filetype": {
"title": "Filetype",
"default": "excel",
"const": "excel",
"type": "string"
}
},
"required": ["filetype"]
}
]
},
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
{
"documentationUrl": "https://docs.airbyte.com/integrations/sources/s3",
"supportsNormalization": false,
"supportsDBT": false,
"connectionSpecification": {
"title": "Config",
"description": "NOTE: When this Spec is changed, legacy_config_transformer.py must also be modified to uptake the changes\nbecause it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.",
Expand Down Expand Up @@ -338,6 +340,19 @@
},
"description": "Extract text from document formats (.pdf, .docx, .md, .pptx) and emit as one record per file.",
"required": ["filetype"]
},
{
"title": "Excel Format",
"type": "object",
"properties": {
"filetype": {
"title": "Filetype",
"default": "excel",
"const": "excel",
"type": "string"
}
},
"required": ["filetype"]
}
]
},
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.

"""Run a subset of acceptance tests in PyTest.

This test file provides a means to easily step through a subset of acceptance tests, debugging
and verifying the behavior of the source-s3 connector. The tests are redundant with CAT, but
are much faster to run and easier to debug.
"""

from __future__ import annotations

from pathlib import Path

import pytest
import yaml
from pydantic import BaseModel
from source_s3.run import get_source

from airbyte_cdk import AirbyteTracedException, launch


class AcceptanceTestExpectRecords(BaseModel):
    """Schema of the optional `expect_records` section of an acceptance test entry."""

    # Presumably the location of the expected-records file - TODO confirm against CAT docs.
    path: str
    # Defaults to False when the key is omitted from the config.
    exact_order: bool = False


class AcceptanceTestFileTypes(BaseModel):
    """Schema of the optional `file_types` section of an acceptance test entry.

    Both fields are required whenever the section is present.
    """

    skip_test: bool
    # Presumably the human-readable justification for skipping - TODO confirm.
    bypass_reason: str


class AcceptanceTestInstance(BaseModel):
    """One entry of a category's `tests` list in `acceptance-test-config.yml`.

    `get_tests` validates each `basic_read` entry against this model.
    """

    # Passed to the connector as the `--config` argument by the tests in this module.
    config_path: str
    timeout_seconds: int
    # Optional sections; they default to None when absent from the config entry.
    expect_records: AcceptanceTestExpectRecords | None = None
    file_types: AcceptanceTestFileTypes | None = None


class AcceptanceTestFullRefreshInstance(AcceptanceTestInstance):
    """One entry of the `full_refresh` category's `tests` list.

    Every field of `AcceptanceTestInstance` (`config_path`, `timeout_seconds`,
    `expect_records`, `file_types`) is inherited unchanged; they are deliberately
    not redeclared here so the two models cannot drift apart.
    """

    # Passed to the connector as the `--catalog` argument by `test_full_refresh`.
    configured_catalog_path: str


def get_tests(category: str) -> list[AcceptanceTestInstance]:
    """Load the acceptance test entries declared for *category*.

    The entries are read from `acceptance-test-config.yml` in the current working
    directory, under `acceptance_tests.<category>.tests`.

    Args:
        category: Acceptance test category name. Only `"basic_read"` and
            `"full_refresh"` are supported.

    Returns:
        The validated test entries (`AcceptanceTestFullRefreshInstance` objects for
        `"full_refresh"`, `AcceptanceTestInstance` objects for `"basic_read"`).
        An empty list is returned for an unsupported category, or when the file
        declares no tests for the category (missing or empty `acceptance_tests`,
        category, or `tests` key).

    Raises:
        FileNotFoundError: If the config file is missing for a supported category.
        pydantic.ValidationError: If an entry does not match the category's model.
    """
    # One model per supported category; anything else yields no tests.
    model_by_category: dict[str, type[AcceptanceTestInstance]] = {
        "basic_read": AcceptanceTestInstance,
        "full_refresh": AcceptanceTestFullRefreshInstance,
    }
    model = model_by_category.get(category)
    if model is None:
        return []

    config_text = Path("acceptance-test-config.yml").read_text(encoding="utf-8")
    # `safe_load` returns None for an empty document, and any level may be missing
    # or null (e.g. a category that only carries a `bypass_reason`), so every
    # lookup falls back to an empty container instead of raising.
    all_tests_config = yaml.safe_load(config_text) or {}
    category_config = (all_tests_config.get("acceptance_tests") or {}).get(category) or {}
    return [model.model_validate(test) for test in category_config.get("tests") or []]


@pytest.mark.parametrize(
    "instance",
    get_tests("full_refresh"),
    # Use the config file name (last path segment) as the readable test id.
    ids=lambda case: case.config_path.rsplit("/", 1)[-1],
)
def test_full_refresh(instance: AcceptanceTestFullRefreshInstance) -> None:
    """Run the connector's `read` command for one `full_refresh` test entry.

    The test passes when a source can be built from the arguments and `launch`
    completes without raising.
    """
    cli_args = ["read", "--config", instance.config_path]
    cli_args += ["--catalog", instance.configured_catalog_path]

    connector = get_source(args=cli_args)
    assert connector
    launch(connector, args=cli_args)


def test_invalid_config(capsys: pytest.CaptureFixture[str]) -> None:
    """Check that running `check` with the invalid config reports the problem on stdout."""
    cli_args = ["check", "--config", "integration_tests/invalid_config.json"]
    connector = get_source(args=cli_args)
    assert connector

    try:
        launch(connector, args=cli_args)
    except AirbyteTracedException as traced_error:
        # A traced exception is acceptable, provided the config error also reached stdout.
        stdout = capsys.readouterr().out
        if "CONNECTOR_CONFIG" not in stdout:
            raise AssertionError(  # noqa: TRY003
                "The `CHECK` exception was raised but not printed.",
            ) from traced_error
        # Otherwise the exception was raised and the status message was printed as expected.
    except Exception as unexpected_error:
        # Any other exception type always fails the test; the stdout check runs first
        # so a missing message is reported ahead of the wrong exception type.
        stdout = capsys.readouterr().out
        assert "CONNECTOR_CONFIG" in stdout, "The `CHECK` exception was not printed."

        raise AssertionError(  # noqa: TRY003
            "Unexpected exception raised during `CHECK`. Handled exceptions should be of type `AirbyteTracedException`.",
        ) from unexpected_error
    else:
        # Nothing was raised, so only the captured stdout is inspected.
        # NOTE(review): "expected_output" looks like a placeholder marker - confirm the intended text.
        stdout = capsys.readouterr().out
        assert "expected_output" in stdout, "The `CHECK` exception was not printed."
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-s3/metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ data:
connectorSubtype: file
connectorType: source
definitionId: 69589781-7828-43c5-9f63-8925b1c1ccc2
dockerImageTag: 4.8.1
dockerImageTag: 4.9.0
dockerRepository: airbyte/source-s3
documentationUrl: https://docs.airbyte.com/integrations/sources/s3
githubIssueLabel: source-s3
Expand Down
Loading
Loading