Skip to content

Update file-based connectors for compatibility with concurrent CDK #34681

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ data:
connectorSubtype: file
connectorType: source
definitionId: fdaaba68-4875-4ed9-8fcd-4ae1e0a25093
dockerImageTag: 0.3.2
dockerImageTag: 0.3.3
dockerRepository: airbyte/source-azure-blob-storage
documentationUrl: https://docs.airbyte.com/integrations/sources/azure-blob-storage
githubIssueLabel: source-azure-blob-storage
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from setuptools import find_packages, setup

MAIN_REQUIREMENTS = [
"airbyte-cdk[file-based]==0.59.2", # pinned until compatible with https://github.com/airbytehq/airbyte/pull/34411
"airbyte-cdk[file-based]>=0.60.1",
"smart_open[azure]",
"pytz",
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,16 @@
def run():
args = sys.argv[1:]
catalog_path = AirbyteEntrypoint.extract_catalog(args)
config_path = AirbyteEntrypoint.extract_config(args)
state_path = AirbyteEntrypoint.extract_state(args)
try:
source = SourceAzureBlobStorage(SourceAzureBlobStorageStreamReader(), Config, catalog_path)
source = SourceAzureBlobStorage(
SourceAzureBlobStorageStreamReader(),
Config,
SourceAzureBlobStorage.read_catalog(catalog_path) if catalog_path else None,
SourceAzureBlobStorage.read_config(config_path) if catalog_path else None,
SourceAzureBlobStorage.read_state(state_path) if catalog_path else None,
)
except Exception:
print(
AirbyteMessage(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,15 @@


class SourceAzureBlobStorage(FileBasedSource):
def read_config(self, config_path: str) -> Mapping[str, Any]:
@classmethod
def read_config(cls, config_path: str) -> Mapping[str, Any]:
"""
Used to override the default read_config so that when the new file-based Azure Blob Storage connector processes a config
in the legacy format, it can be transformed into the new config. This happens in entrypoint before we
validate the config against the new spec.
"""
config = super().read_config(config_path)
if not self._is_v1_config(config):
config = FileBasedSource.read_config(config_path)
if not cls._is_v1_config(config):
converted_config = LegacyConfigTransformer.convert(config)
emit_configuration_as_airbyte_control_message(converted_config)
return converted_config
Expand Down
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-gcs/metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ data:
connectorSubtype: file
connectorType: source
definitionId: 2a8c41ae-8c23-4be0-a73f-2ab10ca1a820
dockerImageTag: 0.3.5
dockerImageTag: 0.3.6
dockerRepository: airbyte/source-gcs
documentationUrl: https://docs.airbyte.com/integrations/sources/gcs
githubIssueLabel: source-gcs
Expand Down
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-gcs/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from setuptools import find_packages, setup

MAIN_REQUIREMENTS = [
"airbyte-cdk[file-based]==0.59.2", # pinned until compatible with https://github.com/airbytehq/airbyte/pull/34411
"airbyte-cdk[file-based]>=0.60.1",
"google-cloud-storage==2.12.0",
"smart-open[s3]==5.1.0",
"pandas==1.5.3",
Expand Down
34 changes: 31 additions & 3 deletions airbyte-integrations/connectors/source-gcs/source_gcs/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,41 @@


import sys
import traceback
from datetime import datetime

from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch
from airbyte_cdk.models import AirbyteErrorTraceMessage, AirbyteMessage, AirbyteTraceMessage, TraceType, Type
from source_gcs import Config, Cursor, SourceGCS, SourceGCSStreamReader


def run():
_args = sys.argv[1:]
catalog_path = AirbyteEntrypoint.extract_catalog(_args)
source = SourceGCS(SourceGCSStreamReader(), Config, catalog_path, cursor_cls=Cursor)
launch(source, sys.argv[1:])
try:
catalog_path = AirbyteEntrypoint.extract_catalog(_args)
config_path = AirbyteEntrypoint.extract_config(_args)
state_path = AirbyteEntrypoint.extract_state(_args)
source = SourceGCS(
SourceGCSStreamReader(),
Config,
SourceGCS.read_catalog(catalog_path) if catalog_path else None,
SourceGCS.read_config(config_path) if config_path else None,
SourceGCS.read_state(state_path) if state_path else None,
cursor_cls=Cursor,
)
except Exception:
print(
AirbyteMessage(
type=Type.TRACE,
trace=AirbyteTraceMessage(
type=TraceType.ERROR,
emitted_at=int(datetime.now().timestamp() * 1000),
error=AirbyteErrorTraceMessage(
message="Error starting the sync. This could be due to an invalid configuration or catalog. Please contact Support for assistance.",
stack_trace=traceback.format_exc(),
),
),
).json()
)
else:
launch(source, sys.argv[1:])
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,14 @@


class SourceGCS(FileBasedSource):
def read_config(self, config_path: str) -> Mapping[str, Any]:
@classmethod
def read_config(cls, config_path: str) -> Mapping[str, Any]:
"""
Override the default read_config to transform the legacy config format
into the new one before validating it against the new spec.
"""
config = super().read_config(config_path)
if not self._is_file_based_config(config):
config = FileBasedSource.read_config(config_path)
if not cls._is_file_based_config(config):
parsed_legacy_config = SourceGCSSpec(**config)
converted_config = LegacyConfigTransformer.convert(parsed_legacy_config)
emit_configuration_as_airbyte_control_message(converted_config)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ data:
connectorSubtype: file
connectorType: source
definitionId: 9f8dda77-1048-4368-815b-269bf54ee9b8
dockerImageTag: 0.0.7
dockerImageTag: 0.0.8
dockerRepository: airbyte/source-google-drive
githubIssueLabel: source-google-drive
icon: google-drive.svg
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from setuptools import find_packages, setup

MAIN_REQUIREMENTS = [
"airbyte-cdk[file-based]==0.59.2", # pinned until compatible with https://github.com/airbytehq/airbyte/pull/34411
"airbyte-cdk[file-based]>=0.60.1",
"google-api-python-client==2.104.0",
"google-auth-httplib2==0.1.1",
"google-auth-oauthlib==1.1.0",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,11 @@
def run():
args = sys.argv[1:]
catalog_path = AirbyteEntrypoint.extract_catalog(args)
source = SourceGoogleDrive(catalog_path)
config_path = AirbyteEntrypoint.extract_config(args)
state_path = AirbyteEntrypoint.extract_state(args)
source = SourceGoogleDrive(
SourceGoogleDrive.read_catalog(catalog_path) if catalog_path else None,
SourceGoogleDrive.read_config(config_path) if config_path else None,
SourceGoogleDrive.read_state(state_path) if state_path else None,
)
launch(source, args)
Original file line number Diff line number Diff line change
@@ -1,21 +1,24 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
from typing import Any
from typing import Any, Mapping, Optional

from airbyte_cdk.models import AdvancedAuth, ConnectorSpecification, OAuthConfigSpecification
from airbyte_cdk.models import AdvancedAuth, ConfiguredAirbyteCatalog, ConnectorSpecification, OAuthConfigSpecification
from airbyte_cdk.sources.file_based.file_based_source import FileBasedSource
from airbyte_cdk.sources.file_based.stream.cursor.default_file_based_cursor import DefaultFileBasedCursor
from airbyte_cdk.sources.source import TState
from source_google_drive.spec import SourceGoogleDriveSpec
from source_google_drive.stream_reader import SourceGoogleDriveStreamReader


class SourceGoogleDrive(FileBasedSource):
def __init__(self, catalog_path: str):
def __init__(self, catalog: Optional[ConfiguredAirbyteCatalog], config: Optional[Mapping[str, Any]], state: Optional[TState]):
super().__init__(
stream_reader=SourceGoogleDriveStreamReader(),
spec_class=SourceGoogleDriveSpec,
catalog_path=catalog_path,
catalog=catalog,
config=config,
state=state,
cursor_cls=DefaultFileBasedCursor,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ data:
connectorSubtype: api
connectorType: source
definitionId: 01d1c685-fd4a-4837-8f4c-93fe5a0d2188
dockerImageTag: 0.1.4
dockerImageTag: 0.1.5
dockerRepository: airbyte/source-microsoft-onedrive
githubIssueLabel: source-microsoft-onedrive
icon: microsoft-onedrive.svg
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from setuptools import find_packages, setup

MAIN_REQUIREMENTS = [
"airbyte-cdk[file-based]==0.59.2", # pinned until compatible with https://github.com/airbytehq/airbyte/pull/34411
"airbyte-cdk[file-based]>=0.60.1",
"msal~=1.25.0",
"Office365-REST-Python-Client~=2.5.2",
"smart-open~=6.4.0",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,11 @@
def run():
args = sys.argv[1:]
catalog_path = AirbyteEntrypoint.extract_catalog(args)
source = SourceMicrosoftOneDrive(catalog_path)
config_path = AirbyteEntrypoint.extract_config(args)
state_path = AirbyteEntrypoint.extract_state(args)
source = SourceMicrosoftOneDrive(
SourceMicrosoftOneDrive.read_catalog(catalog_path) if catalog_path else None,
SourceMicrosoftOneDrive.read_config(config_path) if config_path else None,
SourceMicrosoftOneDrive.read_state(state_path) if state_path else None,
)
launch(source, args)
Original file line number Diff line number Diff line change
@@ -1,21 +1,24 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
from typing import Any
from typing import Any, Mapping, Optional

from airbyte_cdk.models import AdvancedAuth, ConnectorSpecification, OAuthConfigSpecification
from airbyte_cdk.models import AdvancedAuth, ConfiguredAirbyteCatalog, ConnectorSpecification, OAuthConfigSpecification
from airbyte_cdk.sources.file_based.file_based_source import FileBasedSource
from airbyte_cdk.sources.file_based.stream.cursor.default_file_based_cursor import DefaultFileBasedCursor
from airbyte_cdk.sources.source import TState
from source_microsoft_onedrive.spec import SourceMicrosoftOneDriveSpec
from source_microsoft_onedrive.stream_reader import SourceMicrosoftOneDriveStreamReader


class SourceMicrosoftOneDrive(FileBasedSource):
def __init__(self, catalog_path: str):
def __init__(self, catalog: Optional[ConfiguredAirbyteCatalog], config: Optional[Mapping[str, Any]], state: Optional[TState]):
super().__init__(
stream_reader=SourceMicrosoftOneDriveStreamReader(),
spec_class=SourceMicrosoftOneDriveSpec,
catalog_path=catalog_path,
catalog=catalog,
config=config,
state=state,
cursor_cls=DefaultFileBasedCursor,
)

Expand Down
1 change: 1 addition & 0 deletions docs/integrations/sources/azure-blob-storage.md
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ To perform the text extraction from PDF and Docx files, the connector uses the [

| Version | Date | Pull Request | Subject |
|:--------|:-----------|:---------------------------------------------------------|:---------------------------------------------------------------------------------------------|
| 0.3.3 | 2024-01-30 | [34681](https://github.com/airbytehq/airbyte/pull/34681) | Unpin CDK version to make compatible with the Concurrent CDK |
| 0.3.2 | 2024-01-30 | [34661](https://github.com/airbytehq/airbyte/pull/34661) | Pin CDK version until upgrade for compatibility with the Concurrent CDK |
| 0.3.1 | 2024-01-10 | [34084](https://github.com/airbytehq/airbyte/pull/34084) | Fix bug for running check with document file format |
| 0.3.0 | 2023-12-14 | [33411](https://github.com/airbytehq/airbyte/pull/33411) | Bump CDK version to auto-set primary key for document file streams and support raw txt files |
Expand Down
21 changes: 11 additions & 10 deletions docs/integrations/sources/gcs.md
Original file line number Diff line number Diff line change
Expand Up @@ -146,13 +146,14 @@ Leaving this field blank (default option) will disallow escaping.

## Changelog

| Version | Date | Pull Request | Subject |
|:--------|:-----------|:---------------------------------------------------------|:----------------------------------------------------|
| 0.3.5 | 2024-01-30 | [34661](https://github.com/airbytehq/airbyte/pull/34661) | Pin CDK version until upgrade for compatibility with the Concurrent CDK |
| 0.3.4 | 2024-01-11 | [34158](https://github.com/airbytehq/airbyte/pull/34158) | Fix issue in stream reader for document file type parser |
| 0.3.3 | 2023-12-06 | [33187](https://github.com/airbytehq/airbyte/pull/33187) | Bump CDK version to hide source-defined primary key |
| 0.3.2 | 2023-11-16 | [32608](https://github.com/airbytehq/airbyte/pull/32608) | Improve document file type parser |
| 0.3.1 | 2023-11-13 | [32357](https://github.com/airbytehq/airbyte/pull/32357) | Improve spec schema |
| 0.3.0 | 2023-10-11 | [31212](https://github.com/airbytehq/airbyte/pull/31212) | Migrated to file based CDK |
| 0.2.0 | 2023-06-26 | [27725](https://github.com/airbytehq/airbyte/pull/27725) | License Update: Elv2 |
| 0.1.0 | 2023-02-16 | [23186](https://github.com/airbytehq/airbyte/pull/23186) | New Source: GCS |
| Version | Date | Pull Request | Subject |
|:--------|:-----------|:---------------------------------------------------------|:---------------------------------------------------------------------------------------|
| 0.3.6 | 2024-01-30 | [34681](https://github.com/airbytehq/airbyte/pull/34681) | Unpin CDK version to make compatible with the Concurrent CDK |
| 0.3.5 | 2024-01-30 | [34661](https://github.com/airbytehq/airbyte/pull/34661) | Pin CDK version until upgrade for compatibility with the Concurrent CDK |
| 0.3.4 | 2024-01-11 | [34158](https://github.com/airbytehq/airbyte/pull/34158) | Fix issue in stream reader for document file type parser |
| 0.3.3 | 2023-12-06 | [33187](https://github.com/airbytehq/airbyte/pull/33187) | Bump CDK version to hide source-defined primary key |
| 0.3.2 | 2023-11-16 | [32608](https://github.com/airbytehq/airbyte/pull/32608) | Improve document file type parser |
| 0.3.1 | 2023-11-13 | [32357](https://github.com/airbytehq/airbyte/pull/32357) | Improve spec schema |
| 0.3.0 | 2023-10-11 | [31212](https://github.com/airbytehq/airbyte/pull/31212) | Migrated to file based CDK |
| 0.2.0 | 2023-06-26 | [27725](https://github.com/airbytehq/airbyte/pull/27725) | License Update: Elv2 |
| 0.1.0 | 2023-02-16 | [23186](https://github.com/airbytehq/airbyte/pull/23186) | New Source: GCS |
1 change: 1 addition & 0 deletions docs/integrations/sources/google-drive.md
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ Before parsing each document, the connector exports Google Document files to Doc

| Version | Date | Pull Request | Subject |
|---------|------------|-----------------------------------------------------------|--------------------------------------------------------------|
| 0.0.8 | 2024-01-30 | [34681](https://github.com/airbytehq/airbyte/pull/34681) | Unpin CDK version to make compatible with the Concurrent CDK |
| 0.0.7 | 2024-01-30 | [34661](https://github.com/airbytehq/airbyte/pull/34661) | Pin CDK version until upgrade for compatibility with the Concurrent CDK |
| 0.0.6 | 2023-12-16 | [33414](https://github.com/airbytehq/airbyte/pull/33414) | Prepare for airbyte-lib |
| 0.0.5 | 2023-12-14 | [33411](https://github.com/airbytehq/airbyte/pull/33411) | Bump CDK version to auto-set primary key for document file streams and support raw txt files |
Expand Down
1 change: 1 addition & 0 deletions docs/integrations/sources/microsoft-onedrive.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ The connector is restricted by normal Microsoft Graph [requests limitation](http

| Version | Date | Pull Request | Subject |
|:--------|:-----------|:---------------------------------------------------------|:--------------------------|
| 0.1.5 | 2024-01-30 | [34681](https://github.com/airbytehq/airbyte/pull/34681) | Unpin CDK version to make compatible with the Concurrent CDK |
| 0.1.4 | 2024-01-30 | [34661](https://github.com/airbytehq/airbyte/pull/34661) | Pin CDK version until upgrade for compatibility with the Concurrent CDK |
| 0.1.3 | 2024-01-24 | [34478](https://github.com/airbytehq/airbyte/pull/34478) | Fix OAuth |
| 0.1.2 | 2021-12-22 | [33745](https://github.com/airbytehq/airbyte/pull/33745) | Add ql and sl to metadata |
Expand Down