|
2 | 2 | # Copyright (c) 2022 Airbyte, Inc., all rights reserved.
|
3 | 3 | #
|
4 | 4 |
|
| 5 | +import json |
| 6 | +import logging |
| 7 | +from json import JSONDecodeError |
| 8 | +from typing import Any, Dict, Iterable, Optional, Union |
| 9 | +from urllib.parse import parse_qs, urljoin, urlparse |
5 | 10 |
|
| 11 | +from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Type |
6 | 12 | from connector_builder.generated.apis.default_api_interface import DefaultApi
|
| 13 | +from connector_builder.generated.models.http_request import HttpRequest |
| 14 | +from connector_builder.generated.models.http_response import HttpResponse |
7 | 15 | from connector_builder.generated.models.stream_read import StreamRead
|
| 16 | +from connector_builder.generated.models.stream_read_pages import StreamReadPages |
8 | 17 | from connector_builder.generated.models.stream_read_request_body import StreamReadRequestBody
|
| 18 | +from connector_builder.generated.models.stream_read_slices import StreamReadSlices |
9 | 19 | from connector_builder.generated.models.streams_list_read import StreamsListRead
|
| 20 | +from connector_builder.generated.models.streams_list_read_streams import StreamsListReadStreams |
10 | 21 | from connector_builder.generated.models.streams_list_request_body import StreamsListRequestBody
|
11 |
| -from fastapi import Body |
| 22 | +from connector_builder.impl.low_code_cdk_adapter import LowCodeSourceAdapter |
| 23 | +from fastapi import Body, HTTPException |
| 24 | +from jsonschema import ValidationError |
12 | 25 |
|
13 | 26 |
|
14 | 27 | class DefaultApiImpl(DefaultApi):
|
| 28 | + logger = logging.getLogger("airbyte.connector-builder") |
| 29 | + |
15 | 30 | async def get_manifest_template(self) -> str:
|
16 | 31 | return """version: "0.1.0"
|
17 | 32 |
|
@@ -65,7 +80,125 @@ async def get_manifest_template(self) -> str:
|
65 | 80 | """
|
66 | 81 |
|
67 | 82 | async def list_streams(self, streams_list_request_body: StreamsListRequestBody = Body(None, description="")) -> StreamsListRead:
|
68 |
| - raise Exception("not yet implemented") |
| 83 | + """ |
| 84 | + Takes in a low code manifest and a config to resolve the list of streams that are available for testing |
| 85 | + :param streams_list_request_body: Input parameters to retrieve the list of available streams |
| 86 | + :return: Stream objects made up of a stream name and the HTTP URL it will send requests to |
| 87 | + """ |
| 88 | + adapter = self._create_low_code_adapter(manifest=streams_list_request_body.manifest) |
| 89 | + |
| 90 | + stream_list_read = [] |
| 91 | + try: |
| 92 | + for http_stream in adapter.get_http_streams(streams_list_request_body.config): |
| 93 | + stream_list_read.append( |
| 94 | + StreamsListReadStreams( |
| 95 | + name=http_stream.name, |
| 96 | + url=urljoin(http_stream.url_base, http_stream.path()), |
| 97 | + ) |
| 98 | + ) |
| 99 | + except Exception as error: |
| 100 | + raise HTTPException(status_code=400, detail=f"Could not list streams with with error: {error.args[0]}") |
| 101 | + return StreamsListRead(streams=stream_list_read) |
69 | 102 |
|
70 | 103 | async def read_stream(self, stream_read_request_body: StreamReadRequestBody = Body(None, description="")) -> StreamRead:
|
71 |
| - raise Exception("not yet implemented") |
| 104 | + """ |
| 105 | + Using the provided manifest and config, invokes a sync for the specified stream and returns groups of Airbyte messages |
| 106 | + that are produced during the read operation |
| 107 | + :param stream_read_request_body: Input parameters to trigger the read operation for a stream |
| 108 | + :return: Airbyte record messages produced by the sync grouped by slice and page |
| 109 | + """ |
| 110 | + adapter = self._create_low_code_adapter(manifest=stream_read_request_body.manifest) |
| 111 | + |
| 112 | + single_slice = StreamReadSlices(pages=[]) |
| 113 | + log_messages = [] |
| 114 | + try: |
| 115 | + for message_group in self._get_message_groups( |
| 116 | + adapter.read_stream(stream_read_request_body.stream, stream_read_request_body.config) |
| 117 | + ): |
| 118 | + if isinstance(message_group, AirbyteLogMessage): |
| 119 | + log_messages.append({"message": message_group.message}) |
| 120 | + else: |
| 121 | + single_slice.pages.append(message_group) |
| 122 | + except Exception as error: |
| 123 | + # TODO: We're temporarily using FastAPI's default exception model. Ideally we should use exceptions defined in the OpenAPI spec |
| 124 | + raise HTTPException(status_code=400, detail=f"Could not perform read with with error: {error.args[0]}") |
| 125 | + |
| 126 | + return StreamRead(logs=log_messages, slices=[single_slice]) |
| 127 | + |
| 128 | + def _get_message_groups(self, messages: Iterable[AirbyteMessage]) -> Iterable[Union[StreamReadPages, AirbyteLogMessage]]: |
| 129 | + """ |
| 130 | + Message groups are partitioned according to when request log messages are received. Subsequent response log messages |
| 131 | + and record messages belong to the prior request log message and when we encounter another request, append the latest |
| 132 | + message group. |
| 133 | +
|
| 134 | + Messages received from the CDK read operation will always arrive in the following order: |
| 135 | + {type: LOG, log: {message: "request: ..."}} |
| 136 | + {type: LOG, log: {message: "response: ..."}} |
| 137 | + ... 0 or more record messages |
| 138 | + {type: RECORD, record: {data: ...}} |
| 139 | + {type: RECORD, record: {data: ...}} |
| 140 | + Repeats for each request/response made |
| 141 | +
|
| 142 | + Note: The exception is that normal log messages can be received at any time which are not incorporated into grouping |
| 143 | + """ |
| 144 | + first_page = True |
| 145 | + current_records = [] |
| 146 | + current_page_request: Optional[HttpRequest] = None |
| 147 | + current_page_response: Optional[HttpResponse] = None |
| 148 | + for message in messages: |
| 149 | + if first_page and message.type == Type.LOG and message.log.message.startswith("request:"): |
| 150 | + first_page = False |
| 151 | + request = self._create_request_from_log_message(message.log) |
| 152 | + current_page_request = request |
| 153 | + elif message.type == Type.LOG and message.log.message.startswith("request:"): |
| 154 | + if not current_page_request or not current_page_response: |
| 155 | + raise ValueError("Every message grouping should have at least one request and response") |
| 156 | + yield StreamReadPages(request=current_page_request, response=current_page_response, records=current_records) |
| 157 | + current_page_request = self._create_request_from_log_message(message.log) |
| 158 | + current_records = [] |
| 159 | + elif message.type == Type.LOG and message.log.message.startswith("response:"): |
| 160 | + current_page_response = self._create_response_from_log_message(message.log) |
| 161 | + elif message.type == Type.LOG: |
| 162 | + yield message.log |
| 163 | + elif message.type == Type.RECORD: |
| 164 | + current_records.append(message.record.data) |
| 165 | + else: |
| 166 | + if not current_page_request or not current_page_response: |
| 167 | + raise ValueError("Every message grouping should have at least one request and response") |
| 168 | + yield StreamReadPages(request=current_page_request, response=current_page_response, records=current_records) |
| 169 | + |
| 170 | + def _create_request_from_log_message(self, log_message: AirbyteLogMessage) -> Optional[HttpRequest]: |
| 171 | + # TODO: As a temporary stopgap, the CDK emits request data as a log message string. Ideally this should come in the |
| 172 | + # form of a custom message object defined in the Airbyte protocol, but this unblocks us in the immediate while the |
| 173 | + # protocol change is worked on. |
| 174 | + raw_request = log_message.message.partition("request:")[2] |
| 175 | + try: |
| 176 | + request = json.loads(raw_request) |
| 177 | + url = urlparse(request.get("url", "")) |
| 178 | + full_path = f"{url.scheme}://{url.hostname}{url.path}" if url else "" |
| 179 | + parameters = parse_qs(url.query) or None |
| 180 | + return HttpRequest(url=full_path, headers=request.get("headers"), parameters=parameters, body=request.get("body")) |
| 181 | + except JSONDecodeError as error: |
| 182 | + self.logger.warning(f"Failed to parse log message into request object with error: {error}") |
| 183 | + return None |
| 184 | + |
| 185 | + def _create_response_from_log_message(self, log_message: AirbyteLogMessage) -> Optional[HttpResponse]: |
| 186 | + # TODO: As a temporary stopgap, the CDK emits response data as a log message string. Ideally this should come in the |
| 187 | + # form of a custom message object defined in the Airbyte protocol, but this unblocks us in the immediate while the |
| 188 | + # protocol change is worked on. |
| 189 | + raw_response = log_message.message.partition("response:")[2] |
| 190 | + try: |
| 191 | + response = json.loads(raw_response) |
| 192 | + body = json.loads(response.get("body", "{}")) |
| 193 | + return HttpResponse(status=response.get("status_code"), body=body, headers=response.get("headers")) |
| 194 | + except JSONDecodeError as error: |
| 195 | + self.logger.warning(f"Failed to parse log message into response object with error: {error}") |
| 196 | + return None |
| 197 | + |
| 198 | + @staticmethod |
| 199 | + def _create_low_code_adapter(manifest: Dict[str, Any]) -> LowCodeSourceAdapter: |
| 200 | + try: |
| 201 | + return LowCodeSourceAdapter(manifest=manifest) |
| 202 | + except ValidationError as error: |
| 203 | + # TODO: We're temporarily using FastAPI's default exception model. Ideally we should use exceptions defined in the OpenAPI spec |
| 204 | + raise HTTPException(status_code=400, detail=f"Invalid connector manifest with error: {error.message}") |
0 commit comments