|
4 | 4 |
|
5 | 5 | import logging
|
6 | 6 | from datetime import datetime
|
7 |
| -from typing import TYPE_CHECKING, Any, Iterable, List, MutableMapping |
| 7 | +from typing import TYPE_CHECKING, Any, Iterable, List, MutableMapping, Optional |
8 | 8 |
|
| 9 | +from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager |
9 | 10 | from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
|
10 | 11 | from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
11 | 12 | from airbyte_cdk.sources.file_based.stream.concurrent.cursor.abstract_concurrent_file_based_cursor import AbstractConcurrentFileBasedCursor
|
12 | 13 | from airbyte_cdk.sources.file_based.types import StreamState
|
| 14 | +from airbyte_cdk.sources.message import MessageRepository |
| 15 | +from airbyte_cdk.sources.streams import FULL_REFRESH_SENTINEL_STATE_KEY |
13 | 16 | from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
14 | 17 | from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
15 | 18 |
|
16 | 19 | if TYPE_CHECKING:
|
17 | 20 | from airbyte_cdk.sources.file_based.stream.concurrent.adapters import FileBasedStreamPartition
|
18 | 21 |
|
19 | 22 |
|
20 |
| -class FileBasedNoopCursor(AbstractConcurrentFileBasedCursor): |
21 |
| - def __init__(self, stream_config: FileBasedStreamConfig, **kwargs: Any): |
22 |
| - pass |
| 23 | +class FileBasedFinalStateCursor(AbstractConcurrentFileBasedCursor): |
| 24 | + """Cursor that is used to guarantee at least one state message is emitted for a concurrent file-based stream.""" |
| 25 | + |
| 26 | + def __init__( |
| 27 | + self, stream_config: FileBasedStreamConfig, message_repository: MessageRepository, stream_namespace: Optional[str], **kwargs: Any |
| 28 | + ): |
| 29 | + self._stream_name = stream_config.name |
| 30 | + self._stream_namespace = stream_namespace |
| 31 | + self._message_repository = message_repository |
| 32 | + # Normally the connector state manager operates at the source-level. However, we only need it to write the sentinel |
| 33 | + # state message rather than manage overall source state. This is also only temporary as we move to the resumable |
| 34 | + # full refresh world where every stream uses a FileBasedConcurrentCursor with incremental state. |
| 35 | + self._connector_state_manager = ConnectorStateManager(stream_instance_map={}) |
23 | 36 |
|
24 | 37 | @property
|
25 | 38 | def state(self) -> MutableMapping[str, Any]:
|
26 |
| - return {} |
| 39 | + return {FULL_REFRESH_SENTINEL_STATE_KEY: True} |
27 | 40 |
|
28 | 41 | def observe(self, record: Record) -> None:
|
29 | 42 | pass
|
@@ -53,4 +66,6 @@ def emit_state_message(self) -> None:
|
53 | 66 | pass
|
54 | 67 |
|
55 | 68 | def ensure_at_least_one_state_emitted(self) -> None:
|
56 |
| - pass |
| 69 | + self._connector_state_manager.update_state_for_stream(self._stream_name, self._stream_namespace, self.state) |
| 70 | + state_message = self._connector_state_manager.create_state_message(self._stream_name, self._stream_namespace) |
| 71 | + self._message_repository.emit_message(state_message) |
0 commit comments