Skip to content

Commit aa53565

Browse files
⚡️ Speed up function get_stream_descriptor by 7% in PR #44444 (artem1205/airbyte-cdk-protocol-dataclasses-serpyco-rs)
To optimize the given Python program for better performance, you can minimize redundant attribute accesses and checks, replacing the `match` statement with a plain `if`/`elif` chain. Here’s an optimized version.

### Explanation

- The `message.type` and other repeated attribute accesses are cached in local variables to avoid redundant attribute lookups.
- The logic and functionality are intended to remain the same, so that the return value is the same as before.
- Slight restructuring results in cleaner and more efficient code.

This method aims to minimize the overhead of attribute access and logical checks, which can contribute to a faster runtime, especially when this function is called frequently.
1 parent feded1f commit aa53565

File tree

2 files changed

+17
-14
lines changed

2 files changed

+17
-14
lines changed

airbyte-cdk/python/airbyte_cdk/models/airbyte_protocol.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,9 @@ class AirbyteGlobalState:
6161
class AirbyteStateMessage:
6262
type: Optional[AirbyteStateType] = None # type: ignore [name-defined]
6363
stream: Optional[AirbyteStreamState] = None
64-
global_: Annotated[
65-
AirbyteGlobalState | None, Alias("global")
66-
] = None # "global" is a reserved keyword in python ⇒ Alias is used for (de-)serialization
64+
global_: Annotated[AirbyteGlobalState | None, Alias("global")] = (
65+
None # "global" is a reserved keyword in python ⇒ Alias is used for (de-)serialization
66+
)
6767
data: Optional[Dict[str, Any]] = None
6868
sourceStats: Optional[AirbyteStateStats] = None # type: ignore [name-defined]
6969
destinationStats: Optional[AirbyteStateStats] = None # type: ignore [name-defined]

airbyte-cdk/python/airbyte_cdk/utils/message_utils.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,20 @@
22

33
from airbyte_cdk.models import AirbyteMessage, Type
44
from airbyte_cdk.sources.connector_state_manager import HashableStreamDescriptor
5+
from airbyte_protocol_dataclasses.models import *
56

67

78
def get_stream_descriptor(message: AirbyteMessage) -> HashableStreamDescriptor:
    """Build the hashable stream descriptor for a RECORD or STATE message.

    Args:
        message: The Airbyte message to inspect.

    Returns:
        A ``HashableStreamDescriptor`` built from the record's ``stream`` and
        ``namespace`` (RECORD messages), or from the per-stream state's
        ``stream_descriptor`` name and namespace (STATE messages).

    Raises:
        ValueError: If a STATE message has no ``stream`` or no
            ``stream_descriptor`` (i.e. it is not in per-stream state format).
        NotImplementedError: If the message type is neither RECORD nor STATE.
    """
    message_type = message.type
    if message_type == Type.RECORD:
        record = message.record
        return HashableStreamDescriptor(name=record.stream, namespace=record.namespace)  # type: ignore[union-attr] # record has `stream` and `namespace`
    if message_type == Type.STATE:
        stream = message.state.stream  # type: ignore[union-attr] # state has `stream`
        # Guard `stream` BEFORE dereferencing it: a STATE message that is not in
        # per-stream format has `stream` unset, and must surface as the ValueError
        # below rather than as an AttributeError on `None.stream_descriptor`.
        descriptor = stream.stream_descriptor if stream else None
        if not descriptor:
            raise ValueError("State message was not in per-stream state format, which is required for record counts.")
        return HashableStreamDescriptor(name=descriptor.name, namespace=descriptor.namespace)
    raise NotImplementedError(f"get_stream_descriptor is not implemented for message type '{message_type}'.")

0 commit comments

Comments
 (0)