Skip to content

Commit ef7c77b

Browse files
committed
Chore: Inline OrderedDictX, remove dependency to tikray
1 parent a37688c commit ef7c77b

File tree

4 files changed

+169
-21
lines changed

4 files changed

+169
-21
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,6 @@ dependencies = [
7979
"protobuf<6.31",
8080
"pycryptodome<3.23",
8181
"sqlalchemy-cratedb==0.42.0.dev2",
82-
"tikray<0.3",
8382
"toolz<2",
8483
"zstandard<0.24",
8584
]
@@ -160,6 +159,7 @@ markers = [
160159

161160
[tool.coverage.run]
162161
omit = [
162+
"src/cratedb_fivetran_destination/dictx.py",
163163
"tests/*",
164164
]
165165
source = [ "cratedb_fivetran_destination" ]

requirements.txt

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,26 @@
11
# This file was autogenerated by uv via the following command:
22
# uv export --format=requirements.txt --no-annotate --no-hashes
33
-e .
4-
attrs==25.3.0
54
beautifulsoup4==4.13.4
6-
cattrs==24.1.3
75
cffi==1.17.1 ; platform_python_implementation == 'PyPy'
86
click==8.1.8
97
colorama==0.4.6 ; sys_platform == 'win32'
108
crate==2.0.0
11-
exceptiongroup==1.2.2 ; python_full_version < '3.11'
129
geojson==3.2.0
1310
google==3.0.0
1411
greenlet==3.2.1 ; (python_full_version < '3.14' and platform_machine == 'AMD64') or (python_full_version < '3.14' and platform_machine == 'WIN32') or (python_full_version < '3.14' and platform_machine == 'aarch64') or (python_full_version < '3.14' and platform_machine == 'amd64') or (python_full_version < '3.14' and platform_machine == 'ppc64le') or (python_full_version < '3.14' and platform_machine == 'win32') or (python_full_version < '3.14' and platform_machine == 'x86_64')
1512
grpcio==1.70.0
1613
grpcio-tools==1.70.0
17-
importlib-resources==6.4.5 ; python_full_version < '3.10'
18-
isal==1.7.2 ; platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'x86_64'
19-
jmespath==1.0.1
20-
jq==1.8.0
21-
jsonpointer==3.0.0
2214
orjson==3.10.18
23-
orjsonl==1.0.0
2415
protobuf==5.29.4
2516
pycparser==2.22 ; platform_python_implementation == 'PyPy'
2617
pycryptodome==3.22.0
27-
python-dateutil==2.9.0.post0
28-
pyyaml==6.0.2
2918
setuptools==80.1.0
30-
six==1.17.0
3119
soupsieve==2.7
3220
sqlalchemy==2.0.40
3321
sqlalchemy-cratedb==0.42.0.dev2
34-
tikray==0.2.1
3522
toolz==1.0.0
36-
tqdm==4.67.1
37-
transon==0.0.7
3823
typing-extensions==4.13.2
3924
urllib3==2.4.0
4025
verlib2==0.3.1
41-
xopen==2.0.2
42-
zipp==3.21.0 ; python_full_version < '3.10'
43-
zlib-ng==0.5.1 ; platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'x86_64'
4426
zstandard==0.23.0
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
"""
2+
OrderedDictX by Zuzu Corneliu.
3+
4+
For the keeping of order case (the other one is trivial, remove old and add new
5+
one): I was not satisfied with the ordered-dictionary needing reconstruction
6+
(at least partially), obviously for efficiency reasons, so I've put together a
7+
class (OrderedDictX) that extends OrderedDict and allows you to do key changes
8+
efficiently, i.e. in O(1) complexity. The implementation can also be adjusted
9+
for the now-ordered built-in dict class.
10+
11+
It uses 2 extra dictionaries to remap the changed keys ("external" - i.e. as
12+
they appear externally to the user) to the ones in the underlying OrderedDict
13+
("internal") - the dictionaries will only hold keys that were changed so as
14+
long as no key changing is done they will be empty.
15+
16+
As expected, the splicing method is extremely slow (didn't expect it to be that
17+
much slower either though) and uses a lot of memory, and the O(N) solution of
18+
@Ashwini Chaudhary (bug-fixed though, del also needed) is also slower, about 17
19+
times in this example.
20+
21+
Of course, this solution being O(1), compared to the O(N) OrderedDictRaymond
22+
the time difference becomes much more apparent as the dictionary size
23+
increases, e.g. for 5 times more elements (100000), the O(N) is 100X slower.
24+
25+
https://stackoverflow.com/questions/16475384/rename-a-dictionary-key/75115645#75115645
26+
"""
27+
28+
from collections import OrderedDict
29+
30+
31+
class OrderedDictX(OrderedDict):
    """
    An ``OrderedDict`` whose keys can be renamed in place, keeping their position.

    The underlying ``OrderedDict`` storage is never re-keyed. Instead, two
    auxiliary dictionaries translate between the key the user sees
    ("external") and the key actually stored ("internal"):

    - ``_keys_ext2int`` maps external key -> internal key
    - ``_keys_int2ext`` maps internal key -> external key

    Both only contain entries for keys whose external and internal names
    differ, and they are always exact inverses of each other.

    Because the C implementation of ``dict``/``OrderedDict`` does not route
    ``get``, ``pop``, ``setdefault``, ``clear``, ``move_to_end`` or ``!=``
    through the overridden dunder methods, those are overridden here as well
    so that they honour the key translation.
    """

    # Sentinel to tell "no default given" apart from ``default=None`` in `pop`.
    _MISSING = object()

    def __init__(self, *args, **kwargs):
        """Create the dict; accepts the same arguments as ``OrderedDict``."""
        super().__init__()
        # Only keys that have been changed (internal key differs from what
        # the user sees) are held in these two mappings.
        self._keys_ext2int = OrderedDict()
        self._keys_int2ext = OrderedDict()
        self.update(*args, **kwargs)

    @staticmethod
    def _same_key(left, right) -> bool:
        """Return True if both objects would address the same dictionary slot."""
        return len(OrderedDict.fromkeys([left, right])) == 1

    def rename_key(self, k_old, k_new):
        """
        Rename ``k_old`` to ``k_new`` without changing the item's position.

        Raises ``KeyError`` if ``k_old`` is not present, or if ``k_new`` is
        already used by a different item. Renaming a key to itself is a no-op.
        """
        # Validate that the old key is part of the dict.
        if k_old not in self:
            raise KeyError(f"Cannot rename key {k_old} to {k_new}: {k_old} not existing in dict")

        # Return if no changing is actually to be done.
        if self._same_key(k_old, k_new):
            return

        # Validate that the new key would not conflict with another one.
        if k_new in self:
            raise KeyError(f"Cannot rename key {k_old} to {k_new}: {k_new} already in dict")

        # Resolve the internal key, dropping the previous external alias if any.
        k_int = self._keys_ext2int.pop(k_old, k_old)

        # Renamed back to the internal name: the translation entry is obsolete.
        if self._same_key(k_int, k_new):
            del self._keys_int2ext[k_int]
            return

        # Finalize key change.
        self._keys_ext2int[k_new] = k_int
        self._keys_int2ext[k_int] = k_new

    def __contains__(self, k) -> bool:
        """Return True if ``k`` is a key as seen by the user (external key)."""
        if k in self._keys_ext2int:
            return True
        # A stored key which has been renamed away is not visible any longer.
        return super().__contains__(k) and k not in self._keys_int2ext

    def __getitem__(self, k):
        """Return the value for external key ``k``; raise ``KeyError`` if absent."""
        if k not in self:
            raise KeyError(k)
        return super().__getitem__(self._keys_ext2int.get(k, k))

    def __setitem__(self, k, v):
        """Set the value for external key ``k``, appending it if it is new."""
        if k in self._keys_ext2int:
            return super().__setitem__(self._keys_ext2int[k], v)
        if k in self._keys_int2ext:
            # The internal slot `k` is occupied by an item that is displayed
            # under another name, so the new item needs a different internal
            # key. Follow the chain of aliases until reaching a name which is
            # not used as an internal key. The chain always ends, because `k`
            # itself is not an external name. This walk is the only place
            # where the cost depends on the number of chained renames.
            k_int = self._keys_int2ext[k]
            while super().__contains__(k_int):
                k_int = self._keys_int2ext[k_int]
            # Register both directions, so iteration and deletion stay consistent.
            self._keys_ext2int[k] = k_int
            self._keys_int2ext[k_int] = k
            k = k_int
        return super().__setitem__(k, v)

    def __delitem__(self, k):
        """Delete external key ``k``; raise ``KeyError`` if absent."""
        if k not in self:
            raise KeyError(k)
        if k in self._keys_ext2int:
            k_int = self._keys_ext2int.pop(k)
            del self._keys_int2ext[k_int]
            k = k_int
        return super().__delitem__(k)

    def __iter__(self):
        """Iterate over the external keys, in insertion order."""
        yield from self.keys()

    def __reversed__(self):
        """Iterate over the external keys, in reverse insertion order."""
        for k in reversed(super().keys()):
            yield self._keys_int2ext.get(k, k)

    def __eq__(self, other: object) -> bool:
        """
        Compare with another dict, item by item and in order.

        Returns False for anything that is not a ``dict`` instance.
        """
        if not isinstance(other, dict):
            return False
        if len(self) != len(other):
            return False
        for (k, v), (k_other, v_other) in zip(self.items(), other.items()):
            if k != k_other or v != v_other:
                return False
        return True

    def __ne__(self, other: object) -> bool:
        """Inverse of `__eq__`; the inherited one would compare internal keys."""
        return not self.__eq__(other)

    def update(self, *args, **kwargs):
        """Update from a mapping / iterable of pairs and/or keyword arguments."""
        for k, v in OrderedDict(*args, **kwargs).items():
            self.__setitem__(k, v)

    def get(self, k, default=None):
        """Return the value for external key ``k``, or ``default`` if absent."""
        if k in self:
            return self[k]
        return default

    def setdefault(self, k, default=None):
        """Return the value for ``k``, inserting ``default`` first if absent."""
        if k in self:
            return self[k]
        self[k] = default
        return default

    def pop(self, k, default=_MISSING):
        """
        Remove external key ``k`` and return its value.

        Returns ``default`` if the key is absent and a default was given,
        otherwise raises ``KeyError``.
        """
        if k in self:
            v = self[k]
            del self[k]
            return v
        if default is self._MISSING:
            raise KeyError(k)
        return default

    def popitem(self, last=True) -> tuple:
        """
        Remove and return a ``(key, value)`` pair.

        Pairs are returned in LIFO order if ``last`` is true, FIFO otherwise.
        Raises ``KeyError`` if the dictionary is empty.
        """
        if not self:
            raise KeyError("dictionary is empty")
        candidates = reversed(self.keys()) if last else iter(self.keys())
        k = next(candidates)
        v = self.__getitem__(k)
        self.__delitem__(k)
        return k, v

    def move_to_end(self, k, last=True):
        """Move external key ``k`` to either end; raise ``KeyError`` if absent."""
        if k not in self:
            raise KeyError(k)
        super().move_to_end(self._keys_ext2int.get(k, k), last=last)

    def clear(self):
        """Remove all items, including the key translation bookkeeping."""
        super().clear()
        self._keys_ext2int.clear()
        self._keys_int2ext.clear()

    class OrderedDictXKeysView:
        """Iterable over the external keys, backed by the internal keys view."""

        def __init__(self, odx: "OrderedDictX", orig_keys):
            self._odx = odx
            self._orig_keys = orig_keys

        def __len__(self) -> int:
            return len(self._orig_keys)

        def __contains__(self, k) -> bool:
            return k in self._odx

        def __iter__(self):
            for k in self._orig_keys:
                yield self._odx._keys_int2ext.get(k, k)

        def __reversed__(self):
            for k in reversed(self._orig_keys):
                yield self._odx._keys_int2ext.get(k, k)

    class OrderedDictXItemsView:
        """Iterable over ``(external key, value)`` pairs, backed by the internal items view."""

        def __init__(self, odx: "OrderedDictX", orig_items):
            self._odx = odx
            self._orig_items = orig_items

        def __len__(self) -> int:
            return len(self._orig_items)

        def __iter__(self):
            for k, v in self._orig_items:
                yield self._odx._keys_int2ext.get(k, k), v

        def __reversed__(self):
            for k, v in reversed(self._orig_items):
                yield self._odx._keys_int2ext.get(k, k), v

    def keys(self):
        """Return a view over the external keys."""
        return self.OrderedDictXKeysView(self, super().keys())

    def items(self):
        """Return a view over ``(external key, value)`` pairs."""
        return self.OrderedDictXItemsView(self, super().items())

    def copy(self):
        """Return a shallow copy, keyed by the current external keys."""
        return OrderedDictX(self.items())

src/cratedb_fivetran_destination/model.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
from attr import Factory
66
from attrs import define
77
from sqlalchemy_cratedb import ObjectType
8-
from tikray.util.dictx import OrderedDictX
98

9+
from cratedb_fivetran_destination.dictx import OrderedDictX
1010
from fivetran_sdk import common_pb2
1111
from fivetran_sdk.common_pb2 import DataType
1212

@@ -26,7 +26,7 @@ class FieldMap:
2626
@classmethod
2727
def rename_keys(cls, record):
2828
"""
29-
Rename keys according to field map.
29+
Rename keys according to the field map.
3030
"""
3131
record = OrderedDictX(record)
3232
for key, value in cls.field_map.items():

0 commit comments

Comments
 (0)