Skip to content

Commit 5bbdee1

Browse files
committed
Use a global cache not thread-local one
1 parent 5c3710c commit 5bbdee1

File tree

3 files changed

+44
-3
lines changed

3 files changed

+44
-3
lines changed

librarian_server/api/validate.py

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
"""
66

77
import asyncio
8+
import datetime
89
from functools import lru_cache
910
from pathlib import Path
1011
from time import perf_counter
@@ -38,8 +39,44 @@
3839

3940
# Router object for this module, constructed with the "/api/v2/validate" prefix.
router = APIRouter(prefix="/api/v2/validate")
4041

42+
# Maximum age of a cached validation result; once an entry is older than this,
# cached_calculate_checksum_of_local_copy recalculates instead of re-using it.
VALIDATION_TIMEOUT = datetime.timedelta(hours=8)
43+
# Module-level (shared) cache of validation results. Written by
# cached_calculate_checksum_of_local_copy, which stores
# (result, timezone-aware UTC datetime of calculation) tuples under
# "<original_checksum>-<instance_id>" string keys.
VALIDATION_CACHE = {}
44+
45+
46+
async def cached_calculate_checksum_of_local_copy(
    original_checksum: str,
    original_size: int,
    path_info_function: callable,
    path: Path,
    store_id: int,
    instance_id: int,
):
    """
    Run ``calculate_checksum_of_local_copy``, re-using a recent result if any.

    Results are kept in the module-level ``VALIDATION_CACHE`` under the key
    ``"<original_checksum>-<instance_id>"`` together with the UTC time at
    which they were calculated. A stored result is returned as-is while it is
    no older than ``VALIDATION_TIMEOUT``; otherwise the calculation is run
    again (through ``asyncify``) and the new result replaces the old one.

    Each time a fresh result is stored, every entry that has outlived
    ``VALIDATION_TIMEOUT`` is removed, so the cache only ever holds results
    that could still be served and cannot grow without bound.

    Parameters
    ----------
    original_checksum : str
        Forwarded to ``calculate_checksum_of_local_copy``; also the first
        half of the cache key.
    original_size : int
        Forwarded to ``calculate_checksum_of_local_copy``.
    path_info_function : callable
        Forwarded to ``calculate_checksum_of_local_copy``.
    path : Path
        Forwarded to ``calculate_checksum_of_local_copy``.
    store_id : int
        Forwarded to ``calculate_checksum_of_local_copy``.
    instance_id : int
        Forwarded to ``calculate_checksum_of_local_copy``; also the second
        half of the cache key.

    Returns
    -------
    The value produced by ``calculate_checksum_of_local_copy`` for these
    arguments, either freshly calculated or taken from the cache.
    """
    key = f"{original_checksum}-{instance_id}"

    cached = VALIDATION_CACHE.get(key)

    if cached is not None:
        result, validated_at = cached
        age = datetime.datetime.now(datetime.timezone.utc) - validated_at

        if age <= VALIDATION_TIMEOUT:
            log.info(
                f"Using cached result for instance {instance_id}", instance_id=instance_id
            )
            return result

    # Cache miss, or the stored result is too old: recalculate.
    result = await asyncify(calculate_checksum_of_local_copy)(
        original_checksum=original_checksum,
        original_size=original_size,
        path_info_function=path_info_function,
        path=path,
        store_id=store_id,
        instance_id=instance_id,
    )

    # Timestamp is taken after the calculation finishes, so the timeout is
    # measured from when the result became available.
    now = datetime.datetime.now(datetime.timezone.utc)

    # Drop every expired entry. Without this, results for files that are
    # never validated again would stay in the cache forever. The items are
    # snapshotted first so the dict is never modified while being iterated.
    expired_keys = [
        stale_key
        for stale_key, (_, validated_at) in list(VALIDATION_CACHE.items())
        if now - validated_at > VALIDATION_TIMEOUT
    ]
    for stale_key in expired_keys:
        VALIDATION_CACHE.pop(stale_key, None)

    VALIDATION_CACHE[key] = (result, now)

    return result
78+
4179

42-
@lru_cache(maxsize=1024)
4380
def calculate_checksum_of_local_copy(
4481
original_checksum: str,
4582
original_size: int,
@@ -187,7 +224,7 @@ async def validate_file(
187224
if not instance.available:
188225
continue
189226

190-
this_checksum_info = asyncify(calculate_checksum_of_local_copy)(
227+
this_checksum_info = cached_calculate_checksum_of_local_copy(
191228
original_checksum=file.checksum,
192229
original_size=file.size,
193230
path_info_function=instance.store.store_manager.path_info,

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ exclude=["*tests*"]
77
[project]
88
name="hera_librarian"
99
requires-python = ">=3.10"
10-
version = "3.1.0"
10+
version = "3.1.1"
1111
dependencies = [
1212
"alembic",
1313
"argon2-cffi",

tests/integration_test/test_send_queue.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,10 @@ def test_send_from_existing_file_row(
371371
# Should have _ours_ and _theirs_.
372372
assert len(instance_validations) == 2
373373

374+
# Check again (should use the cache)!
375+
instance_validations = mocked_admin_client.validate_file(file_name=file_name)
376+
assert len(instance_validations) == 2
377+
374378
source_librarians_for_validations = {x.librarian for x in instance_validations}
375379

376380
assert len(source_librarians_for_validations) == 2 # I.e. they are different

0 commit comments

Comments
 (0)