Skip to content

Commit 28fa16a

Browse files
authored
fix: wait for meilisearch index creation to succeed (#166)
In `search.meilisearch.create_indexes`, we were not waiting for the index creation tasks to complete. This was causing a potential race condition, where the `create_indexes` function would fail because it took a few seconds for the index creation to succeed. See the relevant conversation here: openedx/edx-platform#35743 (comment)
1 parent 91686e9 commit 28fa16a

File tree

3 files changed

+133
-52
lines changed

3 files changed

+133
-52
lines changed

edxsearch/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
""" Container module for testing / demoing search """
22

3-
__version__ = '4.1.0'
3+
__version__ = '4.1.1'

search/meilisearch.py

Lines changed: 68 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -115,9 +115,20 @@ class MeilisearchEngine(SearchEngine):
115115
compliant with edx-search's ElasticSearchEngine.
116116
"""
117117

118-
def __init__(self, index=None):
118+
def __init__(self, index=None) -> None:
119119
super().__init__(index=index)
120-
self.meilisearch_index = get_meilisearch_index(self.index_name)
120+
self._meilisearch_index: t.Optional[meilisearch.index.Index] = None
121+
122+
@property
123+
def meilisearch_index(self) -> meilisearch.index.Index:
124+
"""
125+
Lazy load meilisearch index.
126+
"""
127+
if self._meilisearch_index is None:
128+
meilisearch_index_name = get_meilisearch_index_name(self.index_name)
129+
meilisearch_client = get_meilisearch_client()
130+
self._meilisearch_index = meilisearch_client.index(meilisearch_index_name)
131+
return self._meilisearch_index
121132

122133
@property
123134
def meilisearch_index_name(self):
@@ -211,7 +222,7 @@ def print_failed_meilisearch_tasks(count: int = 10):
211222
print(result)
212223

213224

214-
def create_indexes(index_filterables: dict[str, list[str]] = None):
225+
def create_indexes(index_filterables: t.Optional[dict[str, list[str]]] = None):
215226
"""
216227
This is an initialization function that creates indexes and makes sure that they
217228
support the right facetting.
@@ -225,38 +236,68 @@ def create_indexes(index_filterables: dict[str, list[str]] = None):
225236
client = get_meilisearch_client()
226237
for index_name, filterables in index_filterables.items():
227238
meilisearch_index_name = get_meilisearch_index_name(index_name)
228-
try:
229-
index = client.get_index(meilisearch_index_name)
230-
except meilisearch.errors.MeilisearchApiError as e:
231-
if e.code != "index_not_found":
232-
raise
233-
client.create_index(
234-
meilisearch_index_name, {"primaryKey": PRIMARY_KEY_FIELD_NAME}
235-
)
236-
# Get the index again
237-
index = client.get_index(meilisearch_index_name)
239+
index = get_or_create_meilisearch_index(client, meilisearch_index_name)
240+
update_index_filterables(client, index, filterables)
238241

239-
# Update filterables if there are some new elements
240-
if filterables:
241-
existing_filterables = set(index.get_filterable_attributes())
242-
if not set(filterables).issubset(existing_filterables):
243-
all_filterables = list(existing_filterables.union(filterables))
244-
index.update_filterable_attributes(all_filterables)
245242

243+
def get_or_create_meilisearch_index(
244+
client: meilisearch.Client, index_name: str
245+
) -> meilisearch.index.Index:
246+
"""
247+
Get an index. If it does not exist, create it.
246248
247-
def get_meilisearch_index(index_name: str):
249+
This will fail with a RuntimeError if we fail to create the index. It will fail with
250+
a MeilisearchApiError in other failure cases.
248251
"""
249-
Return a meilisearch index.
252+
try:
253+
return client.get_index(index_name)
254+
except meilisearch.errors.MeilisearchApiError as e:
255+
if e.code != "index_not_found":
256+
raise
257+
task_info = client.create_index(
258+
index_name, {"primaryKey": PRIMARY_KEY_FIELD_NAME}
259+
)
260+
wait_for_task_to_succeed(client, task_info)
261+
# Get the index again
262+
return client.get_index(index_name)
250263

251-
Note that the index may not exist, and it will be created on first insertion.
252-
ideally, the initialisation function `create_indexes` should be run first.
264+
265+
def update_index_filterables(
266+
client: meilisearch.Client, index: meilisearch.index.Index, filterables: list[str]
267+
) -> None:
253268
"""
254-
meilisearch_client = get_meilisearch_client()
255-
meilisearch_index_name = get_meilisearch_index_name(index_name)
256-
return meilisearch_client.index(meilisearch_index_name)
269+
Make sure that the filterable fields of an index include the given list of fields.
270+
271+
If existing fields are present, they are preserved.
272+
"""
273+
if not filterables:
274+
return
275+
existing_filterables = set(index.get_filterable_attributes())
276+
if set(filterables).issubset(existing_filterables):
277+
# all filterables fields are already present
278+
return
279+
all_filterables = list(existing_filterables.union(filterables))
280+
task_info = index.update_filterable_attributes(all_filterables)
281+
wait_for_task_to_succeed(client, task_info)
282+
283+
284+
def wait_for_task_to_succeed(
285+
client: meilisearch.Client,
286+
task_info: meilisearch.task.TaskInfo,
287+
timeout_in_ms: int = 5000,
288+
) -> None:
289+
"""
290+
Wait for a Meilisearch task to succeed. If it does not, raise RuntimeError.
291+
"""
292+
task = client.wait_for_task(task_info.task_uid, timeout_in_ms=timeout_in_ms)
293+
if task.status != "succeeded":
294+
raise RuntimeError(f"Failed meilisearch task: {task}")
257295

258296

259297
def get_meilisearch_client():
298+
"""
299+
Return a Meilisearch client with the right settings.
300+
"""
260301
return meilisearch.Client(MEILISEARCH_URL, api_key=MEILISEARCH_API_KEY)
261302

262303

@@ -332,7 +373,7 @@ def get_search_params(
332373
Return a dictionary of parameters that should be passed to the Meilisearch client
333374
`.search()` method.
334375
"""
335-
params = {"showRankingScore": True}
376+
params: dict[str, t.Any] = {"showRankingScore": True}
336377

337378
# Aggregation
338379
if aggregation_terms:

search/tests/test_meilisearch.py

Lines changed: 64 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@
33
"""
44

55
from datetime import datetime
6-
from unittest.mock import Mock
6+
from unittest.mock import Mock, patch
77

88
import django.test
99
from django.utils import timezone
10+
import meilisearch
1011
import pytest
12+
from requests import Response
1113

1214
from search.utils import DateRange, ValueRange
1315
import search.meilisearch
@@ -294,20 +296,22 @@ def test_engine_index(self):
294296

295297
def test_engine_search(self):
296298
engine = search.meilisearch.MeilisearchEngine(index="my_index")
297-
engine.meilisearch_index.search = Mock(return_value={
298-
"hits": [
299-
{
300-
"pk": "f381d4f1914235c9532576c0861d09b484ade634",
301-
"id": "course-v1:OpenedX+DemoX+DemoCourse",
302-
"_rankingScore": 0.865,
303-
},
304-
],
305-
"query": "demo",
306-
"processingTimeMs": 0,
307-
"limit": 20,
308-
"offset": 0,
309-
"estimatedTotalHits": 1
310-
})
299+
engine.meilisearch_index.search = Mock(
300+
return_value={
301+
"hits": [
302+
{
303+
"pk": "f381d4f1914235c9532576c0861d09b484ade634",
304+
"id": "course-v1:OpenedX+DemoX+DemoCourse",
305+
"_rankingScore": 0.865,
306+
},
307+
],
308+
"query": "demo",
309+
"processingTimeMs": 0,
310+
"limit": 20,
311+
"offset": 0,
312+
"estimatedTotalHits": 1,
313+
}
314+
)
311315

312316
results = engine.search(
313317
query_string="abc",
@@ -321,15 +325,19 @@ def test_engine_search(self):
321325
log_search_params=True,
322326
)
323327

324-
engine.meilisearch_index.search.assert_called_with("abc", {
325-
"showRankingScore": True,
326-
"facets": ["org", "course"],
327-
"filter": [
328-
'course = "course-v1:testorg+test1+alpha"',
329-
'org = "testorg"', 'key = "value" OR key NOT EXISTS',
330-
'NOT _pk = "81fe8bfe87576c3ecb22426f8e57847382917acf"',
331-
]
332-
})
328+
engine.meilisearch_index.search.assert_called_with(
329+
"abc",
330+
{
331+
"showRankingScore": True,
332+
"facets": ["org", "course"],
333+
"filter": [
334+
'course = "course-v1:testorg+test1+alpha"',
335+
'org = "testorg"',
336+
'key = "value" OR key NOT EXISTS',
337+
'NOT _pk = "81fe8bfe87576c3ecb22426f8e57847382917acf"',
338+
],
339+
},
340+
)
333341
assert results == {
334342
"aggs": {},
335343
"max_score": 0.865,
@@ -357,3 +365,35 @@ def test_engine_remove(self):
357365
doc_pk = "81fe8bfe87576c3ecb22426f8e57847382917acf"
358366
engine.remove(doc_ids=[doc_id])
359367
engine.meilisearch_index.delete_documents.assert_called_with([doc_pk])
368+
369+
370+
class UtilitiesTests(django.test.TestCase):
371+
"""
372+
Tests associated to the utility functions of the meilisearch engine.
373+
"""
374+
375+
@patch.object(search.meilisearch, "wait_for_task_to_succeed")
376+
def test_create_index(self, mock_wait_for_task_to_succeed) -> None:
377+
class ClientMock:
378+
"""
379+
Mocked client
380+
"""
381+
number_of_calls = 0
382+
383+
def get_index(self, index_name):
384+
"""Mocked client.get_index method"""
385+
self.number_of_calls += 1
386+
if self.number_of_calls == 1:
387+
error = meilisearch.errors.MeilisearchApiError("", Response())
388+
error.code = "index_not_found"
389+
raise error
390+
if self.number_of_calls == 2:
391+
return f"index created: {index_name}"
392+
# We shouldn't be there
393+
assert False
394+
395+
client = Mock()
396+
client.get_index = Mock(side_effect=ClientMock().get_index)
397+
result = search.meilisearch.get_or_create_meilisearch_index(client, "my_index")
398+
assert result == "index created: my_index"
399+
mock_wait_for_task_to_succeed.assert_called_once()

0 commit comments

Comments
 (0)