Skip to content

Commit 59963f5

Browse files
authored
Clean up flaky tests (opensearch-project#247)
Signed-off-by: John Mazanec <[email protected]>
1 parent 37664bd commit 59963f5

File tree

4 files changed

+197
-217
lines changed

4 files changed

+197
-217
lines changed

src/test/java/org/opensearch/knn/KNNRestTestCase.java

+28
Original file line numberDiff line numberDiff line change
@@ -877,6 +877,34 @@ public void assertTrainingSucceeds(String modelId, int attempts, int delayInMill
877877
fail("Training did not succeed after " + attempts + " attempts with a delay of " + delayInMillis + " ms.");
878878
}
879879

880+
public void assertTrainingFails(String modelId, int attempts, int delayInMillis) throws InterruptedException,
881+
IOException {
882+
int attemptNum = 0;
883+
Response response;
884+
Map<String, Object> responseMap;
885+
ModelState modelState;
886+
while (attemptNum < attempts) {
887+
Thread.sleep(delayInMillis);
888+
attemptNum++;
889+
890+
response = getModel(modelId, null);
891+
892+
responseMap = createParser(
893+
XContentType.JSON.xContent(),
894+
EntityUtils.toString(response.getEntity())
895+
).map();
896+
897+
modelState = ModelState.getModelState((String) responseMap.get(MODEL_STATE));
898+
if (modelState == ModelState.FAILED) {
899+
return;
900+
}
901+
902+
assertNotEquals(ModelState.CREATED, modelState);
903+
}
904+
905+
fail("Training did not succeed after " + attempts + " attempts with a delay of " + delayInMillis + " ms.");
906+
}
907+
880908
/**
881909
* We need to be able to dump the jacoco coverage before cluster is shut down.
882910
* The new internal testing framework removed some of the gradle tasks we were listening to

src/test/java/org/opensearch/knn/plugin/action/RestKNNStatsHandlerIT.java

-50
Original file line numberDiff line numberDiff line change
@@ -141,56 +141,6 @@ public void testInvalidMetricsStats() {
141141
Collections.singletonList("invalid_metric")));
142142
}
143143

144-
//TODO: Fix broken test case
145-
// This test case intended to check whether the "graph_query_error" stat gets incremented when a query fails.
146-
// It sets the circuit breaker limit to 1 kb and then indexes documents into the index and force merges so that
147-
// the sole segment's graph will not fit in the cache. Then, it runs a query and expects an exception. Then it
148-
// checks that the query errors get incremented. This test is flaky:
149-
// https://github.com/opensearch-project/k-NN/issues/43. During query, when a segment to be
150-
// searched is not present in the cache, it will first be loaded. Then it will be searched.
151-
//
152-
// The problem is that the cache built from CacheBuilder will not throw an exception if the entry exceeds the
153-
// size of the cache - tested this via log statements. However, the entry gets marked as expired immediately.
154-
// So, after loading the entry, sometimes the expired entry will get evicted before the search logic. This causes
155-
// the search to fail. However, it appears sometimes that the entry doesnt get immediately evicted, causing the
156-
// search to succeed.
157-
// public void testGraphQueryErrorsGetIncremented() throws Exception {
158-
// // Get initial query errors because it may not always be 0
159-
// String graphQueryErrors = StatNames.GRAPH_QUERY_ERRORS.getName();
160-
// Response response = getKnnStats(Collections.emptyList(), Collections.singletonList(graphQueryErrors));
161-
// String responseBody = EntityUtils.toString(response.getEntity());
162-
// Map<String, Object> nodeStats = parseNodeStatsResponse(responseBody).get(0);
163-
// int beforeErrors = (int) nodeStats.get(graphQueryErrors);
164-
//
165-
// // Set the circuit breaker very low so that loading an index will definitely fail
166-
// updateClusterSettings("knn.memory.circuit_breaker.limit", "1kb");
167-
//
168-
// Settings settings = Settings.builder()
169-
// .put("number_of_shards", 1)
170-
// .put("index.knn", true)
171-
// .build();
172-
// createKnnIndex(INDEX_NAME, settings, createKnnIndexMapping(FIELD_NAME, 2));
173-
//
174-
// // Add enough docs to trip the circuit breaker
175-
// Float[] vector = {1.3f, 2.2f};
176-
// int docsInIndex = 25;
177-
// for (int i = 0; i < docsInIndex; i++) {
178-
// addKnnDoc(INDEX_NAME, Integer.toString(i), FIELD_NAME, vector);
179-
// }
180-
// forceMergeKnnIndex(INDEX_NAME);
181-
//
182-
// // Execute a query that should fail
183-
// float[] qvector = {1.9f, 2.4f};
184-
// expectThrows(ResponseException.class, () ->
185-
// searchKNNIndex(INDEX_NAME, new KNNQueryBuilder(FIELD_NAME, qvector, 10), 10));
186-
//
187-
// // Check that the graphQuery errors gets incremented
188-
// response = getKnnStats(Collections.emptyList(), Collections.singletonList(graphQueryErrors));
189-
// responseBody = EntityUtils.toString(response.getEntity());
190-
// nodeStats = parseNodeStatsResponse(responseBody).get(0);
191-
// assertTrue((int) nodeStats.get(graphQueryErrors) > beforeErrors);
192-
// }
193-
194144
/**
195145
* Test checks that handler correctly returns stats for a single node
196146
*

0 commit comments

Comments
 (0)