Skip to content

Commit 5fbe6ea

Browse files
Bulk index findings and sequentially invoke auto-correlations (#1355)
* Bulk index findings and sequentially invoke auto-correlations

  Signed-off-by: Megha Goyal <[email protected]>

* Bulk index findings in batches of 10000 and make it configurable

  Signed-off-by: Megha Goyal <[email protected]>

* Addressing review comments

  Signed-off-by: Megha Goyal <[email protected]>

* Add integ tests to test bulk index findings

  Signed-off-by: Megha Goyal <[email protected]>

* Fix ktlint formatting

  Signed-off-by: Megha Goyal <[email protected]>

---------

Signed-off-by: Megha Goyal <[email protected]>
(cherry picked from commit b561965)
Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent ee15812 commit 5fbe6ea

File tree

6 files changed

+159
-55
lines changed

6 files changed

+159
-55
lines changed

alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -357,7 +357,8 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R
357357
AlertingSettings.FINDING_HISTORY_MAX_DOCS,
358358
AlertingSettings.FINDING_HISTORY_INDEX_MAX_AGE,
359359
AlertingSettings.FINDING_HISTORY_ROLLOVER_PERIOD,
360-
AlertingSettings.FINDING_HISTORY_RETENTION_PERIOD
360+
AlertingSettings.FINDING_HISTORY_RETENTION_PERIOD,
361+
AlertingSettings.FINDINGS_INDEXING_BATCH_SIZE
361362
)
362363
}
363364

alerting/src/main/kotlin/org/opensearch/alerting/DocumentLevelMonitorRunner.kt

Lines changed: 92 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,15 @@ package org.opensearch.alerting
88
import org.apache.logging.log4j.LogManager
99
import org.opensearch.ExceptionsHelper
1010
import org.opensearch.OpenSearchStatusException
11+
import org.opensearch.action.DocWriteRequest
12+
import org.opensearch.action.admin.indices.refresh.RefreshAction
13+
import org.opensearch.action.admin.indices.refresh.RefreshRequest
14+
import org.opensearch.action.bulk.BulkRequest
15+
import org.opensearch.action.bulk.BulkResponse
1116
import org.opensearch.action.index.IndexRequest
12-
import org.opensearch.action.index.IndexResponse
1317
import org.opensearch.action.search.SearchAction
1418
import org.opensearch.action.search.SearchRequest
1519
import org.opensearch.action.search.SearchResponse
16-
import org.opensearch.action.support.WriteRequest
1720
import org.opensearch.alerting.model.DocumentExecutionContext
1821
import org.opensearch.alerting.model.DocumentLevelTriggerRunResult
1922
import org.opensearch.alerting.model.InputRunResults
@@ -248,10 +251,7 @@ object DocumentLevelMonitorRunner : MonitorRunner() {
248251
// If there are no triggers defined, we still want to generate findings
249252
if (monitor.triggers.isEmpty()) {
250253
if (dryrun == false && monitor.id != Monitor.NO_ID) {
251-
docsToQueries.forEach {
252-
val triggeredQueries = it.value.map { queryId -> idQueryMap[queryId]!! }
253-
createFindings(monitor, monitorCtx, triggeredQueries, it.key, true)
254-
}
254+
createFindings(monitor, monitorCtx, docsToQueries, idQueryMap, true)
255255
}
256256
} else {
257257
monitor.triggers.forEach {
@@ -340,7 +340,7 @@ object DocumentLevelMonitorRunner : MonitorRunner() {
340340
trigger: DocumentLevelTrigger,
341341
monitor: Monitor,
342342
idQueryMap: Map<String, DocLevelQuery>,
343-
docsToQueries: Map<String, List<String>>,
343+
docsToQueries: MutableMap<String, MutableList<String>>,
344344
queryToDocIds: Map<DocLevelQuery, Set<String>>,
345345
dryrun: Boolean,
346346
workflowRunContext: WorkflowRunContext?,
@@ -349,35 +349,33 @@ object DocumentLevelMonitorRunner : MonitorRunner() {
349349
val triggerCtx = DocumentLevelTriggerExecutionContext(monitor, trigger)
350350
val triggerResult = monitorCtx.triggerService!!.runDocLevelTrigger(monitor, trigger, queryToDocIds)
351351

352-
val findings = mutableListOf<String>()
353-
val findingDocPairs = mutableListOf<Pair<String, String>>()
352+
val triggerFindingDocPairs = mutableListOf<Pair<String, String>>()
354353

355354
// TODO: Implement throttling for findings
356-
docsToQueries.forEach {
357-
val triggeredQueries = it.value.map { queryId -> idQueryMap[queryId]!! }
358-
val findingId = createFindings(
359-
monitor,
360-
monitorCtx,
361-
triggeredQueries,
362-
it.key,
363-
!dryrun && monitor.id != Monitor.NO_ID,
364-
executionId
365-
)
366-
findings.add(findingId)
355+
val findingToDocPairs = createFindings(
356+
monitor,
357+
monitorCtx,
358+
docsToQueries,
359+
idQueryMap,
360+
!dryrun && monitor.id != Monitor.NO_ID,
361+
executionId
362+
)
367363

368-
if (triggerResult.triggeredDocs.contains(it.key)) {
369-
findingDocPairs.add(Pair(findingId, it.key))
364+
findingToDocPairs.forEach {
365+
// Only pick those entries whose docs have triggers associated with them
366+
if (triggerResult.triggeredDocs.contains(it.second)) {
367+
triggerFindingDocPairs.add(Pair(it.first, it.second))
370368
}
371369
}
372370

373371
val actionCtx = triggerCtx.copy(
374372
triggeredDocs = triggerResult.triggeredDocs,
375-
relatedFindings = findings,
373+
relatedFindings = findingToDocPairs.map { it.first },
376374
error = monitorResult.error ?: triggerResult.error
377375
)
378376

379377
val alerts = mutableListOf<Alert>()
380-
findingDocPairs.forEach {
378+
triggerFindingDocPairs.forEach {
381379
val alert = monitorCtx.alertService!!.composeDocLevelAlert(
382380
listOf(it.first),
383381
listOf(it.second),
@@ -436,51 +434,92 @@ object DocumentLevelMonitorRunner : MonitorRunner() {
436434
return triggerResult
437435
}
438436

437+
/**
438+
* 1. Bulk index all findings based on shouldCreateFinding flag
439+
* 2. Invoke publishFinding() for each finding to kickstart auto-correlations
440+
* 3. Return a list of (finding id, doc id) pairs, one per entry in docsToQueries
441+
*/
439442
private suspend fun createFindings(
440443
monitor: Monitor,
441444
monitorCtx: MonitorRunnerExecutionContext,
442-
docLevelQueries: List<DocLevelQuery>,
443-
matchingDocId: String,
445+
docsToQueries: MutableMap<String, MutableList<String>>,
446+
idQueryMap: Map<String, DocLevelQuery>,
444447
shouldCreateFinding: Boolean,
445448
workflowExecutionId: String? = null,
446-
): String {
447-
// Before the "|" is the doc id and after the "|" is the index
448-
val docIndex = matchingDocId.split("|")
449+
): List<Pair<String, String>> {
449450

450-
val finding = Finding(
451-
id = UUID.randomUUID().toString(),
452-
relatedDocIds = listOf(docIndex[0]),
453-
correlatedDocIds = listOf(docIndex[0]),
454-
monitorId = monitor.id,
455-
monitorName = monitor.name,
456-
index = docIndex[1],
457-
docLevelQueries = docLevelQueries,
458-
timestamp = Instant.now(),
459-
executionId = workflowExecutionId
460-
)
451+
val findingDocPairs = mutableListOf<Pair<String, String>>()
452+
val findings = mutableListOf<Finding>()
453+
val indexRequests = mutableListOf<IndexRequest>()
461454

462-
val findingStr = finding.toXContent(XContentBuilder.builder(XContentType.JSON.xContent()), ToXContent.EMPTY_PARAMS).string()
463-
logger.debug("Findings: $findingStr")
455+
docsToQueries.forEach {
456+
val triggeredQueries = it.value.map { queryId -> idQueryMap[queryId]!! }
464457

465-
if (shouldCreateFinding) {
466-
val indexRequest = IndexRequest(monitor.dataSources.findingsIndex)
467-
.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE)
468-
.source(findingStr, XContentType.JSON)
469-
.id(finding.id)
470-
.routing(finding.id)
458+
// Before the "|" is the doc id and after the "|" is the index
459+
val docIndex = it.key.split("|")
471460

472-
monitorCtx.client!!.suspendUntil<Client, IndexResponse> {
473-
monitorCtx.client!!.index(indexRequest, it)
461+
val finding = Finding(
462+
id = UUID.randomUUID().toString(),
463+
relatedDocIds = listOf(docIndex[0]),
464+
correlatedDocIds = listOf(docIndex[0]),
465+
monitorId = monitor.id,
466+
monitorName = monitor.name,
467+
index = docIndex[1],
468+
docLevelQueries = triggeredQueries,
469+
timestamp = Instant.now(),
470+
executionId = workflowExecutionId
471+
)
472+
findingDocPairs.add(Pair(finding.id, it.key))
473+
findings.add(finding)
474+
475+
val findingStr =
476+
finding.toXContent(XContentBuilder.builder(XContentType.JSON.xContent()), ToXContent.EMPTY_PARAMS)
477+
.string()
478+
logger.debug("Findings: $findingStr")
479+
480+
if (shouldCreateFinding) {
481+
indexRequests += IndexRequest(monitor.dataSources.findingsIndex)
482+
.source(findingStr, XContentType.JSON)
483+
.id(finding.id)
484+
.opType(DocWriteRequest.OpType.CREATE)
474485
}
475486
}
476487

488+
if (indexRequests.isNotEmpty()) {
489+
bulkIndexFindings(monitor, monitorCtx, indexRequests)
490+
}
491+
477492
try {
478-
publishFinding(monitor, monitorCtx, finding)
493+
findings.forEach { finding ->
494+
publishFinding(monitor, monitorCtx, finding)
495+
}
479496
} catch (e: Exception) {
480497
// suppress exception
481498
logger.error("Optional finding callback failed", e)
482499
}
483-
return finding.id
500+
return findingDocPairs
501+
}
502+
503+
private suspend fun bulkIndexFindings(
504+
monitor: Monitor,
505+
monitorCtx: MonitorRunnerExecutionContext,
506+
indexRequests: List<IndexRequest>
507+
) {
508+
indexRequests.chunked(monitorCtx.findingsIndexBatchSize).forEach { batch ->
509+
val bulkResponse: BulkResponse = monitorCtx.client!!.suspendUntil {
510+
bulk(BulkRequest().add(batch), it)
511+
}
512+
if (bulkResponse.hasFailures()) {
513+
bulkResponse.items.forEach { item ->
514+
if (item.isFailed) {
515+
logger.error("Failed indexing the finding ${item.id} of monitor [${monitor.id}]")
516+
}
517+
}
518+
} else {
519+
logger.debug("[${bulkResponse.items.size}] All findings successfully indexed.")
520+
}
521+
}
522+
monitorCtx.client!!.execute(RefreshAction.INSTANCE, RefreshRequest(monitor.dataSources.findingsIndex))
484523
}
485524

486525
private fun publishFinding(
@@ -605,7 +644,7 @@ object DocumentLevelMonitorRunner : MonitorRunner() {
605644
matchingDocs.addAll(getAllDocs(hits, index, concreteIndex, monitor.id, conflictingFields))
606645
}
607646
} catch (e: Exception) {
608-
logger.warn("Failed to run for shard $shard. Error: ${e.message}")
647+
logger.error("Failed to run for shard $shard. Error: ${e.message}")
609648
}
610649
}
611650
return matchingDocs

alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerExecutionContext.kt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,5 +47,6 @@ data class MonitorRunnerExecutionContext(
4747
@Volatile var destinationContextFactory: DestinationContextFactory? = null,
4848

4949
@Volatile var maxActionableAlertCount: Long = AlertingSettings.DEFAULT_MAX_ACTIONABLE_ALERT_COUNT,
50-
@Volatile var indexTimeout: TimeValue? = null
50+
@Volatile var indexTimeout: TimeValue? = null,
51+
@Volatile var findingsIndexBatchSize: Int = AlertingSettings.DEFAULT_FINDINGS_INDEXING_BATCH_SIZE
5152
)

alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,10 @@ import org.opensearch.alerting.model.WorkflowRunResult
2222
import org.opensearch.alerting.model.destination.DestinationContextFactory
2323
import org.opensearch.alerting.opensearchapi.retry
2424
import org.opensearch.alerting.script.TriggerExecutionContext
25+
import org.opensearch.alerting.settings.AlertingSettings
2526
import org.opensearch.alerting.settings.AlertingSettings.Companion.ALERT_BACKOFF_COUNT
2627
import org.opensearch.alerting.settings.AlertingSettings.Companion.ALERT_BACKOFF_MILLIS
28+
import org.opensearch.alerting.settings.AlertingSettings.Companion.FINDINGS_INDEXING_BATCH_SIZE
2729
import org.opensearch.alerting.settings.AlertingSettings.Companion.INDEX_TIMEOUT
2830
import org.opensearch.alerting.settings.AlertingSettings.Companion.MAX_ACTIONABLE_ALERT_COUNT
2931
import org.opensearch.alerting.settings.AlertingSettings.Companion.MOVE_ALERTS_BACKOFF_COUNT
@@ -169,6 +171,11 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon
169171

170172
monitorCtx.indexTimeout = INDEX_TIMEOUT.get(monitorCtx.settings)
171173

174+
monitorCtx.findingsIndexBatchSize = FINDINGS_INDEXING_BATCH_SIZE.get(monitorCtx.settings)
175+
monitorCtx.clusterService!!.clusterSettings.addSettingsUpdateConsumer(AlertingSettings.FINDINGS_INDEXING_BATCH_SIZE) {
176+
monitorCtx.findingsIndexBatchSize = it
177+
}
178+
172179
return this
173180
}
174181

alerting/src/main/kotlin/org/opensearch/alerting/settings/AlertingSettings.kt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ class AlertingSettings {
1717

1818
companion object {
1919
const val DEFAULT_MAX_ACTIONABLE_ALERT_COUNT = 50L
20+
const val DEFAULT_FINDINGS_INDEXING_BATCH_SIZE = 1000
2021

2122
val ALERTING_MAX_MONITORS = Setting.intSetting(
2223
"plugins.alerting.monitor.max_monitors",
@@ -176,5 +177,12 @@ class AlertingSettings {
176177
Setting.Property.NodeScope,
177178
Setting.Property.Dynamic
178179
)
180+
181+
val FINDINGS_INDEXING_BATCH_SIZE = Setting.intSetting(
182+
"plugins.alerting.alert_findings_indexing_batch_size",
183+
DEFAULT_FINDINGS_INDEXING_BATCH_SIZE,
184+
1,
185+
Setting.Property.NodeScope, Setting.Property.Dynamic
186+
)
179187
}
180188
}

alerting/src/test/kotlin/org/opensearch/alerting/DocumentMonitorRunnerIT.kt

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,54 @@ class DocumentMonitorRunnerIT : AlertingRestTestCase() {
393393
assertEquals("Didn't find findings for docs 1 and 5", 2, foundFindings.size)
394394
}
395395

396+
fun `test execute monitor for bulk index findings`() {
397+
val testIndexPrefix = "test-index-${randomAlphaOfLength(10).lowercase(Locale.ROOT)}"
398+
val testQueryName = "wildcard-test-query"
399+
val testIndex = createTestIndex("${testIndexPrefix}1")
400+
val testIndex2 = createTestIndex("${testIndexPrefix}2")
401+
402+
val testTime = DateTimeFormatter.ISO_OFFSET_DATE_TIME.format(ZonedDateTime.now().truncatedTo(MILLIS))
403+
val testDoc = """{
404+
"message" : "This is an error from IAD region",
405+
"test_strict_date_time" : "$testTime",
406+
"test_field" : "us-west-2"
407+
}"""
408+
409+
val docQuery = DocLevelQuery(query = "test_field:\"us-west-2\"", name = testQueryName, fields = listOf())
410+
val docLevelInput = DocLevelMonitorInput("description", listOf("$testIndexPrefix*"), listOf(docQuery))
411+
412+
val trigger = randomDocumentLevelTrigger(condition = Script("query[name=$testQueryName]"))
413+
val monitor = createMonitor(randomDocumentLevelMonitor(inputs = listOf(docLevelInput), triggers = listOf(trigger)))
414+
assertNotNull(monitor.id)
415+
416+
for (i in 0 until 9) {
417+
indexDoc(testIndex, i.toString(), testDoc)
418+
}
419+
indexDoc(testIndex2, "3", testDoc)
420+
adminClient().updateSettings("plugins.alerting.alert_findings_indexing_batch_size", 2)
421+
422+
val response = executeMonitor(monitor.id)
423+
424+
val output = entityAsMap(response)
425+
426+
assertEquals(monitor.name, output["monitor_name"])
427+
@Suppress("UNCHECKED_CAST")
428+
val searchResult = (output.objectMap("input_results")["results"] as List<Map<String, Any>>).first()
429+
@Suppress("UNCHECKED_CAST")
430+
val matchingDocsToQuery = searchResult[docQuery.id] as List<String>
431+
assertEquals("Correct search result", 10, matchingDocsToQuery.size)
432+
assertTrue("Correct search result", matchingDocsToQuery.containsAll(listOf("1|$testIndex", "2|$testIndex", "3|$testIndex2")))
433+
434+
val alerts = searchAlertsWithFilter(monitor)
435+
assertEquals("Alert saved for test monitor", 10, alerts.size)
436+
437+
val findings = searchFindings(monitor)
438+
assertEquals("Findings saved for test monitor", 10, findings.size)
439+
val foundFindings =
440+
findings.filter { it.relatedDocIds.contains("1") || it.relatedDocIds.contains("2") || it.relatedDocIds.contains("3") }
441+
assertEquals("Found findings for all docs", 4, foundFindings.size)
442+
}
443+
396444
fun `test execute monitor with wildcard index that generates alerts and findings for NOT EQUALS query operator`() {
397445
val testIndexPrefix = "test-index-${randomAlphaOfLength(10).lowercase(Locale.ROOT)}"
398446
val testQueryName = "wildcard-test-query"

0 commit comments

Comments
 (0)