Commit ae046ee

[Backport 2.x] Enhance per bucket, and per document monitor notification message ctx. (opensearch-project#1450) (opensearch-project#1477)

* Enhance per bucket, and per document monitor notification message ctx. (opensearch-project#1450)
  * Adding dev logs.
  * Added support for returning sample documents for bucket level monitors.
  * Added support for printing query/rule info in notification messages.
  * Extracted out helper function.
  * Extracted out helper function.
  * Added support for printing document data in notification messages for document level monitors.
  * Refactored logic after making AlertContext a separate class from Alert instead of inheriting/extending it in common utils.
  * Moved AlertContext data model from common utils to alerting plugin.
  * Fixed ktlint errors.
  * Added additional unit tests.
  * Extracted sample doc aggs logic into helper function. Added support for sorting sample docs based on metric aggregations.
  * Extracted get sample doc logic into helper function. Added sorting for sample docs.
  * Removed dev code.
  * Fixed ktlint errors.
  * Added comments based on PR feedback.
  * Added logic to make mGet calls in batches.

  (cherry picked from commit 5dc690c)

* Fixed imports.

Signed-off-by: AWSHurneyt <[email protected]>
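The hunks below build AlertContext objects that bundle the triggering Alert with optional sample documents and associated queries before handing them to notification actions. For orientation, here is a minimal sketch of what that model might look like, inferred only from the constructor calls visible in this diff; the default values, the DocLevelQuery import, and anything about how the class renders itself into template arguments are assumptions, not part of this commit.

// Hypothetical sketch of the AlertContext model referenced by this commit.
// Only the constructor parameters used in the hunks below (alert, associatedQueries,
// sampleDocs) are grounded in the diff; defaults and types beyond that are assumed.
import org.opensearch.commons.alerting.model.Alert
import org.opensearch.commons.alerting.model.DocLevelQuery

data class AlertContext(
    val alert: Alert,
    // Queries that generated the alert's findings (document-level monitors).
    val associatedQueries: List<DocLevelQuery>? = null,
    // Sample documents: top/bottom docs of the bucket (bucket-level monitors)
    // or the finding's source document (document-level monitors).
    val sampleDocs: List<Map<String, Any>>? = null
)

Presumably the class also exposes these fields to the mustache templates used in action messages, but that mapping lives outside the files shown here.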
1 parent dde6aab commit ae046ee

14 files changed: +1388 -6 lines changed

alerting/src/main/kotlin/org/opensearch/alerting/BucketLevelMonitorRunner.kt (+119 -6)
@@ -13,10 +13,12 @@ import org.opensearch.action.search.SearchRequest
 import org.opensearch.action.search.SearchResponse
 import org.opensearch.action.support.WriteRequest
 import org.opensearch.alerting.model.ActionRunResult
+import org.opensearch.alerting.model.AlertContext
 import org.opensearch.alerting.model.BucketLevelTriggerRunResult
 import org.opensearch.alerting.model.InputRunResults
 import org.opensearch.alerting.model.MonitorRunResult
 import org.opensearch.alerting.opensearchapi.InjectorContextElement
+import org.opensearch.alerting.opensearchapi.convertToMap
 import org.opensearch.alerting.opensearchapi.retry
 import org.opensearch.alerting.opensearchapi.suspendUntil
 import org.opensearch.alerting.opensearchapi.withClosableContext
@@ -25,7 +27,9 @@ import org.opensearch.alerting.util.defaultToPerExecutionAction
 import org.opensearch.alerting.util.getActionExecutionPolicy
 import org.opensearch.alerting.util.getBucketKeysHash
 import org.opensearch.alerting.util.getCombinedTriggerRunResult
+import org.opensearch.alerting.util.printsSampleDocData
 import org.opensearch.alerting.workflow.WorkflowRunContext
+import org.opensearch.client.Client
 import org.opensearch.common.xcontent.LoggingDeprecationHandler
 import org.opensearch.common.xcontent.XContentType
 import org.opensearch.commons.alerting.model.Alert
@@ -221,6 +225,8 @@ object BucketLevelMonitorRunner : MonitorRunner() {
                 }
             }
 
+        // The alertSampleDocs map structure is Map<TriggerId, Map<BucketKeysHash, List<Alert>>>
+        val alertSampleDocs = mutableMapOf<String, Map<String, List<Map<String, Any>>>>()
         for (trigger in monitor.triggers) {
             val alertsToUpdate = mutableSetOf<Alert>()
             val completedAlertsToUpdate = mutableSetOf<Alert>()
@@ -231,6 +237,32 @@
                 ?: mutableListOf()
             // Update nextAlerts so the filtered DEDUPED Alerts are reflected for PER_ALERT Action execution
             nextAlerts[trigger.id]?.set(AlertCategory.DEDUPED, dedupedAlerts)
+
+            // Only collect sample docs for triggered triggers, and only when at least 1 action prints sample doc data.
+            val isTriggered = !nextAlerts[trigger.id]?.get(AlertCategory.NEW).isNullOrEmpty()
+            if (isTriggered && printsSampleDocData(trigger)) {
+                try {
+                    val searchRequest = monitorCtx.inputService!!.getSearchRequest(
+                        monitor = monitor.copy(triggers = listOf(trigger)),
+                        searchInput = monitor.inputs[0] as SearchInput,
+                        periodStart = periodStart,
+                        periodEnd = periodEnd,
+                        prevResult = monitorResult.inputResults,
+                        matchingDocIdsPerIndex = null,
+                        returnSampleDocs = true
+                    )
+                    val sampleDocumentsByBucket = getSampleDocs(
+                        client = monitorCtx.client!!,
+                        monitorId = monitor.id,
+                        triggerId = trigger.id,
+                        searchRequest = searchRequest
+                    )
+                    alertSampleDocs[trigger.id] = sampleDocumentsByBucket
+                } catch (e: Exception) {
+                    logger.error("Error retrieving sample documents for trigger {} of monitor {}.", trigger.id, monitor.id, e)
+                }
+            }
+
             val newAlerts = nextAlerts[trigger.id]?.get(AlertCategory.NEW) ?: mutableListOf()
             val completedAlerts = nextAlerts[trigger.id]?.get(AlertCategory.COMPLETED) ?: mutableListOf()
 
@@ -256,9 +288,12 @@
                 for (alertCategory in actionExecutionScope.actionableAlerts) {
                     val alertsToExecuteActionsFor = nextAlerts[trigger.id]?.get(alertCategory) ?: mutableListOf()
                     for (alert in alertsToExecuteActionsFor) {
+                        val alertContext = if (alertCategory != AlertCategory.NEW) AlertContext(alert = alert)
+                        else getAlertContext(alert = alert, alertSampleDocs = alertSampleDocs)
+
                         val actionCtx = getActionContextForAlertCategory(
                             alertCategory,
-                            alert,
+                            alertContext,
                             triggerCtx,
                             monitorOrTriggerError
                         )
@@ -292,7 +327,9 @@
 
                 val actionCtx = triggerCtx.copy(
                     dedupedAlerts = dedupedAlerts,
-                    newAlerts = newAlerts,
+                    newAlerts = newAlerts.map {
+                        getAlertContext(alert = it, alertSampleDocs = alertSampleDocs)
+                    },
                     completedAlerts = completedAlerts,
                     error = monitorResult.error ?: triggerResult.error
                 )
@@ -487,17 +524,93 @@
 
     private fun getActionContextForAlertCategory(
         alertCategory: AlertCategory,
-        alert: Alert,
+        alertContext: AlertContext,
         ctx: BucketLevelTriggerExecutionContext,
         error: Exception?
     ): BucketLevelTriggerExecutionContext {
         return when (alertCategory) {
             AlertCategory.DEDUPED ->
-                ctx.copy(dedupedAlerts = listOf(alert), newAlerts = emptyList(), completedAlerts = emptyList(), error = error)
+                ctx.copy(dedupedAlerts = listOf(alertContext.alert), newAlerts = emptyList(), completedAlerts = emptyList(), error = error)
             AlertCategory.NEW ->
-                ctx.copy(dedupedAlerts = emptyList(), newAlerts = listOf(alert), completedAlerts = emptyList(), error = error)
+                ctx.copy(dedupedAlerts = emptyList(), newAlerts = listOf(alertContext), completedAlerts = emptyList(), error = error)
             AlertCategory.COMPLETED ->
-                ctx.copy(dedupedAlerts = emptyList(), newAlerts = emptyList(), completedAlerts = listOf(alert), error = error)
+                ctx.copy(dedupedAlerts = emptyList(), newAlerts = emptyList(), completedAlerts = listOf(alertContext.alert), error = error)
+        }
+    }
+
+    private fun getAlertContext(
+        alert: Alert,
+        alertSampleDocs: Map<String, Map<String, List<Map<String, Any>>>>
+    ): AlertContext {
+        val bucketKey = alert.aggregationResultBucket?.getBucketKeysHash()
+        val sampleDocs = alertSampleDocs[alert.triggerId]?.get(bucketKey)
+        return if (!bucketKey.isNullOrEmpty() && !sampleDocs.isNullOrEmpty()) {
+            AlertContext(alert = alert, sampleDocs = sampleDocs)
+        } else {
+            logger.error(
+                "Failed to retrieve sample documents for alert {} from trigger {} of monitor {} during execution {}.",
+                alert.id,
+                alert.triggerId,
+                alert.monitorId,
+                alert.executionId
+            )
+            AlertContext(alert = alert, sampleDocs = listOf())
         }
     }
+
+    /**
+     * Executes the monitor's query with the addition of 2 top_hits aggregations that are used to return the top 5,
+     * and bottom 5 documents for each bucket.
+     *
+     * @return Map<BucketKeysHash, List<Alert>>
+     */
+    @Suppress("UNCHECKED_CAST")
+    private suspend fun getSampleDocs(
+        client: Client,
+        monitorId: String,
+        triggerId: String,
+        searchRequest: SearchRequest
+    ): Map<String, List<Map<String, Any>>> {
+        val sampleDocumentsByBucket = mutableMapOf<String, List<Map<String, Any>>>()
+        val searchResponse: SearchResponse = client.suspendUntil { client.search(searchRequest, it) }
+        val aggs = searchResponse.convertToMap().getOrDefault("aggregations", mapOf<String, Any>()) as Map<String, Any>
+        val compositeAgg = aggs.getOrDefault("composite_agg", mapOf<String, Any>()) as Map<String, Any>
+        val buckets = compositeAgg.getOrDefault("buckets", emptyList<Map<String, Any>>()) as List<Map<String, Any>>
+
+        buckets.forEach { bucket ->
+            val bucketKey = getBucketKeysHash((bucket.getOrDefault("key", mapOf<String, String>()) as Map<String, String>).values.toList())
+            if (bucketKey.isEmpty()) throw IllegalStateException("Cannot format bucket keys.")
+
+            val unwrappedTopHits = (bucket.getOrDefault("top_hits", mapOf<String, Any>()) as Map<String, Any>)
+                .getOrDefault("hits", mapOf<String, Any>()) as Map<String, Any>
+            val topHits = unwrappedTopHits.getOrDefault("hits", listOf<Map<String, Any>>()) as List<Map<String, Any>>
+
+            val unwrappedLowHits = (bucket.getOrDefault("low_hits", mapOf<String, Any>()) as Map<String, Any>)
+                .getOrDefault("hits", mapOf<String, Any>()) as Map<String, Any>
+            val lowHits = unwrappedLowHits.getOrDefault("hits", listOf<Map<String, Any>>()) as List<Map<String, Any>>
+
+            // Reversing the order of lowHits so allHits will be in descending order.
+            val allHits = topHits + lowHits.reversed()
+
+            if (allHits.isEmpty()) {
+                // We expect sample documents to be available for each bucket.
+                logger.error("Sample documents not found for trigger {} of monitor {}.", triggerId, monitorId)
+            }
+
+            // Removing duplicate hits. The top_hits, and low_hits results return a max of 5 docs each.
+            // The same document could be present in both hit lists if there are fewer than 10 documents in the bucket of data.
+            val uniqueHitIds = mutableSetOf<String>()
+            val dedupedHits = mutableListOf<Map<String, Any>>()
+            allHits.forEach { hit ->
+                val hitId = hit["_id"] as String
+                if (!uniqueHitIds.contains(hitId)) {
+                    uniqueHitIds.add(hitId)
+                    dedupedHits.add(hit)
+                }
+            }
+            sampleDocumentsByBucket[bucketKey] = dedupedHits
+        }
+
+        return sampleDocumentsByBucket
+    }
 }
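The request-building side of this change, InputService.getSearchRequest(..., returnSampleDocs = true), is not part of this diff. As a rough illustration of what the getSampleDocs() parser above expects, the sketch below attaches two top_hits sub-aggregations named "top_hits" and "low_hits" to a composite aggregation; the helper name, the sort-field parameter, and the hard-coded size of 5 are assumptions (only the aggregation names and the top 5 / bottom 5 behavior are implied by the code above).

// Hypothetical sketch: decorate a composite aggregation with the "top_hits" and "low_hits"
// sub-aggregations that getSampleDocs() above parses. Each returns up to 5 documents per
// bucket from opposite ends of the sort order; getSampleDocs() reverses the low hits and
// de-duplicates by _id when merging the two lists.
import org.opensearch.search.aggregations.AggregationBuilders
import org.opensearch.search.aggregations.bucket.composite.CompositeAggregationBuilder
import org.opensearch.search.sort.SortOrder

fun addSampleDocAggs(compositeAgg: CompositeAggregationBuilder, sortField: String): CompositeAggregationBuilder {
    compositeAgg.subAggregation(
        AggregationBuilders.topHits("top_hits").size(5).sort(sortField, SortOrder.DESC)
    )
    compositeAgg.subAggregation(
        AggregationBuilders.topHits("low_hits").size(5).sort(sortField, SortOrder.ASC)
    )
    return compositeAgg
}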

alerting/src/main/kotlin/org/opensearch/alerting/DocumentLevelMonitorRunner.kt (+77 -6)
@@ -13,22 +13,28 @@ import org.opensearch.action.admin.indices.refresh.RefreshAction
 import org.opensearch.action.admin.indices.refresh.RefreshRequest
 import org.opensearch.action.bulk.BulkRequest
 import org.opensearch.action.bulk.BulkResponse
+import org.opensearch.action.get.MultiGetItemResponse
+import org.opensearch.action.get.MultiGetRequest
 import org.opensearch.action.index.IndexRequest
 import org.opensearch.action.search.SearchAction
 import org.opensearch.action.search.SearchRequest
 import org.opensearch.action.search.SearchResponse
+import org.opensearch.alerting.model.AlertContext
 import org.opensearch.alerting.model.DocumentLevelTriggerRunResult
 import org.opensearch.alerting.model.IndexExecutionContext
 import org.opensearch.alerting.model.InputRunResults
 import org.opensearch.alerting.model.MonitorMetadata
 import org.opensearch.alerting.model.MonitorRunResult
 import org.opensearch.alerting.model.userErrorMessage
+import org.opensearch.alerting.opensearchapi.convertToMap
 import org.opensearch.alerting.opensearchapi.suspendUntil
 import org.opensearch.alerting.script.DocumentLevelTriggerExecutionContext
 import org.opensearch.alerting.util.AlertingException
 import org.opensearch.alerting.util.IndexUtils
 import org.opensearch.alerting.util.defaultToPerExecutionAction
 import org.opensearch.alerting.util.getActionExecutionPolicy
+import org.opensearch.alerting.util.parseSampleDocTags
+import org.opensearch.alerting.util.printsSampleDocData
 import org.opensearch.alerting.workflow.WorkflowRunContext
 import org.opensearch.client.node.NodeClient
 import org.opensearch.cluster.metadata.IndexMetadata
@@ -64,6 +70,7 @@ import org.opensearch.percolator.PercolateQueryBuilderExt
 import org.opensearch.search.SearchHit
 import org.opensearch.search.SearchHits
 import org.opensearch.search.builder.SearchSourceBuilder
+import org.opensearch.search.fetch.subphase.FetchSourceContext
 import org.opensearch.search.sort.SortOrder
 import java.io.IOException
 import java.time.Instant
@@ -83,6 +90,9 @@ class DocumentLevelMonitorRunner : MonitorRunner() {
     * Docs are fetched from the source index per shard and transformed.*/
    val transformedDocs = mutableListOf<Pair<String, TransformedDocDto>>()
 
+    // Maps a finding ID to the related document.
+    private val findingIdToDocSource = mutableMapOf<String, MultiGetItemResponse>()
+
     override suspend fun runMonitor(
         monitor: Monitor,
         monitorCtx: MonitorRunnerExecutionContext,
@@ -95,6 +105,7 @@
         logger.debug("Document-level-monitor is running ...")
         val isTempMonitor = dryrun || monitor.id == Monitor.NO_ID
         var monitorResult = MonitorRunResult<DocumentLevelTriggerRunResult>(monitor.name, periodStart, periodEnd)
+        monitorCtx.findingsToTriggeredQueries = mutableMapOf()
 
         try {
            monitorCtx.alertIndices!!.createOrUpdateAlertIndex(monitor.dataSources)
@@ -455,7 +466,15 @@
            error = monitorResult.error ?: triggerResult.error
        )
 
+        if (printsSampleDocData(trigger) && triggerFindingDocPairs.isNotEmpty())
+            getDocSources(
+                findingToDocPairs = findingToDocPairs,
+                monitorCtx = monitorCtx,
+                monitor = monitor
+            )
+
         val alerts = mutableListOf<Alert>()
+        val alertContexts = mutableListOf<AlertContext>()
         triggerFindingDocPairs.forEach {
             val alert = monitorCtx.alertService!!.composeDocLevelAlert(
                 listOf(it.first),
@@ -466,6 +485,18 @@
                 workflorwRunContext = workflowRunContext
             )
             alerts.add(alert)
+
+            val docSource = findingIdToDocSource[alert.findingIds.first()]?.response?.convertToMap()
+
+            alertContexts.add(
+                AlertContext(
+                    alert = alert,
+                    associatedQueries = alert.findingIds.flatMap { findingId ->
+                        monitorCtx.findingsToTriggeredQueries?.getOrDefault(findingId, emptyList()) ?: emptyList()
+                    },
+                    sampleDocs = listOfNotNull(docSource)
+                )
+            )
         }
 
         val shouldDefaultToPerExecution = defaultToPerExecutionAction(
@@ -479,13 +510,13 @@
         for (action in trigger.actions) {
             val actionExecutionScope = action.getActionExecutionPolicy(monitor)!!.actionExecutionScope
             if (actionExecutionScope is PerAlertActionScope && !shouldDefaultToPerExecution) {
-                for (alert in alerts) {
-                    val actionResults = this.runAction(action, actionCtx.copy(alerts = listOf(alert)), monitorCtx, monitor, dryrun)
-                    triggerResult.actionResultsMap.getOrPut(alert.id) { mutableMapOf() }
-                    triggerResult.actionResultsMap[alert.id]?.set(action.id, actionResults)
+                for (alertContext in alertContexts) {
+                    val actionResults = this.runAction(action, actionCtx.copy(alerts = listOf(alertContext)), monitorCtx, monitor, dryrun)
+                    triggerResult.actionResultsMap.getOrPut(alertContext.alert.id) { mutableMapOf() }
+                    triggerResult.actionResultsMap[alertContext.alert.id]?.set(action.id, actionResults)
                 }
-            } else if (alerts.isNotEmpty()) {
-                val actionResults = this.runAction(action, actionCtx.copy(alerts = alerts), monitorCtx, monitor, dryrun)
+            } else if (alertContexts.isNotEmpty()) {
+                val actionResults = this.runAction(action, actionCtx.copy(alerts = alertContexts), monitorCtx, monitor, dryrun)
                 for (alert in alerts) {
                     triggerResult.actionResultsMap.getOrPut(alert.id) { mutableMapOf() }
                     triggerResult.actionResultsMap[alert.id]?.set(action.id, actionResults)
@@ -532,6 +563,7 @@
         val findingDocPairs = mutableListOf<Pair<String, String>>()
         val findings = mutableListOf<Finding>()
         val indexRequests = mutableListOf<IndexRequest>()
+        val findingsToTriggeredQueries = mutableMapOf<String, List<DocLevelQuery>>()
 
         docsToQueries.forEach {
             val triggeredQueries = it.value.map { queryId -> idQueryMap[queryId]!! }
@@ -552,6 +584,7 @@
             )
             findingDocPairs.add(Pair(finding.id, it.key))
             findings.add(finding)
+            findingsToTriggeredQueries[finding.id] = triggeredQueries
 
             val findingStr =
                 finding.toXContent(XContentBuilder.builder(XContentType.JSON.xContent()), ToXContent.EMPTY_PARAMS)
@@ -578,6 +611,10 @@
             // suppress exception
             logger.error("Optional finding callback failed", e)
         }
+
+        if (monitorCtx.findingsToTriggeredQueries == null) monitorCtx.findingsToTriggeredQueries = findingsToTriggeredQueries
+        else monitorCtx.findingsToTriggeredQueries = monitorCtx.findingsToTriggeredQueries!! + findingsToTriggeredQueries
+
         return findingDocPairs
     }
 
@@ -1047,6 +1084,40 @@
         return numDocs >= maxNumDocsThreshold
     }
 
+    /**
+     * Performs an mGet request to retrieve the documents associated with findings.
+     *
+     * When possible, this will only retrieve the document fields that are specifically
+     * referenced for printing in the mustache template.
+     */
+    private suspend fun getDocSources(
+        findingToDocPairs: List<Pair<String, String>>,
+        monitorCtx: MonitorRunnerExecutionContext,
+        monitor: Monitor
+    ) {
+        val docFieldTags = parseSampleDocTags(monitor.triggers)
+        val request = MultiGetRequest()
+
+        // Perform mGet request in batches.
+        findingToDocPairs.chunked(monitorCtx.findingsIndexBatchSize).forEach { batch ->
+            batch.forEach { (findingId, docIdAndIndex) ->
+                val docIdAndIndexSplit = docIdAndIndex.split("|")
+                val docId = docIdAndIndexSplit[0]
+                val concreteIndex = docIdAndIndexSplit[1]
+                if (findingId.isNotEmpty() && docId.isNotEmpty() && concreteIndex.isNotEmpty()) {
+                    val docItem = MultiGetRequest.Item(concreteIndex, docId)
+                    if (docFieldTags.isNotEmpty())
+                        docItem.fetchSourceContext(FetchSourceContext(true, docFieldTags.toTypedArray(), emptyArray()))
+                    request.add(docItem)
+                }
+                val response = monitorCtx.client!!.suspendUntil { monitorCtx.client!!.multiGet(request, it) }
+                response.responses.forEach { item ->
+                    findingIdToDocSource[findingId] = item
+                }
+            }
+        }
+    }
+
     /**
      * POJO holding information about each doc's concrete index, id, input index pattern/alias/datastream name
      * and doc source. A list of these POJOs would be passed to percolate query execution logic.
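The helpers printsSampleDocData and parseSampleDocTags referenced above live in the alerting util package and are not shown in this diff. Judging only by how getDocSources() uses the result (as the include list of a FetchSourceContext), parseSampleDocTags presumably extracts the document _source fields that a trigger's action templates reference. The sketch below is a hypothetical approximation of that idea; the mustache path format, the sample_documents key, and the regex are assumptions, not the plugin's confirmed implementation.

// Hypothetical sketch of the kind of extraction parseSampleDocTags() might perform:
// collect the _source fields referenced by a trigger's action subject/message templates
// so the mGet in getDocSources() can fetch only those fields. Path format is assumed.
import org.opensearch.commons.alerting.model.Trigger

fun parseSampleDocTagsSketch(triggers: List<Trigger>): Set<String> {
    // Matches mustache variables such as {{ctx.alerts.0.sample_documents.0._source.some.field}}
    val tagRegex = Regex("""\{\{ctx\.[^}]*sample_documents\.\d+\._source\.([^}\s]+)}}""")
    return triggers
        .flatMap { it.actions }
        .flatMap { action -> listOfNotNull(action.subjectTemplate?.idOrCode, action.messageTemplate.idOrCode) }
        .flatMap { template -> tagRegex.findAll(template).map { it.groupValues[1] }.toList() }
        .toSet()
}

When no fields are referenced, getDocSources() leaves the FetchSourceContext unset and fetches the full document source, which matches the docFieldTags.isNotEmpty() check in the hunk above.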

0 commit comments