Skip to content
This repository was archived by the owner on Aug 2, 2022. It is now read-only.

Commit b86bf66

Browse files
committed
Merge branch 'master' into issue-39
2 parents af41a34 + 56c49d9 commit b86bf66

File tree

13 files changed

+289
-240
lines changed

13 files changed

+289
-240
lines changed

alerting/build.gradle

+4
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,10 @@ configurations.all {
4949
force "commons-logging:commons-logging:${versions.commonslogging}"
5050
force "org.apache.httpcomponents:httpcore:${versions.httpcore}"
5151
force "commons-codec:commons-codec:${versions.commonscodec}"
52+
53+
// This is required because kotlin coroutines core-1.1.1 still requires kotin stdlib 1.3.20 and we're using 1.3.21
54+
force "org.jetbrains.kotlin:kotlin-stdlib:${kotlin_version}"
55+
force "org.jetbrains.kotlin:kotlin-stdlib-common:${kotlin_version}"
5256
}
5357
}
5458

alerting/src/main/kotlin/com/amazon/opendistroforelasticsearch/alerting/AlertingPlugin.kt

+7-11
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,16 @@
1414
*/
1515
package com.amazon.opendistroforelasticsearch.alerting
1616

17+
import com.amazon.opendistroforelasticsearch.alerting.alerts.AlertIndices
1718
import com.amazon.opendistroforelasticsearch.alerting.core.JobSweeper
1819
import com.amazon.opendistroforelasticsearch.alerting.core.ScheduledJobIndices
1920
import com.amazon.opendistroforelasticsearch.alerting.core.action.node.ScheduledJobsStatsAction
2021
import com.amazon.opendistroforelasticsearch.alerting.core.action.node.ScheduledJobsStatsTransportAction
21-
import com.amazon.opendistroforelasticsearch.alerting.alerts.AlertIndices
22+
import com.amazon.opendistroforelasticsearch.alerting.core.model.ScheduledJob
23+
import com.amazon.opendistroforelasticsearch.alerting.core.model.SearchInput
24+
import com.amazon.opendistroforelasticsearch.alerting.core.resthandler.RestScheduledJobStatsHandler
25+
import com.amazon.opendistroforelasticsearch.alerting.core.schedule.JobScheduler
26+
import com.amazon.opendistroforelasticsearch.alerting.core.settings.ScheduledJobSettings
2227
import com.amazon.opendistroforelasticsearch.alerting.model.Monitor
2328
import com.amazon.opendistroforelasticsearch.alerting.resthandler.RestAcknowledgeAlertAction
2429
import com.amazon.opendistroforelasticsearch.alerting.resthandler.RestDeleteDestinationAction
@@ -30,11 +35,6 @@ import com.amazon.opendistroforelasticsearch.alerting.resthandler.RestIndexMonit
3035
import com.amazon.opendistroforelasticsearch.alerting.resthandler.RestSearchMonitorAction
3136
import com.amazon.opendistroforelasticsearch.alerting.script.TriggerScript
3237
import com.amazon.opendistroforelasticsearch.alerting.settings.AlertingSettings
33-
import com.amazon.opendistroforelasticsearch.alerting.core.model.ScheduledJob
34-
import com.amazon.opendistroforelasticsearch.alerting.core.model.SearchInput
35-
import com.amazon.opendistroforelasticsearch.alerting.core.resthandler.RestScheduledJobStatsHandler
36-
import com.amazon.opendistroforelasticsearch.alerting.core.schedule.JobScheduler
37-
import com.amazon.opendistroforelasticsearch.alerting.core.settings.ScheduledJobSettings
3838
import org.elasticsearch.action.ActionRequest
3939
import org.elasticsearch.action.ActionResponse
4040
import org.elasticsearch.client.Client
@@ -61,10 +61,10 @@ import org.elasticsearch.rest.RestController
6161
import org.elasticsearch.rest.RestHandler
6262
import org.elasticsearch.script.ScriptContext
6363
import org.elasticsearch.script.ScriptService
64-
import org.elasticsearch.threadpool.ExecutorBuilder
6564
import org.elasticsearch.threadpool.ThreadPool
6665
import org.elasticsearch.watcher.ResourceWatcherService
6766
import java.util.function.Supplier
67+
6868
/**
6969
* Entry point of the OpenDistro for Elasticsearch alerting plugin
7070
* This class initializes the [RestGetMonitorAction], [RestDeleteMonitorAction], [RestIndexMonitorAction] rest handlers.
@@ -175,8 +175,4 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, P
175175
override fun getContexts(): List<ScriptContext<*>> {
176176
return listOf(TriggerScript.CONTEXT)
177177
}
178-
179-
override fun getExecutorBuilders(settings: Settings): List<ExecutorBuilder<*>> {
180-
return listOf(MonitorRunner.executorBuilder(settings))
181-
}
182178
}

alerting/src/main/kotlin/com/amazon/opendistroforelasticsearch/alerting/MonitorRunner.kt

+102-105
Large diffs are not rendered by default.

alerting/src/main/kotlin/com/amazon/opendistroforelasticsearch/alerting/alerts/AlertIndices.kt

+12-6
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,13 @@ import com.amazon.opendistroforelasticsearch.alerting.settings.AlertingSettings.
2323
import com.amazon.opendistroforelasticsearch.alerting.settings.AlertingSettings.Companion.ALERT_HISTORY_ROLLOVER_PERIOD
2424
import com.amazon.opendistroforelasticsearch.alerting.settings.AlertingSettings.Companion.REQUEST_TIMEOUT
2525
import org.apache.logging.log4j.LogManager
26+
import com.amazon.opendistroforelasticsearch.alerting.elasticapi.suspendUntil
2627
import org.elasticsearch.ResourceAlreadyExistsException
2728
import org.elasticsearch.action.admin.indices.alias.Alias
2829
import org.elasticsearch.action.admin.indices.create.CreateIndexRequest
30+
import org.elasticsearch.action.admin.indices.create.CreateIndexResponse
2931
import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsRequest
32+
import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsResponse
3033
import org.elasticsearch.action.admin.indices.rollover.RolloverRequest
3134
import org.elasticsearch.client.IndicesAdminClient
3235
import org.elasticsearch.cluster.ClusterChangedEvent
@@ -148,31 +151,34 @@ class AlertIndices(
148151
return alertIndexInitialized && historyIndexInitialized
149152
}
150153

151-
fun createAlertIndex() {
154+
suspend fun createAlertIndex() {
152155
if (!alertIndexInitialized) {
153156
alertIndexInitialized = createIndex(ALERT_INDEX)
154157
}
155158
alertIndexInitialized
156159
}
157160

158-
fun createInitialHistoryIndex() {
161+
suspend fun createInitialHistoryIndex() {
159162
if (!historyIndexInitialized) {
160163
historyIndexInitialized = createIndex(HISTORY_INDEX_PATTERN, HISTORY_WRITE_INDEX)
161164
}
162165
historyIndexInitialized
163166
}
164167

165-
private fun createIndex(index: String, alias: String? = null): Boolean {
168+
private suspend fun createIndex(index: String, alias: String? = null): Boolean {
166169
// This should be a fast check of local cluster state. Should be exceedingly rare that the local cluster
167170
// state does not contain the index and multiple nodes concurrently try to create the index.
168171
// If it does happen that error is handled we catch the ResourceAlreadyExistsException
169-
val exists = client.exists(IndicesExistsRequest(index).local(true)).actionGet(requestTimeout).isExists
170-
if (exists) return true
172+
val existsResponse: IndicesExistsResponse = client.suspendUntil {
173+
client.exists(IndicesExistsRequest(index).local(true), it)
174+
}
175+
if (existsResponse.isExists) return true
171176

172177
val request = CreateIndexRequest(index).mapping(MAPPING_TYPE, alertMapping(), XContentType.JSON)
173178
if (alias != null) request.alias(Alias(alias))
174179
return try {
175-
client.create(request).actionGet(requestTimeout).isAcknowledged
180+
val createIndexResponse: CreateIndexResponse = client.suspendUntil { client.create(request, it) }
181+
createIndexResponse.isAcknowledged
176182
} catch (e: ResourceAlreadyExistsException) {
177183
true
178184
}

alerting/src/main/kotlin/com/amazon/opendistroforelasticsearch/alerting/alerts/AlertMover.kt

+56-107
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,11 @@
1515

1616
package com.amazon.opendistroforelasticsearch.alerting.alerts
1717

18-
import com.amazon.opendistroforelasticsearch.alerting.MonitorRunner
1918
import com.amazon.opendistroforelasticsearch.alerting.alerts.AlertIndices.Companion.ALERT_INDEX
2019
import com.amazon.opendistroforelasticsearch.alerting.alerts.AlertIndices.Companion.HISTORY_WRITE_INDEX
2120
import com.amazon.opendistroforelasticsearch.alerting.model.Alert
2221
import com.amazon.opendistroforelasticsearch.alerting.model.Monitor
23-
import org.apache.logging.log4j.Logger
24-
import org.elasticsearch.action.ActionListener
22+
import com.amazon.opendistroforelasticsearch.alerting.elasticapi.suspendUntil
2523
import org.elasticsearch.action.bulk.BulkRequest
2624
import org.elasticsearch.action.bulk.BulkResponse
2725
import org.elasticsearch.action.delete.DeleteRequest
@@ -30,7 +28,6 @@ import org.elasticsearch.action.search.SearchRequest
3028
import org.elasticsearch.action.search.SearchResponse
3129
import org.elasticsearch.client.Client
3230
import org.elasticsearch.common.bytes.BytesReference
33-
import org.elasticsearch.common.unit.TimeValue
3431
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler
3532
import org.elasticsearch.common.xcontent.NamedXContentRegistry
3633
import org.elasticsearch.common.xcontent.ToXContent
@@ -39,12 +36,13 @@ import org.elasticsearch.common.xcontent.XContentHelper
3936
import org.elasticsearch.common.xcontent.XContentParser
4037
import org.elasticsearch.common.xcontent.XContentParserUtils
4138
import org.elasticsearch.common.xcontent.XContentType
39+
import org.elasticsearch.index.VersionType
4240
import org.elasticsearch.index.query.QueryBuilders
41+
import org.elasticsearch.rest.RestStatus
4342
import org.elasticsearch.search.builder.SearchSourceBuilder
44-
import org.elasticsearch.threadpool.ThreadPool
4543

4644
/**
47-
* Class to manage the moving of active alerts when a monitor or trigger is deleted.
45+
* Moves defunct active alerts to the alert history index when the corresponding monitor or trigger is deleted.
4846
*
4947
* The logic for moving alerts consists of:
5048
* 1. Find active alerts:
@@ -54,114 +52,65 @@ import org.elasticsearch.threadpool.ThreadPool
5452
* 3. Delete alerts from [ALERT_INDEX]
5553
* 4. Schedule a retry if there were any failures
5654
*/
57-
class AlertMover(
58-
private val client: Client,
59-
private val threadPool: ThreadPool,
60-
private val monitorRunner: MonitorRunner,
61-
private val alertIndices: AlertIndices,
62-
private val backoff: Iterator<TimeValue>,
63-
private val logger: Logger,
64-
private val monitorId: String,
65-
private val monitor: Monitor? = null
66-
) {
55+
suspend fun moveAlerts(client: Client, monitorId: String, monitor: Monitor? = null) {
56+
val boolQuery = QueryBuilders.boolQuery()
57+
.filter(QueryBuilders.termQuery(Alert.MONITOR_ID_FIELD, monitorId))
6758

68-
private var hasFailures: Boolean = false
69-
70-
fun run() {
71-
if (alertIndices.isInitialized()) {
72-
findActiveAlerts()
73-
}
74-
}
75-
76-
private fun findActiveAlerts() {
77-
val boolQuery = QueryBuilders.boolQuery()
78-
.filter(QueryBuilders.termQuery(Alert.MONITOR_ID_FIELD, monitorId))
79-
80-
if (monitor != null) {
81-
boolQuery.mustNot(QueryBuilders.termsQuery(Alert.TRIGGER_ID_FIELD, monitor.triggers.map { it.id }))
82-
}
83-
84-
val activeAlertsQuery = SearchSourceBuilder.searchSource()
85-
.query(boolQuery)
86-
.version(true)
87-
88-
val activeAlertsRequest = SearchRequest(AlertIndices.ALERT_INDEX)
89-
.routing(monitorId)
90-
.source(activeAlertsQuery)
91-
client.search(activeAlertsRequest, ActionListener.wrap(::onSearchResponse, ::onFailure))
92-
}
93-
94-
private fun onSearchResponse(response: SearchResponse) {
95-
// If no alerts are found, simply return
96-
if (response.hits.totalHits.value == 0L) return
97-
val indexRequests = response.hits.map { hit ->
98-
IndexRequest(AlertIndices.HISTORY_WRITE_INDEX)
99-
.routing(monitorId)
100-
.source(Alert.parse(alertContentParser(hit.sourceRef), hit.id, hit.version)
101-
.copy(state = Alert.State.DELETED)
102-
.toXContent(XContentFactory.jsonBuilder(), ToXContent.EMPTY_PARAMS))
103-
.setIfSeqNo(hit.seqNo)
104-
.setIfPrimaryTerm(hit.primaryTerm)
105-
.id(hit.id)
106-
}
107-
val copyRequest = BulkRequest().add(indexRequests)
108-
client.bulk(copyRequest, ActionListener.wrap(::onCopyResponse, ::onFailure))
59+
if (monitor != null) {
60+
boolQuery.mustNot(QueryBuilders.termsQuery(Alert.TRIGGER_ID_FIELD, monitor.triggers.map { it.id }))
10961
}
11062

111-
private fun onCopyResponse(response: BulkResponse) {
112-
val deleteRequests = response.items.filterNot { it.isFailed }.map {
113-
DeleteRequest(AlertIndices.ALERT_INDEX, it.id)
114-
.routing(monitorId)
115-
}
116-
if (response.hasFailures()) {
117-
hasFailures = true
118-
for (it in response.items) {
119-
logger.error("Failed to move deleted alert to alert history index: ${it.id}",
120-
it.failure.cause)
121-
}
122-
}
123-
124-
val bulkRequest = BulkRequest().add(deleteRequests)
125-
client.bulk(bulkRequest, ActionListener.wrap(::onDeleteResponse, ::onFailure))
63+
val activeAlertsQuery = SearchSourceBuilder.searchSource()
64+
.query(boolQuery)
65+
.version(true)
66+
67+
val activeAlertsRequest = SearchRequest(AlertIndices.ALERT_INDEX)
68+
.routing(monitorId)
69+
.source(activeAlertsQuery)
70+
val response: SearchResponse = client.suspendUntil { search(activeAlertsRequest, it) }
71+
72+
// If no alerts are found, simply return
73+
if (response.hits.totalHits.value == 0L) return
74+
val indexRequests = response.hits.map { hit ->
75+
IndexRequest(AlertIndices.HISTORY_WRITE_INDEX)
76+
.routing(monitorId)
77+
.source(Alert.parse(alertContentParser(hit.sourceRef), hit.id, hit.version)
78+
.copy(state = Alert.State.DELETED)
79+
.toXContent(XContentFactory.jsonBuilder(), ToXContent.EMPTY_PARAMS))
80+
.version(hit.version)
81+
.versionType(VersionType.EXTERNAL_GTE)
82+
.id(hit.id)
12683
}
127-
128-
private fun onDeleteResponse(response: BulkResponse) {
129-
if (response.hasFailures()) {
130-
hasFailures = true
131-
for (it in response.items) {
132-
logger.error("Failed to delete active alert from alert index: ${it.id}",
133-
it.failure.cause)
134-
}
135-
}
136-
if (hasFailures) reschedule()
84+
val copyRequest = BulkRequest().add(indexRequests)
85+
val copyResponse: BulkResponse = client.suspendUntil { bulk(copyRequest, it) }
86+
87+
val deleteRequests = copyResponse.items.filterNot { it.isFailed }.map {
88+
DeleteRequest(AlertIndices.ALERT_INDEX, it.id)
89+
.routing(monitorId)
90+
.version(it.version)
91+
.versionType(VersionType.EXTERNAL_GTE)
13792
}
138-
139-
private fun onFailure(e: Exception) {
140-
logger.error("Failed to move alerts for ${monitorIdTriggerIdsTuple()}", e)
141-
reschedule()
93+
val deleteResponse: BulkResponse = client.suspendUntil { bulk(BulkRequest().add(deleteRequests), it) }
94+
95+
if (copyResponse.hasFailures()) {
96+
val retryCause = copyResponse.items.filter { it.isFailed }
97+
.firstOrNull { it.status() == RestStatus.TOO_MANY_REQUESTS }
98+
?.failure?.cause
99+
throw RuntimeException("Failed to copy alerts for [$monitorId, ${monitor?.triggers?.map { it.id }}]: " +
100+
copyResponse.buildFailureMessage(), retryCause)
142101
}
143-
144-
private fun reschedule() {
145-
if (backoff.hasNext()) {
146-
logger.warn("Rescheduling AlertMover due to failure for ${monitorIdTriggerIdsTuple()}")
147-
val wait = backoff.next()
148-
val runnable = Runnable {
149-
monitorRunner.rescheduleAlertMover(monitorId, monitor, backoff)
150-
}
151-
threadPool.schedule(runnable, wait, ThreadPool.Names.SAME)
152-
} else {
153-
logger.warn("Retries exhausted for ${monitorIdTriggerIdsTuple()}")
154-
}
102+
if (deleteResponse.hasFailures()) {
103+
val retryCause = deleteResponse.items.filter { it.isFailed }
104+
.firstOrNull { it.status() == RestStatus.TOO_MANY_REQUESTS }
105+
?.failure?.cause
106+
throw RuntimeException("Failed to delete alerts for [$monitorId, ${monitor?.triggers?.map { it.id }}]: " +
107+
deleteResponse.buildFailureMessage(), retryCause)
155108
}
109+
}
156110

157-
private fun alertContentParser(bytesReference: BytesReference): XContentParser {
158-
val xcp = XContentHelper.createParser(NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE,
111+
private fun alertContentParser(bytesReference: BytesReference): XContentParser {
112+
val xcp = XContentHelper.createParser(NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE,
159113
bytesReference, XContentType.JSON)
160-
XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, xcp.nextToken(), xcp::getTokenLocation)
161-
return xcp
162-
}
163-
164-
private fun monitorIdTriggerIdsTuple(): String {
165-
return "[$monitorId, ${monitor?.triggers?.map { it.id }}]"
166-
}
114+
XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, xcp.nextToken(), xcp::getTokenLocation)
115+
return xcp
167116
}

alerting/src/main/kotlin/com/amazon/opendistroforelasticsearch/alerting/model/action/Action.kt

+19-6
Original file line numberDiff line numberDiff line change
@@ -46,15 +46,19 @@ data class Action(
4646
}
4747

4848
override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder {
49-
return builder.startObject()
49+
val xContentBuilder = builder.startObject()
5050
.field(ID_FIELD, id)
5151
.field(NAME_FIELD, name)
5252
.field(DESTINATION_ID_FIELD, destinationId)
53-
.field(SUBJECT_TEMPLATE_FIELD, subjectTemplate)
5453
.field(MESSAGE_TEMPLATE_FIELD, messageTemplate)
5554
.field(THROTTLE_ENABLED_FIELD, throttleEnabled)
56-
.field(THROTTLE_FIELD, throttle)
57-
.endObject()
55+
if (subjectTemplate != null) {
56+
xContentBuilder.field(SUBJECT_TEMPLATE_FIELD, subjectTemplate)
57+
}
58+
if (throttle != null) {
59+
xContentBuilder.field(THROTTLE_FIELD, throttle)
60+
}
61+
return xContentBuilder.endObject()
5862
}
5963

6064
fun asTemplateArg(): Map<String, Any> {
@@ -93,9 +97,14 @@ data class Action(
9397
ID_FIELD -> id = xcp.text()
9498
NAME_FIELD -> name = xcp.textOrNull()
9599
DESTINATION_ID_FIELD -> destinationId = xcp.textOrNull()
96-
SUBJECT_TEMPLATE_FIELD -> subjectTemplate = Script.parse(xcp, Script.DEFAULT_TEMPLATE_LANG)
100+
SUBJECT_TEMPLATE_FIELD -> {
101+
subjectTemplate = if (xcp.currentToken() == XContentParser.Token.VALUE_NULL) null else
102+
Script.parse(xcp, Script.DEFAULT_TEMPLATE_LANG)
103+
}
97104
MESSAGE_TEMPLATE_FIELD -> messageTemplate = Script.parse(xcp, Script.DEFAULT_TEMPLATE_LANG)
98-
THROTTLE_FIELD -> throttle = Throttle.parse(xcp)
105+
THROTTLE_FIELD -> {
106+
throttle = if (xcp.currentToken() == XContentParser.Token.VALUE_NULL) null else Throttle.parse(xcp)
107+
}
99108
THROTTLE_ENABLED_FIELD -> {
100109
throttleEnabled = xcp.booleanValue()
101110
}
@@ -106,6 +115,10 @@ data class Action(
106115
}
107116
}
108117

118+
if (throttleEnabled) {
119+
requireNotNull(throttle, { "Action throttle enabled but not set throttle value" })
120+
}
121+
109122
return Action(requireNotNull(name) { "Action name is null" },
110123
requireNotNull(destinationId) { "Destination id is null" },
111124
subjectTemplate,

alerting/src/main/kotlin/com/amazon/opendistroforelasticsearch/alerting/model/action/Throttle.kt

+1-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ data class Throttle(
4646
@Throws(IOException::class)
4747
fun parse(xcp: XContentParser): Throttle {
4848
var value: Int = 0
49-
var unit: ChronoUnit? = null
49+
var unit: ChronoUnit = ChronoUnit.MINUTES // only support MINUTES throttle unit currently
5050

5151
XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, xcp.currentToken(), xcp::getTokenLocation)
5252
while (xcp.nextToken() != XContentParser.Token.END_OBJECT) {

alerting/src/main/kotlin/com/amazon/opendistroforelasticsearch/alerting/model/destination/Destination.kt

+1
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ data class Destination(
132132
}
133133
}
134134

135+
@Throws(IOException::class)
135136
fun publish(compiledSubject: String?, compiledMessage: String): String {
136137
val destinationMessage: BaseMessage
137138
when (type) {

0 commit comments

Comments
 (0)