Skip to content

Commit 70eabe5

Browse files
sohamiSorabh Hamirwasia
and
Sorabh Hamirwasia
committed
Part 1: Support for cancel_after_timeinterval parameter in search and msearch request (#986)
* Part 1: Support for cancel_after_timeinterval parameter in search and msearch request This commit introduces the new request level parameter to configure the timeout interval after which a search request will be cancelled. For msearch request the parameter is supported both at parent request and at sub child search requests. If it is provided at parent level and child search request doesn't have it then the parent level value is set at such child request. The parent level msearch is not used to cancel the parent request as it may be tricky to come up with correct value in cases when child search request can have different runtimes TEST: Added test for ser/de with new parameter Signed-off-by: Sorabh Hamirwasia <[email protected]> * Part 2: Support for cancel_after_timeinterval parameter in search and msearch request This commit adds the handling of the new request level parameter and schedule cancellation task. It also adds a cluster setting to set a global cancellation timeout for search request which will be used in absence of request level timeout. TEST: Added new tests in SearchCancellationIT Signed-off-by: Sorabh Hamirwasia <[email protected]> * Address Review feedback for Part 1 Signed-off-by: Sorabh Hamirwasia <[email protected]> * Address review feedback for Part 2 Signed-off-by: Sorabh Hamirwasia <[email protected]> * Update CancellableTask to remove the cancelOnTimeout boolean flag Signed-off-by: Sorabh Hamirwasia <[email protected]> * Replace search.cancellation.timeout cluster setting with search.enforce_server.timeout.cancellation to control if cluster level cancel_after_time_interval should take precedence over request level cancel_after_time_interval value Signed-off-by: Sorabh Hamirwasia <[email protected]> * Removing the search.enforce_server.timeout.cancellation cluster setting and just keeping search.cancel_after_time_interval setting with request level parameter taking the precedence. 
Signed-off-by: Sorabh Hamirwasia <[email protected]> Co-authored-by: Sorabh Hamirwasia <[email protected]>
1 parent 987bfcf commit 70eabe5

File tree

17 files changed

+590
-17
lines changed

17 files changed

+590
-17
lines changed

server/src/internalClusterTest/java/org/opensearch/search/SearchCancellationIT.java

+282-3
Large diffs are not rendered by default.

server/src/main/java/org/opensearch/action/search/MultiSearchRequest.java

+7
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
import static org.opensearch.common.xcontent.support.XContentMapValues.nodeBooleanValue;
6767
import static org.opensearch.common.xcontent.support.XContentMapValues.nodeStringArrayValue;
6868
import static org.opensearch.common.xcontent.support.XContentMapValues.nodeStringValue;
69+
import static org.opensearch.common.xcontent.support.XContentMapValues.nodeTimeValue;
6970

7071
/**
7172
* A multi search API request.
@@ -272,6 +273,9 @@ public static void readMultiLineFormat(BytesReference data,
272273
allowNoIndices = value;
273274
} else if ("ignore_throttled".equals(entry.getKey()) || "ignoreThrottled".equals(entry.getKey())) {
274275
ignoreThrottled = value;
276+
} else if ("cancel_after_time_interval".equals(entry.getKey()) ||
277+
"cancelAfterTimeInterval".equals(entry.getKey())) {
278+
searchRequest.setCancelAfterTimeInterval(nodeTimeValue(value, null));
275279
} else {
276280
throw new IllegalArgumentException("key [" + entry.getKey() + "] is not supported in the metadata section");
277281
}
@@ -362,6 +366,9 @@ public static void writeSearchRequestParams(SearchRequest request, XContentBuild
362366
if (request.allowPartialSearchResults() != null) {
363367
xContentBuilder.field("allow_partial_search_results", request.allowPartialSearchResults());
364368
}
369+
if (request.getCancelAfterTimeInterval() != null) {
370+
xContentBuilder.field("cancel_after_time_interval", request.getCancelAfterTimeInterval().getStringRep());
371+
}
365372
xContentBuilder.endObject();
366373
}
367374

server/src/main/java/org/opensearch/action/search/SearchRequest.java

+26-4
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
package org.opensearch.action.search;
3434

3535
import org.opensearch.LegacyESVersion;
36+
import org.opensearch.Version;
3637
import org.opensearch.action.ActionRequest;
3738
import org.opensearch.action.ActionRequestValidationException;
3839
import org.opensearch.action.IndicesRequest;
@@ -114,6 +115,8 @@ public class SearchRequest extends ActionRequest implements IndicesRequest.Repla
114115

115116
private IndicesOptions indicesOptions = DEFAULT_INDICES_OPTIONS;
116117

118+
private TimeValue cancelAfterTimeInterval;
119+
117120
public SearchRequest() {
118121
this.localClusterAlias = null;
119122
this.absoluteStartMillis = DEFAULT_ABSOLUTE_START_MILLIS;
@@ -191,6 +194,7 @@ private SearchRequest(SearchRequest searchRequest, String[] indices, String loca
191194
this.localClusterAlias = localClusterAlias;
192195
this.absoluteStartMillis = absoluteStartMillis;
193196
this.finalReduce = finalReduce;
197+
this.cancelAfterTimeInterval = searchRequest.cancelAfterTimeInterval;
194198
}
195199

196200
/**
@@ -237,6 +241,10 @@ public SearchRequest(StreamInput in) throws IOException {
237241
if (in.getVersion().onOrAfter(LegacyESVersion.V_7_0_0)) {
238242
ccsMinimizeRoundtrips = in.readBoolean();
239243
}
244+
245+
if (in.getVersion().onOrAfter(Version.V_1_1_0)) {
246+
cancelAfterTimeInterval = in.readOptionalTimeValue();
247+
}
240248
}
241249

242250
@Override
@@ -271,6 +279,10 @@ public void writeTo(StreamOutput out) throws IOException {
271279
if (out.getVersion().onOrAfter(LegacyESVersion.V_7_0_0)) {
272280
out.writeBoolean(ccsMinimizeRoundtrips);
273281
}
282+
283+
if (out.getVersion().onOrAfter(Version.V_1_1_0)) {
284+
out.writeOptionalTimeValue(cancelAfterTimeInterval);
285+
}
274286
}
275287

276288
@Override
@@ -669,9 +681,17 @@ public static int resolveTrackTotalHitsUpTo(Scroll scroll, SearchSourceBuilder s
669681
SearchContext.DEFAULT_TRACK_TOTAL_HITS_UP_TO : source.trackTotalHitsUpTo();
670682
}
671683

684+
public void setCancelAfterTimeInterval(TimeValue cancelAfterTimeInterval) {
685+
this.cancelAfterTimeInterval = cancelAfterTimeInterval;
686+
}
687+
688+
public TimeValue getCancelAfterTimeInterval() {
689+
return cancelAfterTimeInterval;
690+
}
691+
672692
@Override
673693
public SearchTask createTask(long id, String type, String action, TaskId parentTaskId, Map<String, String> headers) {
674-
return new SearchTask(id, type, action, this::buildDescription, parentTaskId, headers);
694+
return new SearchTask(id, type, action, this::buildDescription, parentTaskId, headers, cancelAfterTimeInterval);
675695
}
676696

677697
public final String buildDescription() {
@@ -718,14 +738,15 @@ public boolean equals(Object o) {
718738
Objects.equals(allowPartialSearchResults, that.allowPartialSearchResults) &&
719739
Objects.equals(localClusterAlias, that.localClusterAlias) &&
720740
absoluteStartMillis == that.absoluteStartMillis &&
721-
ccsMinimizeRoundtrips == that.ccsMinimizeRoundtrips;
741+
ccsMinimizeRoundtrips == that.ccsMinimizeRoundtrips &&
742+
Objects.equals(cancelAfterTimeInterval, that.cancelAfterTimeInterval);
722743
}
723744

724745
@Override
725746
public int hashCode() {
726747
return Objects.hash(searchType, Arrays.hashCode(indices), routing, preference, source, requestCache,
727748
scroll, Arrays.hashCode(types), indicesOptions, batchedReduceSize, maxConcurrentShardRequests, preFilterShardSize,
728-
allowPartialSearchResults, localClusterAlias, absoluteStartMillis, ccsMinimizeRoundtrips);
749+
allowPartialSearchResults, localClusterAlias, absoluteStartMillis, ccsMinimizeRoundtrips, cancelAfterTimeInterval);
729750
}
730751

731752
@Override
@@ -746,6 +767,7 @@ public String toString() {
746767
", localClusterAlias=" + localClusterAlias +
747768
", getOrCreateAbsoluteStartMillis=" + absoluteStartMillis +
748769
", ccsMinimizeRoundtrips=" + ccsMinimizeRoundtrips +
749-
", source=" + source + '}';
770+
", source=" + source +
771+
", cancelAfterTimeInterval=" + cancelAfterTimeInterval + "}";
750772
}
751773
}

server/src/main/java/org/opensearch/action/search/SearchRequestBuilder.java

+8
Original file line numberDiff line numberDiff line change
@@ -626,4 +626,12 @@ public SearchRequestBuilder setPreFilterShardSize(int preFilterShardSize) {
626626
this.request.setPreFilterShardSize(preFilterShardSize);
627627
return this;
628628
}
629+
630+
/**
631+
* Request level time interval to control how long search is allowed to execute after which it is cancelled.
632+
*/
633+
public SearchRequestBuilder setCancelAfterTimeInterval(TimeValue cancelAfterTimeInterval) {
634+
this.request.setCancelAfterTimeInterval(cancelAfterTimeInterval);
635+
return this;
636+
}
629637
}

server/src/main/java/org/opensearch/action/search/SearchTask.java

+11-3
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,15 @@
3232

3333
package org.opensearch.action.search;
3434

35+
import org.opensearch.common.unit.TimeValue;
3536
import org.opensearch.tasks.CancellableTask;
3637
import org.opensearch.tasks.TaskId;
3738

3839
import java.util.Map;
3940
import java.util.function.Supplier;
4041

42+
import static org.opensearch.search.SearchService.NO_TIMEOUT;
43+
4144
/**
4245
* Task storing information about a currently running {@link SearchRequest}.
4346
*/
@@ -46,9 +49,14 @@ public class SearchTask extends CancellableTask {
4649
private final Supplier<String> descriptionSupplier;
4750
private SearchProgressListener progressListener = SearchProgressListener.NOOP;
4851

49-
public SearchTask(long id, String type, String action, Supplier<String> descriptionSupplier,
50-
TaskId parentTaskId, Map<String, String> headers) {
51-
super(id, type, action, null, parentTaskId, headers);
52+
public SearchTask(long id, String type, String action, Supplier<String> descriptionSupplier, TaskId parentTaskId,
53+
Map<String, String> headers) {
54+
this(id, type, action, descriptionSupplier, parentTaskId, headers, NO_TIMEOUT);
55+
}
56+
57+
public SearchTask(long id, String type, String action, Supplier<String> descriptionSupplier, TaskId parentTaskId,
58+
Map<String, String> headers, TimeValue cancelAfterTimeInterval) {
59+
super(id, type, action, null, parentTaskId, headers, cancelAfterTimeInterval);
5260
this.descriptionSupplier = descriptionSupplier;
5361
}
5462

server/src/main/java/org/opensearch/action/search/TransportSearchAction.java

+17
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
import org.opensearch.action.support.ActionFilters;
4242
import org.opensearch.action.support.HandledTransportAction;
4343
import org.opensearch.action.support.IndicesOptions;
44+
import org.opensearch.action.support.TimeoutTaskCancellationUtility;
4445
import org.opensearch.client.Client;
4546
import org.opensearch.client.OriginSettingClient;
4647
import org.opensearch.client.node.NodeClient;
@@ -81,6 +82,7 @@
8182
import org.opensearch.search.internal.SearchContext;
8283
import org.opensearch.search.profile.ProfileShardResult;
8384
import org.opensearch.search.profile.SearchProfileShardResults;
85+
import org.opensearch.tasks.CancellableTask;
8486
import org.opensearch.tasks.Task;
8587
import org.opensearch.tasks.TaskId;
8688
import org.opensearch.threadpool.ThreadPool;
@@ -121,6 +123,13 @@ public class TransportSearchAction extends HandledTransportAction<SearchRequest,
121123
public static final Setting<Long> SHARD_COUNT_LIMIT_SETTING = Setting.longSetting(
122124
"action.search.shard_count.limit", Long.MAX_VALUE, 1L, Property.Dynamic, Property.NodeScope);
123125

126+
// cluster level setting for timeout based search cancellation. If search request level parameter is present then that will take
127+
// precedence over the cluster setting value
128+
public static final String SEARCH_CANCEL_AFTER_TIME_INTERVAL_SETTING_KEY = "search.cancel_after_time_interval";
129+
public static final Setting<TimeValue> SEARCH_CANCEL_AFTER_TIME_INTERVAL_SETTING =
130+
Setting.timeSetting(SEARCH_CANCEL_AFTER_TIME_INTERVAL_SETTING_KEY, SearchService.NO_TIMEOUT, Setting.Property.Dynamic,
131+
Setting.Property.NodeScope);
132+
124133
private final NodeClient client;
125134
private final ThreadPool threadPool;
126135
private final ClusterService clusterService;
@@ -239,6 +248,14 @@ long buildTookInMillis() {
239248

240249
@Override
241250
protected void doExecute(Task task, SearchRequest searchRequest, ActionListener<SearchResponse> listener) {
251+
// only if task is of type CancellableTask and support cancellation on timeout, treat this request eligible for timeout based
252+
// cancellation. There may be other top level requests like AsyncSearch which is using SearchRequest internally and has it's own
253+
// cancellation mechanism. For such cases, the SearchRequest when created can override the createTask and set the
254+
// cancelAfterTimeInterval to NO_TIMEOUT and bypass this mechanism
255+
if (task instanceof CancellableTask) {
256+
listener = TimeoutTaskCancellationUtility.wrapWithCancellationListener(client, (CancellableTask) task,
257+
clusterService.getClusterSettings(), listener);
258+
}
242259
executeRequest(task, searchRequest, this::searchAsyncAction, listener);
243260
}
244261

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.action.support;
10+
11+
import org.apache.logging.log4j.LogManager;
12+
import org.apache.logging.log4j.Logger;
13+
import org.apache.logging.log4j.message.ParameterizedMessage;
14+
import org.opensearch.action.ActionListener;
15+
import org.opensearch.action.admin.cluster.node.tasks.cancel.CancelTasksRequest;
16+
import org.opensearch.client.OriginSettingClient;
17+
import org.opensearch.client.node.NodeClient;
18+
import org.opensearch.common.settings.ClusterSettings;
19+
import org.opensearch.common.unit.TimeValue;
20+
import org.opensearch.search.SearchService;
21+
import org.opensearch.tasks.CancellableTask;
22+
import org.opensearch.tasks.TaskId;
23+
import org.opensearch.threadpool.Scheduler;
24+
import org.opensearch.threadpool.ThreadPool;
25+
26+
import java.util.concurrent.TimeUnit;
27+
import java.util.concurrent.atomic.AtomicBoolean;
28+
29+
import static org.opensearch.action.admin.cluster.node.tasks.get.GetTaskAction.TASKS_ORIGIN;
30+
import static org.opensearch.action.search.TransportSearchAction.SEARCH_CANCEL_AFTER_TIME_INTERVAL_SETTING;
31+
32+
public class TimeoutTaskCancellationUtility {
33+
34+
private static final Logger logger = LogManager.getLogger(TimeoutTaskCancellationUtility.class);
35+
36+
/**
37+
* Wraps a listener with a timeout listener {@link TimeoutRunnableListener} to schedule the task cancellation for provided tasks on
38+
* generic thread pool
39+
* @param client - {@link NodeClient}
40+
* @param taskToCancel - task to schedule cancellation for
41+
* @param clusterSettings - {@link ClusterSettings}
42+
* @param listener - original listener associated with the task
43+
* @return wrapped listener
44+
*/
45+
public static <Response> ActionListener<Response> wrapWithCancellationListener(NodeClient client, CancellableTask taskToCancel,
46+
ClusterSettings clusterSettings, ActionListener<Response> listener) {
47+
final TimeValue globalTimeout = clusterSettings.get(SEARCH_CANCEL_AFTER_TIME_INTERVAL_SETTING);
48+
final TimeValue timeoutInterval = (taskToCancel.getCancellationTimeout() == null) ? globalTimeout
49+
: taskToCancel.getCancellationTimeout();
50+
// Note: -1 (or no timeout) will help to turn off cancellation. The combinations will be request level set at -1 or request level
51+
// set to null and cluster level set to -1.
52+
ActionListener<Response> listenerToReturn = listener;
53+
if (timeoutInterval.equals(SearchService.NO_TIMEOUT)) {
54+
return listenerToReturn;
55+
}
56+
57+
try {
58+
final TimeoutRunnableListener<Response> wrappedListener = new TimeoutRunnableListener<>(timeoutInterval, listener, () -> {
59+
final CancelTasksRequest cancelTasksRequest = new CancelTasksRequest();
60+
cancelTasksRequest.setTaskId(new TaskId(client.getLocalNodeId(), taskToCancel.getId()));
61+
cancelTasksRequest.setReason("Cancellation timeout of " + timeoutInterval + " is expired");
62+
// force the origin to execute the cancellation as a system user
63+
new OriginSettingClient(client, TASKS_ORIGIN).admin().cluster()
64+
.cancelTasks(cancelTasksRequest, ActionListener.wrap(r -> logger.debug(
65+
"Scheduled cancel task with timeout: {} for original task: {} is successfully completed", timeoutInterval,
66+
cancelTasksRequest.getTaskId()),
67+
e -> logger.error(new ParameterizedMessage("Scheduled cancel task with timeout: {} for original task: {} is failed",
68+
timeoutInterval, cancelTasksRequest.getTaskId()), e))
69+
);
70+
});
71+
wrappedListener.cancellable = client.threadPool().schedule(wrappedListener, timeoutInterval, ThreadPool.Names.GENERIC);
72+
listenerToReturn = wrappedListener;
73+
} catch (Exception ex) {
74+
// if there is any exception in scheduling the cancellation task then continue without it
75+
logger.warn("Failed to schedule the cancellation task for original task: {}, will continue without it", taskToCancel.getId());
76+
}
77+
return listenerToReturn;
78+
}
79+
80+
/**
81+
* Timeout listener which executes the provided runnable after timeout is expired and if a response/failure is not yet received.
82+
* If either a response/failure is received before timeout then the scheduled task is cancelled and response/failure is sent back to
83+
* the original listener.
84+
*/
85+
private static class TimeoutRunnableListener<Response> implements ActionListener<Response>, Runnable {
86+
87+
private static final Logger logger = LogManager.getLogger(TimeoutRunnableListener.class);
88+
89+
// Runnable to execute after timeout
90+
private final TimeValue timeout;
91+
private final ActionListener<Response> originalListener;
92+
private final Runnable timeoutRunnable;
93+
private final AtomicBoolean executeRunnable = new AtomicBoolean(true);
94+
private volatile Scheduler.ScheduledCancellable cancellable;
95+
private final long creationTime;
96+
97+
TimeoutRunnableListener(TimeValue timeout, ActionListener<Response> listener, Runnable runAfterTimeout) {
98+
this.timeout = timeout;
99+
this.originalListener = listener;
100+
this.timeoutRunnable = runAfterTimeout;
101+
this.creationTime = System.nanoTime();
102+
}
103+
104+
@Override public void onResponse(Response response) {
105+
checkAndCancel();
106+
originalListener.onResponse(response);
107+
}
108+
109+
@Override public void onFailure(Exception e) {
110+
checkAndCancel();
111+
originalListener.onFailure(e);
112+
}
113+
114+
@Override public void run() {
115+
try {
116+
if (executeRunnable.compareAndSet(true, false)) {
117+
timeoutRunnable.run();
118+
} // else do nothing since either response/failure is already sent to client
119+
} catch (Exception ex) {
120+
// ignore the exception
121+
logger.error(new ParameterizedMessage("Ignoring the failure to run the provided runnable after timeout of {} with " +
122+
"exception", timeout), ex);
123+
}
124+
}
125+
126+
private void checkAndCancel() {
127+
if (executeRunnable.compareAndSet(true, false)) {
128+
logger.debug("Aborting the scheduled cancel task after {}",
129+
TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - creationTime));
130+
// timer has not yet expired so cancel it
131+
cancellable.cancel();
132+
}
133+
}
134+
}
135+
}

server/src/main/java/org/opensearch/common/settings/ClusterSettings.java

+1
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,7 @@ public void apply(Settings value, Settings current, Settings previous) {
345345
SearchService.DEFAULT_ALLOW_PARTIAL_SEARCH_RESULTS,
346346
ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING,
347347
TransportSearchAction.SHARD_COUNT_LIMIT_SETTING,
348+
TransportSearchAction.SEARCH_CANCEL_AFTER_TIME_INTERVAL_SETTING,
348349
RemoteClusterService.REMOTE_CLUSTER_SKIP_UNAVAILABLE,
349350
RemoteClusterService.SEARCH_REMOTE_CLUSTER_SKIP_UNAVAILABLE,
350351
SniffConnectionStrategy.REMOTE_CONNECTIONS_PER_CLUSTER,

server/src/main/java/org/opensearch/rest/action/search/RestMultiSearchAction.java

+7
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
import org.opensearch.common.io.stream.NamedWriteableRegistry;
4545
import org.opensearch.common.logging.DeprecationLogger;
4646
import org.opensearch.common.settings.Settings;
47+
import org.opensearch.common.unit.TimeValue;
4748
import org.opensearch.common.xcontent.XContent;
4849
import org.opensearch.common.xcontent.XContentParser;
4950
import org.opensearch.common.xcontent.XContentType;
@@ -158,6 +159,7 @@ public static MultiSearchRequest parseRequest(RestRequest restRequest,
158159
multiRequest.add(searchRequest);
159160
});
160161
List<SearchRequest> requests = multiRequest.requests();
162+
final TimeValue cancelAfterTimeInterval = restRequest.paramAsTime("cancel_after_time_interval", null);
161163
for (SearchRequest request : requests) {
162164
// preserve if it's set on the request
163165
if (preFilterShardSize != null && request.getPreFilterShardSize() == null) {
@@ -166,6 +168,11 @@ public static MultiSearchRequest parseRequest(RestRequest restRequest,
166168
if (maxConcurrentShardRequests != null) {
167169
request.setMaxConcurrentShardRequests(maxConcurrentShardRequests);
168170
}
171+
// if cancel_after_time_interval parameter is set at per search request level then that is used otherwise one set at
172+
// multi search request level will be used
173+
if (request.getCancelAfterTimeInterval() == null) {
174+
request.setCancelAfterTimeInterval(cancelAfterTimeInterval);
175+
}
169176
}
170177
return multiRequest;
171178
}

server/src/main/java/org/opensearch/rest/action/search/RestSearchAction.java

+2
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,8 @@ public static void parseSearchRequest(SearchRequest searchRequest, RestRequest r
208208
searchRequest.setCcsMinimizeRoundtrips(
209209
request.paramAsBoolean("ccs_minimize_roundtrips", searchRequest.isCcsMinimizeRoundtrips()));
210210
}
211+
212+
searchRequest.setCancelAfterTimeInterval(request.paramAsTime("cancel_after_time_interval", null));
211213
}
212214

213215
/**

0 commit comments

Comments
 (0)