Skip to content

Commit 6943dbc

Browse files
harshavamsimsfroh
authored andcommitted
Approximate match all query with a sort (opensearch-project#17772)
* Adding approximate match all query Signed-off-by: Harsha Vamsi Kalluri <[email protected]> * Minor refactor Signed-off-by: Harsha Vamsi Kalluri <[email protected]> * Fix doc field sorting Signed-off-by: Harsha Vamsi Kalluri <[email protected]> * Fix tests Signed-off-by: Harsha Vamsi Kalluri <[email protected]> * Fix more tests Signed-off-by: Harsha Vamsi Kalluri <[email protected]> * Fix more tests Signed-off-by: Harsha Vamsi Kalluri <[email protected]> * Fix backward tests Signed-off-by: Harsha Vamsi Kalluri <[email protected]> --------- Signed-off-by: Harsha Vamsi Kalluri <[email protected]> Signed-off-by: Michael Froh <[email protected]> Co-authored-by: Michael Froh <[email protected]> Signed-off-by: Sriram Ganesh <[email protected]>
1 parent 1673fd3 commit 6943dbc

File tree

17 files changed

+278
-19
lines changed

17 files changed

+278
-19
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
3636
- Add update and delete support in pull-based ingestion ([#17822](https://github.com/opensearch-project/OpenSearch/pull/17822))
3737
- Allow maxPollSize and pollTimeout in IngestionSource to be configurable ([#17863](https://github.com/opensearch-project/OpenSearch/pull/17863))
3838
- [Star Tree] [Search] Add query changes to support unsigned-long in star tree ([#17275](https://github.com/opensearch-project/OpenSearch/pull/17275))
39+
- Add `ApproximateMatchAllQuery` that targets match_all queries and approximates sorts ([#17772](https://github.com/opensearch-project/OpenSearch/pull/17772))
3940
- Add TermsQuery support to Search GRPC endpoint ([#17888](https://github.com/opensearch-project/OpenSearch/pull/17888))
4041

4142
### Changed

rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/10_basic.yml

+4-2
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@ setup:
1212

1313
---
1414
"Validate query api":
15-
15+
- skip:
16+
version: ' - 2.99.99'
17+
reason: message changed in 3.0.0
1618

1719
- do:
1820
indices.validate_query:
@@ -64,7 +66,7 @@ setup:
6466
- is_true: valid
6567
- match: {_shards.failed: 0}
6668
- match: {explanations.0.index: 'testing'}
67-
- match: {explanations.0.explanation: '*:*'}
69+
- match: {explanations.0.explanation: 'ApproximateScoreQuery(originalQuery=*:*, approximationQuery=Approximate(*:*))'}
6870

6971
---
7072
"Validate body without query element":

server/src/internalClusterTest/java/org/opensearch/search/sort/FieldSortIT.java

+1-2
Original file line numberDiff line numberDiff line change
@@ -1145,11 +1145,10 @@ public void testSortMissingNumbersMinMax() throws Exception {
11451145
.get();
11461146
assertNoFailures(searchResponse);
11471147

1148-
assertThat(searchResponse.getHits().getTotalHits().value(), equalTo(3L));
1148+
assertThat(searchResponse.getHits().getTotalHits().value(), equalTo(2L));
11491149
assertThat(searchResponse.getHits().getAt(0).getId(), equalTo("1"));
11501150
// The order here could be unstable (depends on document order) since missing == field value
11511151
assertThat(searchResponse.getHits().getAt(1).getId(), is(oneOf("3", "2")));
1152-
assertThat(searchResponse.getHits().getAt(2).getId(), is(oneOf("2", "3")));
11531152

11541153
logger.info("--> sort with missing _last");
11551154
searchResponse = client().prepareSearch()

server/src/internalClusterTest/java/org/opensearch/validate/SimpleValidateQueryIT.java

+4-1
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,10 @@ public void testExplainNoQuery() {
294294
assertThat(validateQueryResponse.isValid(), equalTo(true));
295295
assertThat(validateQueryResponse.getQueryExplanation().size(), equalTo(1));
296296
assertThat(validateQueryResponse.getQueryExplanation().get(0).getIndex(), equalTo("test"));
297-
assertThat(validateQueryResponse.getQueryExplanation().get(0).getExplanation(), equalTo("*:*"));
297+
assertThat(
298+
validateQueryResponse.getQueryExplanation().get(0).getExplanation(),
299+
equalTo("ApproximateScoreQuery(originalQuery=*:*, approximationQuery=Approximate(*:*))")
300+
);
298301
}
299302

300303
public void testExplainFilteredAlias() {

server/src/main/java/org/opensearch/index/query/MatchAllQueryBuilder.java

+3-1
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@
4040
import org.opensearch.core.xcontent.ObjectParser;
4141
import org.opensearch.core.xcontent.XContentBuilder;
4242
import org.opensearch.core.xcontent.XContentParser;
43+
import org.opensearch.search.approximate.ApproximateMatchAllQuery;
44+
import org.opensearch.search.approximate.ApproximateScoreQuery;
4345

4446
import java.io.IOException;
4547

@@ -88,7 +90,7 @@ public static MatchAllQueryBuilder fromXContent(XContentParser parser) {
8890

8991
@Override
9092
protected Query doToQuery(QueryShardContext context) {
91-
return Queries.newMatchAllQuery();
93+
return new ApproximateScoreQuery(Queries.newMatchAllQuery(), new ApproximateMatchAllQuery());
9294
}
9395

9496
@Override
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.search.approximate;
10+
11+
import org.apache.lucene.search.IndexSearcher;
12+
import org.apache.lucene.search.Query;
13+
import org.apache.lucene.search.QueryVisitor;
14+
import org.opensearch.index.mapper.MappedFieldType;
15+
import org.opensearch.search.internal.SearchContext;
16+
import org.opensearch.search.sort.FieldSortBuilder;
17+
18+
import java.io.IOException;
19+
import java.util.Objects;
20+
21+
/**
22+
* Replaces match-all query with a less expensive query if possible.
23+
* <p>
24+
* Currently, will rewrite to a bounded range query over the high/low end of a field if a primary sort is specified
25+
* on that field.
26+
*/
27+
public class ApproximateMatchAllQuery extends ApproximateQuery {
28+
private ApproximateQuery approximation = null;
29+
30+
@Override
31+
protected boolean canApproximate(SearchContext context) {
32+
approximation = null;
33+
if (context == null) {
34+
return false;
35+
}
36+
if (context.aggregations() != null) {
37+
return false;
38+
}
39+
40+
if (context.request() != null && context.request().source() != null && context.innerHits().getInnerHits().isEmpty()) {
41+
FieldSortBuilder primarySortField = FieldSortBuilder.getPrimaryFieldSortOrNull(context.request().source());
42+
if (primarySortField != null
43+
&& primarySortField.missing() == null
44+
&& !primarySortField.fieldName().equals(FieldSortBuilder.DOC_FIELD_NAME)
45+
&& !primarySortField.fieldName().equals(FieldSortBuilder.ID_FIELD_NAME)) {
46+
MappedFieldType mappedFieldType = context.getQueryShardContext().fieldMapper(primarySortField.fieldName());
47+
if (mappedFieldType == null) {
48+
return false;
49+
}
50+
Query rangeQuery = mappedFieldType.rangeQuery(null, null, false, false, null, null, null, context.getQueryShardContext());
51+
if (rangeQuery instanceof ApproximateScoreQuery approximateScoreQuery) {
52+
approximateScoreQuery.setContext(context);
53+
if (approximateScoreQuery.resolvedQuery instanceof ApproximateQuery) {
54+
approximation = (ApproximateQuery) approximateScoreQuery.resolvedQuery;
55+
return true;
56+
}
57+
}
58+
}
59+
}
60+
return false;
61+
}
62+
63+
@Override
64+
public String toString(String field) {
65+
return "Approximate(*:*)";
66+
}
67+
68+
@Override
69+
public void visit(QueryVisitor visitor) {
70+
visitor.visitLeaf(this);
71+
72+
}
73+
74+
@Override
75+
public boolean equals(Object o) {
76+
if (sameClassAs(o)) {
77+
ApproximateMatchAllQuery other = (ApproximateMatchAllQuery) o;
78+
return Objects.equals(approximation, other.approximation);
79+
}
80+
return false;
81+
}
82+
83+
@Override
84+
public int hashCode() {
85+
return classHash();
86+
}
87+
88+
@Override
89+
public Query rewrite(IndexSearcher indexSearcher) throws IOException {
90+
if (approximation == null) {
91+
throw new IllegalStateException("rewrite called without setting context or query could not be approximated");
92+
}
93+
return approximation.rewrite(indexSearcher);
94+
}
95+
}

server/src/main/java/org/opensearch/search/approximate/ApproximateScoreQuery.java

-3
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,6 @@ public Query rewrite(IndexSearcher indexSearcher) throws IOException {
5151
}
5252

5353
public void setContext(SearchContext context) {
54-
if (resolvedQuery != null) {
55-
throw new IllegalStateException("Query already resolved, duplicate call to setContext");
56-
}
5754
resolvedQuery = approximationQuery.canApproximate(context) ? approximationQuery : originalQuery;
5855
};
5956

server/src/main/java/org/opensearch/search/sort/FieldSortBuilder.java

+1
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ public class FieldSortBuilder extends SortBuilder<FieldSortBuilder> implements W
100100
* special field name to sort by index order
101101
*/
102102
public static final String DOC_FIELD_NAME = "_doc";
103+
public static final String ID_FIELD_NAME = "_id";
103104
private static final SortFieldAndFormat SORT_DOC = new SortFieldAndFormat(new SortField(null, SortField.Type.DOC), DocValueFormat.RAW);
104105
private static final SortFieldAndFormat SORT_DOC_REVERSE = new SortFieldAndFormat(
105106
new SortField(null, SortField.Type.DOC, true),

server/src/test/java/org/opensearch/index/query/BoolQueryBuilderTests.java

+6-1
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@
4242
import org.opensearch.core.xcontent.XContentBuilder;
4343
import org.opensearch.core.xcontent.XContentParseException;
4444
import org.opensearch.core.xcontent.XContentParser;
45+
import org.opensearch.search.approximate.ApproximateMatchAllQuery;
46+
import org.opensearch.search.approximate.ApproximateScoreQuery;
4547
import org.opensearch.test.AbstractQueryTestCase;
4648
import org.hamcrest.Matchers;
4749

@@ -208,7 +210,10 @@ public void testMinShouldMatchFilterWithoutShouldClauses() throws Exception {
208210
assertThat(innerBooleanQuery.clauses().size(), equalTo(1));
209211
BooleanClause innerBooleanClause = innerBooleanQuery.clauses().get(0);
210212
assertThat(innerBooleanClause.occur(), equalTo(BooleanClause.Occur.MUST));
211-
assertThat(innerBooleanClause.query(), instanceOf(MatchAllDocsQuery.class));
213+
assertThat(innerBooleanClause.query(), instanceOf(ApproximateScoreQuery.class));
214+
ApproximateScoreQuery approxQuery = (ApproximateScoreQuery) innerBooleanClause.query();
215+
assertThat(approxQuery.getOriginalQuery(), instanceOf(MatchAllDocsQuery.class));
216+
assertThat(approxQuery.getApproximationQuery(), instanceOf(ApproximateMatchAllQuery.class));
212217
}
213218

214219
public void testMinShouldMatchBiggerThanNumberOfShouldClauses() throws Exception {

server/src/test/java/org/opensearch/index/query/MatchAllQueryBuilderTests.java

+6-1
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@
3434

3535
import org.apache.lucene.search.MatchAllDocsQuery;
3636
import org.apache.lucene.search.Query;
37+
import org.opensearch.search.approximate.ApproximateMatchAllQuery;
38+
import org.opensearch.search.approximate.ApproximateScoreQuery;
3739
import org.opensearch.test.AbstractQueryTestCase;
3840

3941
import java.io.IOException;
@@ -49,7 +51,10 @@ protected MatchAllQueryBuilder doCreateTestQueryBuilder() {
4951

5052
@Override
5153
protected void doAssertLuceneQuery(MatchAllQueryBuilder queryBuilder, Query query, QueryShardContext context) throws IOException {
52-
assertThat(query, instanceOf(MatchAllDocsQuery.class));
54+
assertThat(query, instanceOf(ApproximateScoreQuery.class));
55+
ApproximateScoreQuery approxQuery = (ApproximateScoreQuery) query;
56+
assertThat(approxQuery.getOriginalQuery(), instanceOf(MatchAllDocsQuery.class));
57+
assertThat(approxQuery.getApproximationQuery(), instanceOf(ApproximateMatchAllQuery.class));
5358
}
5459

5560
public void testFromJson() throws IOException {

server/src/test/java/org/opensearch/index/query/NestedQueryBuilderTests.java

+6-1
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@
5050
import org.opensearch.index.mapper.MapperService;
5151
import org.opensearch.index.query.functionscore.FunctionScoreQueryBuilder;
5252
import org.opensearch.index.search.OpenSearchToParentBlockJoinQuery;
53+
import org.opensearch.search.approximate.ApproximateMatchAllQuery;
54+
import org.opensearch.search.approximate.ApproximateScoreQuery;
5355
import org.opensearch.search.fetch.subphase.InnerHitsContext;
5456
import org.opensearch.search.internal.SearchContext;
5557
import org.opensearch.search.sort.FieldSortBuilder;
@@ -493,7 +495,10 @@ public void testNestedDepthAllowed() throws Exception {
493495
.filter(c -> c.occur() == BooleanClause.Occur.MUST)
494496
.findFirst();
495497
assertTrue(childLeg.isPresent());
496-
assertEquals(new MatchAllDocsQuery(), childLeg.get().query());
498+
assertThat(childLeg.get().query(), instanceOf(ApproximateScoreQuery.class));
499+
ApproximateScoreQuery approxQuery = (ApproximateScoreQuery) childLeg.get().query();
500+
assertThat(approxQuery.getOriginalQuery(), instanceOf(MatchAllDocsQuery.class));
501+
assertThat(approxQuery.getApproximationQuery(), instanceOf(ApproximateMatchAllQuery.class));
497502
};
498503
check.accept(createShardContext());
499504
doWithDepth(randomIntBetween(1, 20), check);

server/src/test/java/org/opensearch/index/query/WrapperQueryBuilderTests.java

+8-1
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,16 @@
4141
import org.opensearch.core.common.bytes.BytesArray;
4242
import org.opensearch.core.common.bytes.BytesReference;
4343
import org.opensearch.core.xcontent.MediaTypeRegistry;
44+
import org.opensearch.search.approximate.ApproximateMatchAllQuery;
45+
import org.opensearch.search.approximate.ApproximateScoreQuery;
4446
import org.opensearch.test.AbstractQueryTestCase;
4547

4648
import java.io.IOException;
4749
import java.io.UncheckedIOException;
4850
import java.io.UnsupportedEncodingException;
4951

52+
import static org.hamcrest.CoreMatchers.instanceOf;
53+
5054
public class WrapperQueryBuilderTests extends AbstractQueryTestCase<WrapperQueryBuilder> {
5155

5256
@Override
@@ -171,7 +175,10 @@ public void testRewriteInnerQueryToo() throws IOException {
171175
assertEquals(new TermQuery(new Term(TEXT_FIELD_NAME, "bar")), qb.rewrite(shardContext).toQuery(shardContext));
172176

173177
qb = new WrapperQueryBuilder(new BoolQueryBuilder().toString());
174-
assertEquals(new MatchAllDocsQuery(), qb.rewrite(shardContext).toQuery(shardContext));
178+
assertThat(qb.rewrite(shardContext).toQuery(shardContext), instanceOf(ApproximateScoreQuery.class));
179+
ApproximateScoreQuery approxQuery = (ApproximateScoreQuery) qb.rewrite(shardContext).toQuery(shardContext);
180+
assertThat(approxQuery.getOriginalQuery(), instanceOf(MatchAllDocsQuery.class));
181+
assertThat(approxQuery.getApproximationQuery(), instanceOf(ApproximateMatchAllQuery.class));
175182
}
176183

177184
@Override

server/src/test/java/org/opensearch/index/query/functionscore/FunctionScoreQueryBuilderTests.java

+6-1
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@
6767
import org.opensearch.script.Script;
6868
import org.opensearch.script.ScriptType;
6969
import org.opensearch.search.MultiValueMode;
70+
import org.opensearch.search.approximate.ApproximateMatchAllQuery;
71+
import org.opensearch.search.approximate.ApproximateScoreQuery;
7072
import org.opensearch.test.AbstractQueryTestCase;
7173
import org.opensearch.test.TestGeoShapeFieldMapperPlugin;
7274
import org.joda.time.DateTime;
@@ -630,7 +632,10 @@ public void testCustomWeightFactorQueryBuilderWithFunctionScoreWithoutQueryGiven
630632
Query parsedQuery = parseQuery(functionScoreQuery(weightFactorFunction(1.3f))).toQuery(createShardContext());
631633
assertThat(parsedQuery, instanceOf(FunctionScoreQuery.class));
632634
FunctionScoreQuery functionScoreQuery = (FunctionScoreQuery) parsedQuery;
633-
assertThat(functionScoreQuery.getSubQuery() instanceof MatchAllDocsQuery, equalTo(true));
635+
assertThat(functionScoreQuery.getSubQuery(), CoreMatchers.instanceOf(ApproximateScoreQuery.class));
636+
ApproximateScoreQuery approxQuery = (ApproximateScoreQuery) functionScoreQuery.getSubQuery();
637+
assertThat(approxQuery.getOriginalQuery(), CoreMatchers.instanceOf(MatchAllDocsQuery.class));
638+
assertThat(approxQuery.getApproximationQuery(), CoreMatchers.instanceOf(ApproximateMatchAllQuery.class));
634639
assertThat((double) (functionScoreQuery.getFunctions()[0]).getWeight(), closeTo(1.3, 0.001));
635640
}
636641

server/src/test/java/org/opensearch/index/search/NestedHelperTests.java

+7-1
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
import org.apache.lucene.search.Query;
4343
import org.apache.lucene.search.TermQuery;
4444
import org.apache.lucene.search.join.ScoreMode;
45+
import org.opensearch.common.lucene.search.Queries;
4546
import org.opensearch.common.settings.Settings;
4647
import org.opensearch.common.xcontent.XContentFactory;
4748
import org.opensearch.core.xcontent.XContentBuilder;
@@ -52,6 +53,8 @@
5253
import org.opensearch.index.query.NestedQueryBuilder;
5354
import org.opensearch.index.query.QueryShardContext;
5455
import org.opensearch.index.query.TermQueryBuilder;
56+
import org.opensearch.search.approximate.ApproximateMatchAllQuery;
57+
import org.opensearch.search.approximate.ApproximateScoreQuery;
5558
import org.opensearch.test.OpenSearchSingleNodeTestCase;
5659

5760
import java.io.IOException;
@@ -325,7 +328,10 @@ public void testNested() throws IOException {
325328
NestedQueryBuilder queryBuilder = new NestedQueryBuilder("nested1", new MatchAllQueryBuilder(), ScoreMode.Avg);
326329
OpenSearchToParentBlockJoinQuery query = (OpenSearchToParentBlockJoinQuery) queryBuilder.toQuery(context);
327330

328-
Query expectedChildQuery = new BooleanQuery.Builder().add(new MatchAllDocsQuery(), Occur.MUST)
331+
Query expectedChildQuery = new BooleanQuery.Builder().add(
332+
new ApproximateScoreQuery(Queries.newMatchAllQuery(), new ApproximateMatchAllQuery()),
333+
Occur.MUST
334+
)
329335
// we automatically add a filter since the inner query might match non-nested docs
330336
.add(new TermQuery(new Term(NestedPathFieldMapper.NAME, "nested1")), Occur.FILTER)
331337
.build();

0 commit comments

Comments
 (0)