Skip to content

Commit 2e63913

Browse files
authored
Fix match_phrase_prefix_query not working on text field with multiple values and index_prefixes (opensearch-project#10959)
* Fix match_phrase_prefix_query not working on text field with multiple values and index_prefixes Signed-off-by: Gao Binlong <[email protected]> * Add more test Signed-off-by: Gao Binlong <[email protected]> * modify change log Signed-off-by: Gao Binlong <[email protected]> * Fix test failure Signed-off-by: Gao Binlong <[email protected]> * Change the indexAnalyzer used by prefix field Signed-off-by: Gao Binlong <[email protected]> * Skip old version for yaml test Signed-off-by: Gao Binlong <[email protected]> * Optimize some code Signed-off-by: Gao Binlong <[email protected]> * Fix test failure Signed-off-by: Gao Binlong <[email protected]> * Modify yaml test description Signed-off-by: Gao Binlong <[email protected]> * Remove the name parameter for setAnalyzer() Signed-off-by: Gao Binlong <[email protected]> --------- Signed-off-by: Gao Binlong <[email protected]>
1 parent 51af2e2 commit 2e63913

File tree

5 files changed

+124
-9
lines changed

5 files changed

+124
-9
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
4949
- Fix bug in SBP cancellation logic ([#13259](https://github.com/opensearch-project/OpenSearch/pull/13474))
5050
- Fix handling of Short and Byte data types in ScriptProcessor ingest pipeline ([#14379](https://github.com/opensearch-project/OpenSearch/issues/14379))
5151
- Switch to iterative version of WKT format parser ([#14086](https://github.com/opensearch-project/OpenSearch/pull/14086))
52+
- Fix match_phrase_prefix_query not working on text field with multiple values and index_prefixes ([#10959](https://github.com/opensearch-project/OpenSearch/pull/10959))
5253
- Fix the computed max shards of cluster to avoid int overflow ([#14155](https://github.com/opensearch-project/OpenSearch/pull/14155))
5354
- Fixed rest-high-level client searchTemplate & mtermVectors endpoints to have a leading slash ([#14465](https://github.com/opensearch-project/OpenSearch/pull/14465))
5455
- Write shard level metadata blob when snapshotting searchable snapshot indexes ([#13190](https://github.com/opensearch-project/OpenSearch/pull/13190))

rest-api-spec/src/main/resources/rest-api-spec/test/search/190_index_prefix_search.yml

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,12 @@ setup:
1010
index_prefixes:
1111
min_chars: 2
1212
max_chars: 5
13-
13+
text_with_pos_inc_gap:
14+
type: text
15+
position_increment_gap: 201
16+
index_prefixes:
17+
min_chars: 2
18+
max_chars: 5
1419
- do:
1520
index:
1621
index: test
@@ -23,6 +28,18 @@ setup:
2328
id: 2
2429
body: { text: sentence with UPPERCASE WORDS }
2530

31+
- do:
32+
index:
33+
index: test
34+
id: 3
35+
body: { text: ["foo", "b-12"] }
36+
37+
- do:
38+
index:
39+
index: test
40+
id: 4
41+
body: { text_with_pos_inc_gap: ["foo", "b-12"] }
42+
2643
- do:
2744
indices.refresh:
2845
index: [test]
@@ -116,3 +133,36 @@ setup:
116133
]
117134

118135
- match: {hits.total: 1}
136+
137+
# related issue: https://github.com/opensearch-project/OpenSearch/issues/9203
138+
---
139+
"search index prefixes with multiple values":
140+
- skip:
141+
version: " - 2.99.99"
142+
reason: "the bug was fixed in 3.0.0"
143+
- do:
144+
search:
145+
rest_total_hits_as_int: true
146+
index: test
147+
body:
148+
query:
149+
match_phrase_prefix:
150+
text: "b-12"
151+
152+
- match: {hits.total: 1}
153+
154+
---
155+
"search index prefixes with multiple values and custom position_increment_gap":
156+
- skip:
157+
version: " - 2.99.99"
158+
reason: "the bug was fixed in 3.0.0"
159+
- do:
160+
search:
161+
rest_total_hits_as_int: true
162+
index: test
163+
body:
164+
query:
165+
match_phrase_prefix:
166+
text_with_pos_inc_gap: "b-12"
167+
168+
- match: {hits.total: 1}

server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -448,7 +448,6 @@ protected PrefixFieldMapper buildPrefixMapper(BuilderContext context, FieldType
448448
pft.setStoreTermVectorOffsets(true);
449449
}
450450
PrefixFieldType prefixFieldType = new PrefixFieldType(tft, fullName + "._index_prefix", indexPrefixes.get());
451-
prefixFieldType.setAnalyzer(analyzers.getIndexAnalyzer());
452451
tft.setPrefixFieldType(prefixFieldType);
453452
return new PrefixFieldMapper(pft, prefixFieldType);
454453
}
@@ -522,19 +521,26 @@ private static class PrefixWrappedAnalyzer extends AnalyzerWrapper {
522521
private final int minChars;
523522
private final int maxChars;
524523
private final Analyzer delegate;
524+
private final int positionIncrementGap;
525525

526-
PrefixWrappedAnalyzer(Analyzer delegate, int minChars, int maxChars) {
526+
PrefixWrappedAnalyzer(Analyzer delegate, int minChars, int maxChars, int positionIncrementGap) {
527527
super(delegate.getReuseStrategy());
528528
this.delegate = delegate;
529529
this.minChars = minChars;
530530
this.maxChars = maxChars;
531+
this.positionIncrementGap = positionIncrementGap;
531532
}
532533

533534
@Override
534535
protected Analyzer getWrappedAnalyzer(String fieldName) {
535536
return delegate;
536537
}
537538

539+
@Override
540+
public int getPositionIncrementGap(String fieldName) {
541+
return positionIncrementGap;
542+
}
543+
538544
@Override
539545
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
540546
TokenFilter filter = new EdgeNGramTokenFilter(components.getTokenStream(), minChars, maxChars, false);
@@ -588,17 +594,18 @@ static final class PrefixFieldType extends StringFieldType {
588594

589595
final int minChars;
590596
final int maxChars;
591-
final TextFieldType parentField;
597+
final TextFieldType parent;
592598

593599
PrefixFieldType(TextFieldType parentField, String name, PrefixConfig config) {
594600
this(parentField, name, config.minChars, config.maxChars);
595601
}
596602

597-
PrefixFieldType(TextFieldType parentField, String name, int minChars, int maxChars) {
598-
super(name, true, false, false, parentField.getTextSearchInfo(), Collections.emptyMap());
603+
PrefixFieldType(TextFieldType parent, String name, int minChars, int maxChars) {
604+
super(name, true, false, false, parent.getTextSearchInfo(), Collections.emptyMap());
599605
this.minChars = minChars;
600606
this.maxChars = maxChars;
601-
this.parentField = parentField;
607+
this.parent = parent;
608+
setAnalyzer(parent.indexAnalyzer());
602609
}
603610

604611
@Override
@@ -609,8 +616,13 @@ public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchL
609616
}
610617

611618
void setAnalyzer(NamedAnalyzer delegate) {
619+
String analyzerName = delegate.name();
612620
setIndexAnalyzer(
613-
new NamedAnalyzer(delegate.name(), AnalyzerScope.INDEX, new PrefixWrappedAnalyzer(delegate.analyzer(), minChars, maxChars))
621+
new NamedAnalyzer(
622+
analyzerName,
623+
AnalyzerScope.INDEX,
624+
new PrefixWrappedAnalyzer(delegate.analyzer(), minChars, maxChars, delegate.getPositionIncrementGap(analyzerName))
625+
)
614626
);
615627
}
616628

@@ -639,7 +651,7 @@ public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, bool
639651
Automaton automaton = Operations.concatenate(automata);
640652
AutomatonQuery query = AutomatonQueries.createAutomatonQuery(new Term(name(), value + "*"), automaton, method);
641653
return new BooleanQuery.Builder().add(query, BooleanClause.Occur.SHOULD)
642-
.add(new TermQuery(new Term(parentField.name(), value)), BooleanClause.Occur.SHOULD)
654+
.add(new TermQuery(new Term(parent.name(), value)), BooleanClause.Occur.SHOULD)
643655
.build();
644656
}
645657

server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,57 @@ public void testIndexOptions() throws IOException {
380380
}
381381
}
382382

383+
public void testPositionIncrementGapOnIndexPrefixField() throws IOException {
384+
// test default position_increment_gap
385+
MapperService mapperService = createMapperService(
386+
fieldMapping(b -> b.field("type", "text").field("analyzer", "default").startObject("index_prefixes").endObject())
387+
);
388+
ParsedDocument doc = mapperService.documentMapper().parse(source(b -> b.array("field", new String[] { "a", "b 12" })));
389+
390+
withLuceneIndex(mapperService, iw -> iw.addDocument(doc.rootDoc()), reader -> {
391+
TermsEnum terms = getOnlyLeafReader(reader).terms("field").iterator();
392+
assertTrue(terms.seekExact(new BytesRef("12")));
393+
PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS);
394+
assertEquals(0, postings.nextDoc());
395+
assertEquals(TextFieldMapper.Defaults.POSITION_INCREMENT_GAP + 2, postings.nextPosition());
396+
});
397+
398+
withLuceneIndex(mapperService, iw -> iw.addDocument(doc.rootDoc()), reader -> {
399+
TermsEnum terms = getOnlyLeafReader(reader).terms("field._index_prefix").iterator();
400+
assertTrue(terms.seekExact(new BytesRef("12")));
401+
PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS);
402+
assertEquals(0, postings.nextDoc());
403+
assertEquals(TextFieldMapper.Defaults.POSITION_INCREMENT_GAP + 2, postings.nextPosition());
404+
});
405+
406+
// test custom position_increment_gap
407+
final int positionIncrementGap = randomIntBetween(1, 1000);
408+
MapperService mapperService2 = createMapperService(
409+
fieldMapping(
410+
b -> b.field("type", "text")
411+
.field("position_increment_gap", positionIncrementGap)
412+
.field("analyzer", "default")
413+
.startObject("index_prefixes")
414+
.endObject()
415+
)
416+
);
417+
ParsedDocument doc2 = mapperService2.documentMapper().parse(source(b -> b.array("field", new String[] { "a", "b 12" })));
418+
withLuceneIndex(mapperService2, iw -> iw.addDocument(doc2.rootDoc()), reader -> {
419+
TermsEnum terms = getOnlyLeafReader(reader).terms("field").iterator();
420+
assertTrue(terms.seekExact(new BytesRef("12")));
421+
PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS);
422+
assertEquals(0, postings.nextDoc());
423+
assertEquals(positionIncrementGap + 2, postings.nextPosition());
424+
});
425+
withLuceneIndex(mapperService2, iw -> iw.addDocument(doc2.rootDoc()), reader -> {
426+
TermsEnum terms = getOnlyLeafReader(reader).terms("field._index_prefix").iterator();
427+
assertTrue(terms.seekExact(new BytesRef("12")));
428+
PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS);
429+
assertEquals(0, postings.nextDoc());
430+
assertEquals(positionIncrementGap + 2, postings.nextPosition());
431+
});
432+
}
433+
383434
public void testDefaultPositionIncrementGap() throws IOException {
384435
MapperService mapperService = createMapperService(fieldMapping(this::minimalMapping));
385436
ParsedDocument doc = mapperService.documentMapper().parse(source(b -> b.array("field", new String[] { "a", "b" })));

server/src/test/java/org/opensearch/index/mapper/TextFieldTypeTests.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ public void testFuzzyQuery() {
167167

168168
public void testIndexPrefixes() {
169169
TextFieldType ft = createFieldType(true);
170+
ft.setIndexAnalyzer(Lucene.STANDARD_ANALYZER);
170171
ft.setPrefixFieldType(new TextFieldMapper.PrefixFieldType(ft, "field._index_prefix", 2, 10));
171172

172173
Query q = ft.prefixQuery("goin", CONSTANT_SCORE_REWRITE, false, randomMockShardContext());

0 commit comments

Comments
 (0)