Skip to content

Commit 297f2ba

Browse files
Fix match_phrase_prefix_query not working on text field with multiple values and index_prefixes (#10959)
* Fix match_phrase_prefix_query not working on text field with multiple values and index_prefixes Signed-off-by: Gao Binlong <[email protected]> * Add more test Signed-off-by: Gao Binlong <[email protected]> * modify change log Signed-off-by: Gao Binlong <[email protected]> * Fix test failure Signed-off-by: Gao Binlong <[email protected]> * Change the indexAnalyzer used by prefix field Signed-off-by: Gao Binlong <[email protected]> * Skip old version for yaml test Signed-off-by: Gao Binlong <[email protected]> * Optimize some code Signed-off-by: Gao Binlong <[email protected]> * Fix test failure Signed-off-by: Gao Binlong <[email protected]> * Modify yaml test description Signed-off-by: Gao Binlong <[email protected]> * Remove the name parameter for setAnalyzer() Signed-off-by: Gao Binlong <[email protected]> --------- Signed-off-by: Gao Binlong <[email protected]> (cherry picked from commit 2e63913) Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent 3fde6aa commit 297f2ba

File tree

5 files changed

+124
-9
lines changed

5 files changed

+124
-9
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
4747
- Fix bug in SBP cancellation logic ([#13259](https://github.com/opensearch-project/OpenSearch/pull/13474))
4848
- Fix handling of Short and Byte data types in ScriptProcessor ingest pipeline ([#14379](https://github.com/opensearch-project/OpenSearch/issues/14379))
4949
- Switch to iterative version of WKT format parser ([#14086](https://github.com/opensearch-project/OpenSearch/pull/14086))
50+
- Fix match_phrase_prefix_query not working on text field with multiple values and index_prefixes ([#10959](https://github.com/opensearch-project/OpenSearch/pull/10959))
5051
- Fix the computed max shards of cluster to avoid int overflow ([#14155](https://github.com/opensearch-project/OpenSearch/pull/14155))
5152
- Fixed rest-high-level client searchTemplate & mtermVectors endpoints to have a leading slash ([#14465](https://github.com/opensearch-project/OpenSearch/pull/14465))
5253
- Write shard level metadata blob when snapshotting searchable snapshot indexes ([#13190](https://github.com/opensearch-project/OpenSearch/pull/13190))

rest-api-spec/src/main/resources/rest-api-spec/test/search/190_index_prefix_search.yml

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,12 @@ setup:
1010
index_prefixes:
1111
min_chars: 2
1212
max_chars: 5
13-
13+
text_with_pos_inc_gap:
14+
type: text
15+
position_increment_gap: 201
16+
index_prefixes:
17+
min_chars: 2
18+
max_chars: 5
1419
- do:
1520
index:
1621
index: test
@@ -23,6 +28,18 @@ setup:
2328
id: 2
2429
body: { text: sentence with UPPERCASE WORDS }
2530

31+
- do:
32+
index:
33+
index: test
34+
id: 3
35+
body: { text: ["foo", "b-12"] }
36+
37+
- do:
38+
index:
39+
index: test
40+
id: 4
41+
body: { text_with_pos_inc_gap: ["foo", "b-12"] }
42+
2643
- do:
2744
indices.refresh:
2845
index: [test]
@@ -116,3 +133,36 @@ setup:
116133
]
117134

118135
- match: {hits.total: 1}
136+
137+
# related issue: https://github.com/opensearch-project/OpenSearch/issues/9203
138+
---
139+
"search index prefixes with multiple values":
140+
- skip:
141+
version: " - 2.99.99"
142+
reason: "the bug was fixed in 3.0.0"
143+
- do:
144+
search:
145+
rest_total_hits_as_int: true
146+
index: test
147+
body:
148+
query:
149+
match_phrase_prefix:
150+
text: "b-12"
151+
152+
- match: {hits.total: 1}
153+
154+
---
155+
"search index prefixes with multiple values and custom position_increment_gap":
156+
- skip:
157+
version: " - 2.99.99"
158+
reason: "the bug was fixed in 3.0.0"
159+
- do:
160+
search:
161+
rest_total_hits_as_int: true
162+
index: test
163+
body:
164+
query:
165+
match_phrase_prefix:
166+
text_with_pos_inc_gap: "b-12"
167+
168+
- match: {hits.total: 1}

server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -449,7 +449,6 @@ protected PrefixFieldMapper buildPrefixMapper(BuilderContext context, FieldType
449449
pft.setStoreTermVectorOffsets(true);
450450
}
451451
PrefixFieldType prefixFieldType = new PrefixFieldType(tft, fullName + "._index_prefix", indexPrefixes.get());
452-
prefixFieldType.setAnalyzer(analyzers.getIndexAnalyzer());
453452
tft.setPrefixFieldType(prefixFieldType);
454453
return new PrefixFieldMapper(pft, prefixFieldType);
455454
}
@@ -523,19 +522,26 @@ private static class PrefixWrappedAnalyzer extends AnalyzerWrapper {
523522
private final int minChars;
524523
private final int maxChars;
525524
private final Analyzer delegate;
525+
private final int positionIncrementGap;
526526

527-
PrefixWrappedAnalyzer(Analyzer delegate, int minChars, int maxChars) {
527+
PrefixWrappedAnalyzer(Analyzer delegate, int minChars, int maxChars, int positionIncrementGap) {
528528
super(delegate.getReuseStrategy());
529529
this.delegate = delegate;
530530
this.minChars = minChars;
531531
this.maxChars = maxChars;
532+
this.positionIncrementGap = positionIncrementGap;
532533
}
533534

534535
@Override
535536
protected Analyzer getWrappedAnalyzer(String fieldName) {
536537
return delegate;
537538
}
538539

540+
@Override
541+
public int getPositionIncrementGap(String fieldName) {
542+
return positionIncrementGap;
543+
}
544+
539545
@Override
540546
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
541547
TokenFilter filter = new EdgeNGramTokenFilter(components.getTokenStream(), minChars, maxChars, false);
@@ -589,17 +595,18 @@ static final class PrefixFieldType extends StringFieldType {
589595

590596
final int minChars;
591597
final int maxChars;
592-
final TextFieldType parentField;
598+
final TextFieldType parent;
593599

594600
PrefixFieldType(TextFieldType parentField, String name, PrefixConfig config) {
595601
this(parentField, name, config.minChars, config.maxChars);
596602
}
597603

598-
PrefixFieldType(TextFieldType parentField, String name, int minChars, int maxChars) {
599-
super(name, true, false, false, parentField.getTextSearchInfo(), Collections.emptyMap());
604+
PrefixFieldType(TextFieldType parent, String name, int minChars, int maxChars) {
605+
super(name, true, false, false, parent.getTextSearchInfo(), Collections.emptyMap());
600606
this.minChars = minChars;
601607
this.maxChars = maxChars;
602-
this.parentField = parentField;
608+
this.parent = parent;
609+
setAnalyzer(parent.indexAnalyzer());
603610
}
604611

605612
@Override
@@ -610,8 +617,13 @@ public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchL
610617
}
611618

612619
void setAnalyzer(NamedAnalyzer delegate) {
620+
String analyzerName = delegate.name();
613621
setIndexAnalyzer(
614-
new NamedAnalyzer(delegate.name(), AnalyzerScope.INDEX, new PrefixWrappedAnalyzer(delegate.analyzer(), minChars, maxChars))
622+
new NamedAnalyzer(
623+
analyzerName,
624+
AnalyzerScope.INDEX,
625+
new PrefixWrappedAnalyzer(delegate.analyzer(), minChars, maxChars, delegate.getPositionIncrementGap(analyzerName))
626+
)
615627
);
616628
}
617629

@@ -640,7 +652,7 @@ public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, bool
640652
Automaton automaton = Operations.concatenate(automata);
641653
AutomatonQuery query = AutomatonQueries.createAutomatonQuery(new Term(name(), value + "*"), automaton, method);
642654
return new BooleanQuery.Builder().add(query, BooleanClause.Occur.SHOULD)
643-
.add(new TermQuery(new Term(parentField.name(), value)), BooleanClause.Occur.SHOULD)
655+
.add(new TermQuery(new Term(parent.name(), value)), BooleanClause.Occur.SHOULD)
644656
.build();
645657
}
646658

server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,57 @@ public void testIndexOptions() throws IOException {
380380
}
381381
}
382382

383+
public void testPositionIncrementGapOnIndexPrefixField() throws IOException {
384+
// test default position_increment_gap
385+
MapperService mapperService = createMapperService(
386+
fieldMapping(b -> b.field("type", "text").field("analyzer", "default").startObject("index_prefixes").endObject())
387+
);
388+
ParsedDocument doc = mapperService.documentMapper().parse(source(b -> b.array("field", new String[] { "a", "b 12" })));
389+
390+
withLuceneIndex(mapperService, iw -> iw.addDocument(doc.rootDoc()), reader -> {
391+
TermsEnum terms = getOnlyLeafReader(reader).terms("field").iterator();
392+
assertTrue(terms.seekExact(new BytesRef("12")));
393+
PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS);
394+
assertEquals(0, postings.nextDoc());
395+
assertEquals(TextFieldMapper.Defaults.POSITION_INCREMENT_GAP + 2, postings.nextPosition());
396+
});
397+
398+
withLuceneIndex(mapperService, iw -> iw.addDocument(doc.rootDoc()), reader -> {
399+
TermsEnum terms = getOnlyLeafReader(reader).terms("field._index_prefix").iterator();
400+
assertTrue(terms.seekExact(new BytesRef("12")));
401+
PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS);
402+
assertEquals(0, postings.nextDoc());
403+
assertEquals(TextFieldMapper.Defaults.POSITION_INCREMENT_GAP + 2, postings.nextPosition());
404+
});
405+
406+
// test custom position_increment_gap
407+
final int positionIncrementGap = randomIntBetween(1, 1000);
408+
MapperService mapperService2 = createMapperService(
409+
fieldMapping(
410+
b -> b.field("type", "text")
411+
.field("position_increment_gap", positionIncrementGap)
412+
.field("analyzer", "default")
413+
.startObject("index_prefixes")
414+
.endObject()
415+
)
416+
);
417+
ParsedDocument doc2 = mapperService2.documentMapper().parse(source(b -> b.array("field", new String[] { "a", "b 12" })));
418+
withLuceneIndex(mapperService2, iw -> iw.addDocument(doc2.rootDoc()), reader -> {
419+
TermsEnum terms = getOnlyLeafReader(reader).terms("field").iterator();
420+
assertTrue(terms.seekExact(new BytesRef("12")));
421+
PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS);
422+
assertEquals(0, postings.nextDoc());
423+
assertEquals(positionIncrementGap + 2, postings.nextPosition());
424+
});
425+
withLuceneIndex(mapperService2, iw -> iw.addDocument(doc2.rootDoc()), reader -> {
426+
TermsEnum terms = getOnlyLeafReader(reader).terms("field._index_prefix").iterator();
427+
assertTrue(terms.seekExact(new BytesRef("12")));
428+
PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS);
429+
assertEquals(0, postings.nextDoc());
430+
assertEquals(positionIncrementGap + 2, postings.nextPosition());
431+
});
432+
}
433+
383434
public void testDefaultPositionIncrementGap() throws IOException {
384435
MapperService mapperService = createMapperService(fieldMapping(this::minimalMapping));
385436
ParsedDocument doc = mapperService.documentMapper().parse(source(b -> b.array("field", new String[] { "a", "b" })));

server/src/test/java/org/opensearch/index/mapper/TextFieldTypeTests.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ public void testFuzzyQuery() {
167167

168168
public void testIndexPrefixes() {
169169
TextFieldType ft = createFieldType(true);
170+
ft.setIndexAnalyzer(Lucene.STANDARD_ANALYZER);
170171
ft.setPrefixFieldType(new TextFieldMapper.PrefixFieldType(ft, "field._index_prefix", 2, 10));
171172

172173
Query q = ft.prefixQuery("goin", CONSTANT_SCORE_REWRITE, false, randomMockShardContext());

0 commit comments

Comments
 (0)