50
50
import org .opensearch .test .IndexSettingsModule ;
51
51
import org .opensearch .test .OpenSearchTestCase ;
52
52
import org .hamcrest .MatcherAssert ;
53
+ import org .junit .Before ;
53
54
54
55
import java .io .IOException ;
56
+ import java .io .InputStream ;
57
+ import java .nio .file .Files ;
58
+ import java .nio .file .Path ;
55
59
import java .util .ArrayList ;
56
60
import java .util .Arrays ;
57
61
import java .util .List ;
63
67
import static org .hamcrest .Matchers .instanceOf ;
64
68
65
69
public class CompoundAnalysisTests extends OpenSearchTestCase {
70
+
71
+ Settings [] settingsArr ;
72
+
73
+ @ Before
74
+ public void initialize () throws IOException {
75
+ final Path home = createTempDir ();
76
+ copyHyphenationPatternsFile (home );
77
+ this .settingsArr = new Settings [] { getJsonSettings (home ), getYamlSettings (home ) };
78
+ }
79
+
66
80
public void testDefaultsCompoundAnalysis () throws Exception {
67
- Settings settings = getJsonSettings ();
68
- IndexSettings idxSettings = IndexSettingsModule .newIndexSettings ("test" , settings );
69
- AnalysisModule analysisModule = createAnalysisModule (settings );
70
- TokenFilterFactory filterFactory = analysisModule .getAnalysisRegistry ().buildTokenFilterFactories (idxSettings ).get ("dict_dec" );
71
- MatcherAssert .assertThat (filterFactory , instanceOf (DictionaryCompoundWordTokenFilterFactory .class ));
81
+ for (Settings settings : this .settingsArr ) {
82
+ IndexSettings idxSettings = IndexSettingsModule .newIndexSettings ("test" , settings );
83
+ AnalysisModule analysisModule = createAnalysisModule (settings );
84
+ TokenFilterFactory filterFactory = analysisModule .getAnalysisRegistry ().buildTokenFilterFactories (idxSettings ).get ("dict_dec" );
85
+ MatcherAssert .assertThat (filterFactory , instanceOf (DictionaryCompoundWordTokenFilterFactory .class ));
86
+ }
72
87
}
73
88
74
89
public void testDictionaryDecompounder () throws Exception {
75
- Settings [] settingsArr = new Settings [] { getJsonSettings (), getYamlSettings () };
76
- for (Settings settings : settingsArr ) {
90
+ for (Settings settings : this .settingsArr ) {
77
91
List <String > terms = analyze (settings , "decompoundingAnalyzer" , "donaudampfschiff spargelcremesuppe" );
78
92
MatcherAssert .assertThat (terms .size (), equalTo (8 ));
79
93
MatcherAssert .assertThat (
@@ -83,6 +97,26 @@ public void testDictionaryDecompounder() throws Exception {
83
97
}
84
98
}
85
99
100
+ // Hyphenation Decompounder tests mimic the behavior of lucene tests
101
+ // lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestHyphenationCompoundWordTokenFilterFactory.java
102
+ public void testHyphenationDecompounder () throws Exception {
103
+ for (Settings settings : this .settingsArr ) {
104
+ List <String > terms = analyze (settings , "hyphenationAnalyzer" , "min veninde som er lidt af en læsehest" );
105
+ MatcherAssert .assertThat (terms .size (), equalTo (10 ));
106
+ MatcherAssert .assertThat (terms , hasItems ("min" , "veninde" , "som" , "er" , "lidt" , "af" , "en" , "læsehest" , "læse" , "hest" ));
107
+ }
108
+ }
109
+
110
+ // Hyphenation Decompounder tests mimic the behavior of lucene tests
111
+ // lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestHyphenationCompoundWordTokenFilterFactory.java
112
+ public void testHyphenationDecompounderNoSubMatches () throws Exception {
113
+ for (Settings settings : this .settingsArr ) {
114
+ List <String > terms = analyze (settings , "hyphenationAnalyzerNoSubMatches" , "basketballkurv" );
115
+ MatcherAssert .assertThat (terms .size (), equalTo (3 ));
116
+ MatcherAssert .assertThat (terms , hasItems ("basketballkurv" , "basketball" , "kurv" ));
117
+ }
118
+ }
119
+
86
120
private List <String > analyze (Settings settings , String analyzerName , String text ) throws IOException {
87
121
IndexSettings idxSettings = IndexSettingsModule .newIndexSettings ("test" , settings );
88
122
AnalysisModule analysisModule = createAnalysisModule (settings );
@@ -111,21 +145,28 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
111
145
}));
112
146
}
113
147
114
- private Settings getJsonSettings () throws IOException {
148
+ private void copyHyphenationPatternsFile (Path home ) throws IOException {
149
+ InputStream hyphenation_patterns_path = getClass ().getResourceAsStream ("da_UTF8.xml" );
150
+ Path config = home .resolve ("config" );
151
+ Files .createDirectory (config );
152
+ Files .copy (hyphenation_patterns_path , config .resolve ("da_UTF8.xml" ));
153
+ }
154
+
155
+ private Settings getJsonSettings (Path home ) throws IOException {
115
156
String json = "/org/opensearch/analysis/common/test1.json" ;
116
157
return Settings .builder ()
117
158
.loadFromStream (json , getClass ().getResourceAsStream (json ), false )
118
159
.put (IndexMetadata .SETTING_VERSION_CREATED , Version .CURRENT )
119
- .put (Environment .PATH_HOME_SETTING .getKey (), createTempDir () .toString ())
160
+ .put (Environment .PATH_HOME_SETTING .getKey (), home .toString ())
120
161
.build ();
121
162
}
122
163
123
- private Settings getYamlSettings () throws IOException {
164
+ private Settings getYamlSettings (Path home ) throws IOException {
124
165
String yaml = "/org/opensearch/analysis/common/test1.yml" ;
125
166
return Settings .builder ()
126
167
.loadFromStream (yaml , getClass ().getResourceAsStream (yaml ), false )
127
168
.put (IndexMetadata .SETTING_VERSION_CREATED , Version .CURRENT )
128
- .put (Environment .PATH_HOME_SETTING .getKey (), createTempDir () .toString ())
169
+ .put (Environment .PATH_HOME_SETTING .getKey (), home .toString ())
129
170
.build ();
130
171
}
131
172
}
0 commit comments