Skip to content

Commit 2cd6921

Browse files
author
Guian Gumpac
authored
Add support for wildcard_query function (#156)
* Implemented wildcard_query and added tests in core Signed-off-by: Guian Gumpac <[email protected]> * Implemented and added tests for sql Signed-off-by: Guian Gumpac <[email protected]> * Implemented and added tests for ppl Signed-off-by: Guian Gumpac <[email protected]> * Implemented and added tests for lucene Signed-off-by: Guian Gumpac <[email protected]> * Fixed test for like expression Signed-off-by: Guian Gumpac <[email protected]> * Added parameters to wildcard_query Signed-off-by: Guian Gumpac <[email protected]> * Added integration tests for ppl and sql Signed-off-by: Guian Gumpac <[email protected]> * Added docs for doctests Signed-off-by: Guian Gumpac <[email protected]> * Fixed issues introduced during merging Signed-off-by: Guian Gumpac <[email protected]> * Addressed PR comment Signed-off-by: Guian Gumpac <[email protected]> * Added annotation that was deleted from merging Signed-off-by: Guian Gumpac <[email protected]> * Fixed merge conflict issues Signed-off-by: Guian Gumpac <[email protected]> * Addressed some PR comments and handled escaping wildcards Signed-off-by: Guian Gumpac <[email protected]> * Added tests for wildcard conversion and created data for testing Signed-off-by: Guian Gumpac <[email protected]> * Added javadoc Signed-off-by: Guian Gumpac <[email protected]> * Changed index name Signed-off-by: Guian Gumpac <[email protected]> * Temporarily changed jackson_version to run GH actions Signed-off-by: Guian Gumpac <[email protected]> * Added comparison test for wildcard conversion Signed-off-by: Guian Gumpac <[email protected]> * Removed PPL implementation of wildcard_query Signed-off-by: Guian Gumpac <[email protected]> * Reverted ppl docs change Signed-off-by: Guian Gumpac <[email protected]> * Made namedArgument a static function Signed-off-by: Guian Gumpac <[email protected]> * Removed extra space Signed-off-by: Guian Gumpac <[email protected]> * Fixed LIKE query Signed-off-by: Guian Gumpac <[email protected]> * Fixed LIKE tests and 
added more tests Signed-off-by: Guian Gumpac <[email protected]> * Addressed PR comments Signed-off-by: Guian Gumpac <[email protected]> * Implemented converting text field to keyword. Still needs testing Signed-off-by: Guian Gumpac <[email protected]> * Added test cases for LIKE in sql and ppl Signed-off-by: Guian Gumpac <[email protected]> * Addressed PR comments regarding docs Signed-off-by: Guian Gumpac <[email protected]> * Fixed backslashes in docs Signed-off-by: Guian Gumpac <[email protected]> * Added missed backticks in docs Signed-off-by: Guian Gumpac <[email protected]> * Moved escaping wildcard test to common/utils Signed-off-by: Guian Gumpac <[email protected]> * Fixed checkstyle error Signed-off-by: Guian Gumpac <[email protected]> Signed-off-by: Guian Gumpac <[email protected]>
1 parent 50669eb commit 2cd6921

File tree

29 files changed

+1000
-38
lines changed

29 files changed

+1000
-38
lines changed

common/src/main/java/org/opensearch/sql/common/utils/StringUtils.java

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,48 @@ public static String format(final String format, Object... args) {
108108
return String.format(Locale.ROOT, format, args);
109109
}
110110

111+
/**
112+
* Converts sql wildcard character % and _ to * and ?.
113+
* @param text string to be converted
114+
* @return converted string
115+
*/
116+
public static String convertSqlWildcardToLucene(String text) {
117+
final char DEFAULT_ESCAPE = '\\';
118+
StringBuilder convertedString = new StringBuilder(text.length());
119+
boolean escaped = false;
120+
121+
for (char currentChar : text.toCharArray()) {
122+
switch (currentChar) {
123+
case DEFAULT_ESCAPE:
124+
escaped = true;
125+
convertedString.append(currentChar);
126+
break;
127+
case '%':
128+
if (escaped) {
129+
convertedString.deleteCharAt(convertedString.length() - 1);
130+
convertedString.append("%");
131+
} else {
132+
convertedString.append("*");
133+
}
134+
escaped = false;
135+
break;
136+
case '_':
137+
if (escaped) {
138+
convertedString.deleteCharAt(convertedString.length() - 1);
139+
convertedString.append("_");
140+
} else {
141+
convertedString.append('?');
142+
}
143+
escaped = false;
144+
break;
145+
default:
146+
convertedString.append(currentChar);
147+
escaped = false;
148+
}
149+
}
150+
return convertedString.toString();
151+
}
152+
111153
private static boolean isQuoted(String text, String mark) {
112154
return !Strings.isNullOrEmpty(text) && text.startsWith(mark) && text.endsWith(mark);
113155
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.common.utils;
7+
8+
import static org.junit.Assert.assertEquals;
9+
10+
import org.junit.Test;
11+
12+
public class ConvertSQLWildcardTest {
13+
@Test
14+
public void test_escaping_sql_wildcards() {
15+
assertEquals("%", StringUtils.convertSqlWildcardToLucene("\\%"));
16+
assertEquals("\\*", StringUtils.convertSqlWildcardToLucene("\\*"));
17+
assertEquals("_", StringUtils.convertSqlWildcardToLucene("\\_"));
18+
assertEquals("\\?", StringUtils.convertSqlWildcardToLucene("\\?"));
19+
assertEquals("%*", StringUtils.convertSqlWildcardToLucene("\\%%"));
20+
assertEquals("*%", StringUtils.convertSqlWildcardToLucene("%\\%"));
21+
assertEquals("%*%", StringUtils.convertSqlWildcardToLucene("\\%%\\%"));
22+
assertEquals("*%*", StringUtils.convertSqlWildcardToLucene("%\\%%"));
23+
assertEquals("_?", StringUtils.convertSqlWildcardToLucene("\\__"));
24+
assertEquals("?_", StringUtils.convertSqlWildcardToLucene("_\\_"));
25+
assertEquals("_?_", StringUtils.convertSqlWildcardToLucene("\\__\\_"));
26+
assertEquals("?_?", StringUtils.convertSqlWildcardToLucene("_\\__"));
27+
assertEquals("%\\*_\\?", StringUtils.convertSqlWildcardToLucene("\\%\\*\\_\\?"));
28+
}
29+
}

core/src/main/java/org/opensearch/sql/expression/DSL.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -698,6 +698,10 @@ public static FunctionExpression match_bool_prefix(Expression... args) {
698698
return compile(BuiltinFunctionName.MATCH_BOOL_PREFIX, args);
699699
}
700700

701+
public static FunctionExpression wildcard_query(Expression... args) {
702+
return compile(BuiltinFunctionName.WILDCARD_QUERY, args);
703+
}
704+
701705
public static FunctionExpression now(Expression... args) {
702706
return compile(BuiltinFunctionName.NOW, args);
703707
}

core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,9 @@ public enum BuiltinFunctionName {
221221
QUERY(FunctionName.of("query")),
222222
MATCH_QUERY(FunctionName.of("match_query")),
223223
MATCHQUERY(FunctionName.of("matchquery")),
224-
MULTI_MATCH(FunctionName.of("multi_match"));
224+
MULTI_MATCH(FunctionName.of("multi_match")),
225+
WILDCARDQUERY(FunctionName.of("wildcardquery")),
226+
WILDCARD_QUERY(FunctionName.of("wildcard_query"));
225227

226228
private final FunctionName name;
227229

core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ public void register(BuiltinFunctionRepository repository) {
3737
repository.register(match_phrase(BuiltinFunctionName.MATCH_PHRASE));
3838
repository.register(match_phrase(BuiltinFunctionName.MATCHPHRASE));
3939
repository.register(match_phrase_prefix());
40+
repository.register(wildcard_query(BuiltinFunctionName.WILDCARD_QUERY));
41+
repository.register(wildcard_query(BuiltinFunctionName.WILDCARDQUERY));
4042
}
4143

4244
private static FunctionResolver match_bool_prefix() {
@@ -79,6 +81,11 @@ private static FunctionResolver query_string() {
7981
return new RelevanceFunctionResolver(funcName, STRUCT);
8082
}
8183

84+
private static FunctionResolver wildcard_query(BuiltinFunctionName wildcardQuery) {
85+
FunctionName funcName = wildcardQuery.getName();
86+
return new RelevanceFunctionResolver(funcName, STRING);
87+
}
88+
8289
public static class OpenSearchFunction extends FunctionExpression {
8390
private final FunctionName functionName;
8491
private final List<Expression> arguments;

core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,34 @@ void query_string_expression_two_fields() {
540540
AstDSL.unresolvedArg("query", stringLiteral("query_value"))));
541541
}
542542

543+
/**
 * Pairs the expression built by DSL.wildcard_query (named arguments "field" and "query")
 * with the unresolved "wildcard_query" AST function carrying the same two arguments,
 * and hands both to assertAnalyzeEqual.
 * NOTE(review): presumably assertAnalyzeEqual analyzes the AST node and compares the
 * result with the first argument - confirm against its definition.
 */
@Test
544+
void wildcard_query_expression() {
545+
assertAnalyzeEqual(
546+
DSL.wildcard_query(
547+
DSL.namedArgument("field", DSL.literal("test")),
548+
DSL.namedArgument("query", DSL.literal("query_value*"))),
549+
AstDSL.function("wildcard_query",
550+
unresolvedArg("field", stringLiteral("test")),
551+
unresolvedArg("query", stringLiteral("query_value*"))));
552+
}
553+
554+
/**
 * Same pairing as the two-argument wildcard_query case, but with the optional
 * "boost", "case_insensitive" and "rewrite" arguments supplied as string literals
 * on both the DSL side and the unresolved AST side.
 * NOTE(review): presumably assertAnalyzeEqual analyzes the AST node and compares the
 * result with the first argument - confirm against its definition.
 */
@Test
555+
void wildcard_query_expression_all_params() {
556+
assertAnalyzeEqual(
557+
DSL.wildcard_query(
558+
DSL.namedArgument("field", DSL.literal("test")),
559+
DSL.namedArgument("query", DSL.literal("query_value*")),
560+
DSL.namedArgument("boost", DSL.literal("1.5")),
561+
DSL.namedArgument("case_insensitive", DSL.literal("true")),
562+
DSL.namedArgument("rewrite", DSL.literal("scoring_boolean"))),
563+
AstDSL.function("wildcard_query",
564+
unresolvedArg("field", stringLiteral("test")),
565+
unresolvedArg("query", stringLiteral("query_value*")),
566+
unresolvedArg("boost", stringLiteral("1.5")),
567+
unresolvedArg("case_insensitive", stringLiteral("true")),
568+
unresolvedArg("rewrite", stringLiteral("scoring_boolean"))));
569+
}
570+
543571
@Test
544572
public void match_phrase_prefix_all_params() {
545573
assertAnalyzeEqual(

core/src/test/java/org/opensearch/sql/expression/function/OpenSearchFunctionsTest.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,4 +197,12 @@ void query_string() {
197197
fields.getValue(), query.getValue()),
198198
expr.toString());
199199
}
200+
201+
@Test
202+
void wildcard_query() {
203+
FunctionExpression expr = DSL.wildcard_query(field, query);
204+
assertEquals(String.format("wildcard_query(field=%s, query=%s)",
205+
field.getValue(), query.getValue()),
206+
expr.toString());
207+
}
200208
}

docs/user/dql/functions.rst

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3118,6 +3118,58 @@ Example searching for field Tags::
31183118
| [Winnie-the-<em>Pooh</em>] |
31193119
+----------------------------------------------+
31203120

3121+
WILDCARD_QUERY
3122+
--------------
3123+
3124+
Description
3125+
>>>>>>>>>>>
3126+
3127+
``wildcard_query(field_expression, query_expression[, option=<option_value>]*)``
3128+
3129+
The ``wildcard_query`` function maps to the ``wildcard`` query used in the search engine. It returns documents whose specified field matches the provided wildcard pattern.
3130+
OpenSearch supports wildcard characters ``*`` and ``?``. See the full description here: https://opensearch.org/docs/latest/opensearch/query-dsl/term/#wildcards.
3131+
The SQL wildcard characters ``%`` and ``_`` can be escaped with a backslash (``\%`` and ``\_``) to match them literally.
3132+
3133+
Available parameters include:
3134+
3135+
- boost
3136+
- case_insensitive
3137+
- rewrite
3138+
3139+
For backward compatibility, ``wildcardquery`` is also supported and is mapped to the same query as ``wildcard_query``.
3140+
3141+
Example with only the ``field`` and ``query`` expressions; all other parameters are set to their default values::
3142+
3143+
os> select Body from wildcard where wildcard_query(Body, 'test wildcard*');
3144+
fetched rows / total rows = 7/7
3145+
+-------------------------------------------+
3146+
| Body |
3147+
|-------------------------------------------|
3148+
| test wildcard |
3149+
| test wildcard in the end of the text% |
3150+
| test wildcard in % the middle of the text |
3151+
| test wildcard %% beside each other |
3152+
| test wildcard in the end of the text_ |
3153+
| test wildcard in _ the middle of the text |
3154+
| test wildcard __ beside each other |
3155+
+-------------------------------------------+
3156+
3157+
Another example to show how to set custom values for the optional parameters::
3158+
3159+
os> select Body from wildcard where wildcard_query(Body, 'test wildcard*', boost=0.7, case_insensitive=true, rewrite='constant_score');
3160+
fetched rows / total rows = 7/7
3161+
+-------------------------------------------+
3162+
| Body |
3163+
|-------------------------------------------|
3164+
| test wildcard |
3165+
| test wildcard in the end of the text% |
3166+
| test wildcard in % the middle of the text |
3167+
| test wildcard %% beside each other |
3168+
| test wildcard in the end of the text_ |
3169+
| test wildcard in _ the middle of the text |
3170+
| test wildcard __ beside each other |
3171+
+-------------------------------------------+
3172+
31213173
System Functions
31223174
================
31233175

@@ -3142,3 +3194,5 @@ Example::
31423194
|----------------+---------------+-----------------+------------------|
31433195
| DATE | INTEGER | DATETIME | STRUCT |
31443196
+----------------+---------------+-----------------+------------------+
3197+
3198+

doctest/test_data/wildcard.json

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
{"index":{"_id":"0"}}
2+
{"Body":"test wildcard"}
3+
{"index":{"_id":"1"}}
4+
{"Body":"test wildcard in the end of the text%"}
5+
{"index":{"_id":"2"}}
6+
{"Body":"%test wildcard in the beginning of the text"}
7+
{"index":{"_id":"3"}}
8+
{"Body":"test wildcard in % the middle of the text"}
9+
{"index":{"_id":"4"}}
10+
{"Body":"test wildcard %% beside each other"}
11+
{"index":{"_id":"5"}}
12+
{"Body":"test wildcard in the end of the text_"}
13+
{"index":{"_id":"6"}}
14+
{"Body":"_test wildcard in the beginning of the text"}
15+
{"index":{"_id":"7"}}
16+
{"Body":"test wildcard in _ the middle of the text"}
17+
{"index":{"_id":"8"}}
18+
{"Body":"test wildcard __ beside each other"}
19+
{"index":{"_id":"9"}}
20+
{"Body":"test backslash wildcard \\_"}

doctest/test_docs.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
NYC_TAXI = "nyc_taxi"
2727
BOOKS = "books"
2828
APACHE = "apache"
29+
WILDCARD = "wildcard"
2930

3031

3132
class DocTestConnection(OpenSearchConnection):
@@ -92,6 +93,7 @@ def set_up_test_indices(test):
9293
load_file("nyc_taxi.json", index_name=NYC_TAXI)
9394
load_file("books.json", index_name=BOOKS)
9495
load_file("apache.json", index_name=APACHE)
96+
load_file("wildcard.json", index_name=WILDCARD)
9597

9698

9799
def load_file(filename, index_name):
@@ -120,7 +122,7 @@ def set_up(test):
120122

121123
def tear_down(test):
122124
# drop leftover tables after each test
123-
test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE], ignore_unavailable=True)
125+
test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE, WILDCARD], ignore_unavailable=True)
124126

125127

126128
docsuite = partial(doctest.DocFileSuite,

doctest/test_mapping/wildcard.json

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"mappings" : {
3+
"properties" : {
4+
"Body" : {
5+
"type" : "keyword"
6+
}
7+
}
8+
}
9+
}

integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -584,7 +584,11 @@ public enum Index {
584584
CALCS(TestsConstants.TEST_INDEX_CALCS,
585585
"calcs",
586586
getMappingFile("calcs_index_mappings.json"),
587-
"src/test/resources/calcs.json"),;
587+
"src/test/resources/calcs.json"),
588+
WILDCARD(TestsConstants.TEST_INDEX_WILDCARD,
589+
"wildcard",
590+
getMappingFile("wildcard_index_mappings.json"),
591+
"src/test/resources/wildcard.json"),;
588592

589593
private final String name;
590594
private final String type;

integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ public class TestsConstants {
5353
public final static String TEST_INDEX_BEER = TEST_INDEX + "_beer";
5454
public final static String TEST_INDEX_NULL_MISSING = TEST_INDEX + "_null_missing";
5555
public final static String TEST_INDEX_CALCS = TEST_INDEX + "_calcs";
56+
public final static String TEST_INDEX_WILDCARD = TEST_INDEX + "_wildcard";
5657

5758
public final static String DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'";
5859
public final static String TS_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss.SSS";

0 commit comments

Comments
 (0)