Skip to content

Commit 5aa86d7

Browse files
acarbonetto14yapkc1kenrickyap
authored
PPL: Add json function and cast(x as json) function (#3243)
* added implementation Signed-off-by: Kenrick Yap <[email protected]> * added doctest, integ-tests, and unit tests Signed-off-by: Kenrick Yap <[email protected]> * addressed pr comments Signed-off-by: Kenrick Yap <[email protected]> * addressed PR comments Signed-off-by: Kenrick Yap <[email protected]> * removed unused dependencies Signed-off-by: Kenrick Yap <[email protected]> * linting Signed-off-by: Kenrick Yap <[email protected]> * addressed pr comment and rolling back disabled test case Signed-off-by: Kenrick Yap <[email protected]> * removed disabled import Signed-off-by: Kenrick Yap <[email protected]> * nit Signed-off-by: Kenrick Yap <[email protected]> * Update integ-test/src/test/java/org/opensearch/sql/ppl/JsonFunctionIT.java Co-authored-by: Andrew Carbonetto <[email protected]> Signed-off-by: kenrickyap <[email protected]> * fixed integ test Signed-off-by: Kenrick Yap <[email protected]> * SQL: adding error case unit tests for json_valid Signed-off-by: Andrew Carbonetto <[email protected]> * json_valid: null and missing should return false Signed-off-by: Andrew Carbonetto <[email protected]> * PPL: Add json and cast to json functions Signed-off-by: Andrew Carbonetto <[email protected]> * PPL: Update json cast for review Signed-off-by: Andrew Carbonetto <[email protected]> * Fix testes Signed-off-by: Andrew Carbonetto <[email protected]> * spotless Signed-off-by: Andrew Carbonetto <[email protected]> * Fix tests Signed-off-by: Andrew Carbonetto <[email protected]> * SPOTLESS Signed-off-by: Andrew Carbonetto <[email protected]> * Clean up for merge Signed-off-by: Andrew Carbonetto <[email protected]> * clean up unit tests Signed-off-by: Andrew Carbonetto <[email protected]> * Add casting from undefined Signed-off-by: Andrew Carbonetto <[email protected]> * Add cast to scalar from undefined expression Signed-off-by: Andrew Carbonetto <[email protected]> * Add test for missing/null Signed-off-by: Andrew Carbonetto <[email protected]> * Clean up merge 
conflicts Signed-off-by: Andrew Carbonetto <[email protected]> * Fix jacoco coverage Signed-off-by: Andrew Carbonetto <[email protected]> * Move to Switch by json type Signed-off-by: Andrew Carbonetto <[email protected]> * Remove conflicted files Signed-off-by: Andrew Carbonetto <[email protected]> * Add doctext row Signed-off-by: Andrew Carbonetto <[email protected]> * Adding more json tests Signed-off-by: Andrew Carbonetto <[email protected]> * adding more json IT tests Signed-off-by: Andrew Carbonetto <[email protected]> --------- Signed-off-by: Kenrick Yap <[email protected]> Signed-off-by: Kenrick Yap <[email protected]> Signed-off-by: kenrickyap <[email protected]> Signed-off-by: Andrew Carbonetto <[email protected]> Signed-off-by: Andrew Carbonetto <[email protected]> Co-authored-by: Kenrick Yap <[email protected]> Co-authored-by: Kenrick Yap <[email protected]> Co-authored-by: kenrickyap <[email protected]>
1 parent 08a1899 commit 5aa86d7

File tree

13 files changed

+645
-37
lines changed

13 files changed

+645
-37
lines changed

core/src/main/java/org/opensearch/sql/ast/expression/Cast.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_FLOAT;
1414
import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_INT;
1515
import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_IP;
16+
import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_JSON;
1617
import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_LONG;
1718
import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_SHORT;
1819
import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_STRING;
@@ -56,6 +57,7 @@ public class Cast extends UnresolvedExpression {
5657
.put("timestamp", CAST_TO_TIMESTAMP.getName())
5758
.put("datetime", CAST_TO_DATETIME.getName())
5859
.put("ip", CAST_TO_IP.getName())
60+
.put("json", CAST_TO_JSON.getName())
5961
.build();
6062

6163
/** The source expression cast from. */

core/src/main/java/org/opensearch/sql/expression/DSL.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -687,6 +687,10 @@ public static FunctionExpression jsonValid(Expression... expressions) {
687687
return compile(FunctionProperties.None, BuiltinFunctionName.JSON_VALID, expressions);
688688
}
689689

690+
/**
 * Builds the function expression for {@code BuiltinFunctionName.JSON} (the {@code json}
 * function) applied to a single argument, compiled with {@code FunctionProperties.None}.
 *
 * @param value argument expression handed to the {@code json} function
 * @return the function expression produced by {@code compile}
 */
public static FunctionExpression stringToJson(Expression value) {
691+
return compile(FunctionProperties.None, BuiltinFunctionName.JSON, value);
692+
}
693+
690694
/**
 * Builds the {@code BuiltinFunctionName.AVG} aggregator over the given expressions by delegating
 * to {@code aggregate}.
 *
 * @param expressions argument expressions passed through to {@code aggregate}
 * @return the aggregator returned by {@code aggregate}
 */
public static Aggregator avg(Expression... expressions) {
691695
return aggregate(BuiltinFunctionName.AVG, expressions);
692696
}
@@ -843,6 +847,10 @@ public static FunctionExpression castIp(Expression value) {
843847
return compile(FunctionProperties.None, BuiltinFunctionName.CAST_TO_IP, value);
844848
}
845849

850+
/**
 * Builds the function expression for {@code BuiltinFunctionName.CAST_TO_JSON} (the
 * {@code cast_to_json} function) applied to a single argument, compiled with
 * {@code FunctionProperties.None}.
 *
 * @param value expression to cast to JSON
 * @return the function expression produced by {@code compile}
 */
public static FunctionExpression castJson(Expression value) {
851+
return compile(FunctionProperties.None, BuiltinFunctionName.CAST_TO_JSON, value);
852+
}
853+
846854
/**
 * Builds the function expression for {@code BuiltinFunctionName.TYPEOF} applied to a single
 * argument, compiled with {@code FunctionProperties.None}.
 *
 * @param value expression whose type is being queried
 * @return the function expression produced by {@code compile}
 */
public static FunctionExpression typeof(Expression value) {
847855
return compile(FunctionProperties.None, BuiltinFunctionName.TYPEOF, value);
848856
}

core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@ public enum BuiltinFunctionName {
206206

207207
/** Json Functions. */
208208
JSON_VALID(FunctionName.of("json_valid")),
209+
JSON(FunctionName.of("json")),
209210

210211
/** GEOSPATIAL Functions. */
211212
GEOIP(FunctionName.of("geoip")),
@@ -238,6 +239,7 @@ public enum BuiltinFunctionName {
238239
CAST_TO_TIMESTAMP(FunctionName.of("cast_to_timestamp")),
239240
CAST_TO_DATETIME(FunctionName.of("cast_to_datetime")),
240241
CAST_TO_IP(FunctionName.of("cast_to_ip")),
242+
CAST_TO_JSON(FunctionName.of("cast_to_json")),
241243
TYPEOF(FunctionName.of("typeof")),
242244

243245
/** Relevance Function. */

core/src/main/java/org/opensearch/sql/expression/json/JsonFunctions.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,10 @@
77

88
import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN;
99
import static org.opensearch.sql.data.type.ExprCoreType.STRING;
10+
import static org.opensearch.sql.data.type.ExprCoreType.UNDEFINED;
1011
import static org.opensearch.sql.expression.function.FunctionDSL.define;
1112
import static org.opensearch.sql.expression.function.FunctionDSL.impl;
13+
import static org.opensearch.sql.expression.function.FunctionDSL.nullMissingHandling;
1214

1315
import lombok.experimental.UtilityClass;
1416
import org.opensearch.sql.expression.function.BuiltinFunctionName;
@@ -20,10 +22,17 @@
2022
/**
 * Registers the JSON-related built-in functions ({@code json_valid} and {@code json}) with a
 * {@link BuiltinFunctionRepository}.
 */
public class JsonFunctions {
2123
/**
 * Registers the resolvers built by {@code jsonValid()} and {@code jsonFunction()}.
 *
 * @param repository repository that receives the function resolvers
 */
public void register(BuiltinFunctionRepository repository) {
2224
repository.register(jsonValid());
25+
repository.register(jsonFunction());
2326
}
2427

2528
/**
 * Defines {@code json_valid}: takes a STRING argument, returns BOOLEAN, and is implemented by
 * {@code JsonUtils::isValidJson}. Unlike {@code jsonFunction()} the implementation is not wrapped
 * in {@code nullMissingHandling}, so the helper receives every argument value itself.
 */
private DefaultFunctionResolver jsonValid() {
2629
return define(
2730
BuiltinFunctionName.JSON_VALID.getName(), impl(JsonUtils::isValidJson, BOOLEAN, STRING));
2831
}
32+
33+
/**
 * Defines {@code json}: takes a STRING argument, declares an UNDEFINED return type, and is
 * implemented by {@code JsonUtils::castJson} wrapped in {@code nullMissingHandling}.
 * NOTE(review): the wrapper presumably short-circuits NULL/MISSING arguments before the helper
 * is called - confirm against FunctionDSL.
 */
private DefaultFunctionResolver jsonFunction() {
34+
return define(
35+
BuiltinFunctionName.JSON.getName(),
36+
impl(nullMissingHandling(JsonUtils::castJson), UNDEFINED, STRING));
37+
}
2938
}

core/src/main/java/org/opensearch/sql/expression/operator/convert/TypeCastOperators.java

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import static org.opensearch.sql.data.type.ExprCoreType.STRING;
1818
import static org.opensearch.sql.data.type.ExprCoreType.TIME;
1919
import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP;
20+
import static org.opensearch.sql.data.type.ExprCoreType.UNDEFINED;
2021
import static org.opensearch.sql.expression.function.FunctionDSL.impl;
2122
import static org.opensearch.sql.expression.function.FunctionDSL.implWithProperties;
2223
import static org.opensearch.sql.expression.function.FunctionDSL.nullMissingHandling;
@@ -42,6 +43,7 @@
4243
import org.opensearch.sql.expression.function.BuiltinFunctionRepository;
4344
import org.opensearch.sql.expression.function.DefaultFunctionResolver;
4445
import org.opensearch.sql.expression.function.FunctionDSL;
46+
import org.opensearch.sql.utils.JsonUtils;
4547

4648
@UtilityClass
4749
public class TypeCastOperators {
@@ -57,6 +59,7 @@ public static void register(BuiltinFunctionRepository repository) {
5759
repository.register(castToDouble());
5860
repository.register(castToBoolean());
5961
repository.register(castToIp());
62+
repository.register(castToJson());
6063
repository.register(castToDate());
6164
repository.register(castToTime());
6265
repository.register(castToTimestamp());
@@ -105,7 +108,8 @@ private static DefaultFunctionResolver castToShort() {
105108
impl(
106109
nullMissingHandling((v) -> new ExprShortValue(v.booleanValue() ? 1 : 0)),
107110
SHORT,
108-
BOOLEAN));
111+
BOOLEAN),
112+
impl(nullMissingHandling((v) -> v), SHORT, UNDEFINED));
109113
}
110114

111115
private static DefaultFunctionResolver castToInt() {
@@ -119,7 +123,8 @@ private static DefaultFunctionResolver castToInt() {
119123
impl(
120124
nullMissingHandling((v) -> new ExprIntegerValue(v.booleanValue() ? 1 : 0)),
121125
INTEGER,
122-
BOOLEAN));
126+
BOOLEAN),
127+
impl(nullMissingHandling((v) -> v), INTEGER, UNDEFINED));
123128
}
124129

125130
private static DefaultFunctionResolver castToLong() {
@@ -133,7 +138,8 @@ private static DefaultFunctionResolver castToLong() {
133138
impl(
134139
nullMissingHandling((v) -> new ExprLongValue(v.booleanValue() ? 1L : 0L)),
135140
LONG,
136-
BOOLEAN));
141+
BOOLEAN),
142+
impl(nullMissingHandling((v) -> v), LONG, UNDEFINED));
137143
}
138144

139145
private static DefaultFunctionResolver castToFloat() {
@@ -147,7 +153,8 @@ private static DefaultFunctionResolver castToFloat() {
147153
impl(
148154
nullMissingHandling((v) -> new ExprFloatValue(v.booleanValue() ? 1f : 0f)),
149155
FLOAT,
150-
BOOLEAN));
156+
BOOLEAN),
157+
impl(nullMissingHandling((v) -> v), FLOAT, UNDEFINED));
151158
}
152159

153160
private static DefaultFunctionResolver castToDouble() {
@@ -161,7 +168,8 @@ private static DefaultFunctionResolver castToDouble() {
161168
impl(
162169
nullMissingHandling((v) -> new ExprDoubleValue(v.booleanValue() ? 1D : 0D)),
163170
DOUBLE,
164-
BOOLEAN));
171+
BOOLEAN),
172+
impl(nullMissingHandling((v) -> v), DOUBLE, UNDEFINED));
165173
}
166174

167175
private static DefaultFunctionResolver castToBoolean() {
@@ -173,7 +181,8 @@ private static DefaultFunctionResolver castToBoolean() {
173181
STRING),
174182
impl(
175183
nullMissingHandling((v) -> ExprBooleanValue.of(v.doubleValue() != 0)), BOOLEAN, DOUBLE),
176-
impl(nullMissingHandling((v) -> v), BOOLEAN, BOOLEAN));
184+
impl(nullMissingHandling((v) -> v), BOOLEAN, BOOLEAN),
185+
impl(nullMissingHandling((v) -> v), BOOLEAN, UNDEFINED));
177186
}
178187

179188
private static DefaultFunctionResolver castToIp() {
@@ -183,6 +192,12 @@ private static DefaultFunctionResolver castToIp() {
183192
impl(nullMissingHandling((v) -> v), IP, IP));
184193
}
185194

195+
/**
 * Defines the {@code cast_to_json} function: takes a STRING argument, declares an UNDEFINED
 * return type, and is implemented by {@code JsonUtils::castJson} wrapped in
 * {@code nullMissingHandling}.
 * NOTE(review): the wrapper presumably short-circuits NULL/MISSING arguments before the helper
 * is called - confirm against FunctionDSL.
 */
private static DefaultFunctionResolver castToJson() {
196+
return FunctionDSL.define(
197+
BuiltinFunctionName.CAST_TO_JSON.getName(),
198+
impl(nullMissingHandling(JsonUtils::castJson), UNDEFINED, STRING));
199+
}
200+
186201
private static DefaultFunctionResolver castToDate() {
187202
return FunctionDSL.define(
188203
BuiltinFunctionName.CAST_TO_DATE.getName(),
Lines changed: 84 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,40 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
16
package org.opensearch.sql.utils;
27

8+
import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_FALSE;
9+
import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_NULL;
10+
import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_TRUE;
11+
312
import com.fasterxml.jackson.core.JsonProcessingException;
13+
import com.fasterxml.jackson.databind.JsonNode;
414
import com.fasterxml.jackson.databind.ObjectMapper;
15+
import java.util.LinkedHashMap;
16+
import java.util.LinkedList;
17+
import java.util.List;
18+
import java.util.Map;
519
import lombok.experimental.UtilityClass;
20+
import org.opensearch.sql.data.model.ExprBooleanValue;
21+
import org.opensearch.sql.data.model.ExprCollectionValue;
22+
import org.opensearch.sql.data.model.ExprDoubleValue;
23+
import org.opensearch.sql.data.model.ExprIntegerValue;
24+
import org.opensearch.sql.data.model.ExprNullValue;
25+
import org.opensearch.sql.data.model.ExprStringValue;
26+
import org.opensearch.sql.data.model.ExprTupleValue;
627
import org.opensearch.sql.data.model.ExprValue;
728
import org.opensearch.sql.data.model.ExprValueUtils;
29+
import org.opensearch.sql.exception.SemanticCheckException;
830

931
@UtilityClass
1032
public class JsonUtils {
1133
/**
1234
* Checks if given JSON string can be parsed as valid JSON.
1335
*
1436
* @param jsonExprValue JSON string (e.g. "{\"hello\": \"world\"}").
15-
* @return true if the string can be parsed as valid JSON, else false.
37+
* @return true if the string can be parsed as valid JSON, else false (including null or missing).
1638
*/
1739
public static ExprValue isValidJson(ExprValue jsonExprValue) {
1840
ObjectMapper objectMapper = new ObjectMapper();
@@ -23,9 +45,68 @@ public static ExprValue isValidJson(ExprValue jsonExprValue) {
2345

2446
try {
2547
objectMapper.readTree(jsonExprValue.stringValue());
26-
return ExprValueUtils.LITERAL_TRUE;
48+
return LITERAL_TRUE;
2749
} catch (JsonProcessingException e) {
28-
return ExprValueUtils.LITERAL_FALSE;
50+
return LITERAL_FALSE;
51+
}
52+
}
53+
54+
/**
55+
* Converts a JSON encoded string to a {@link ExprValue}. Expression type will be UNDEFINED.
56+
*
57+
* @param json JSON string (e.g. "{\"hello\": \"world\"}").
58+
* @return ExprValue returns an expression that best represents the provided JSON-encoded string.
59+
* <ol>
60+
* <li>{@link ExprTupleValue} if the JSON is an object
61+
* <li>{@link ExprCollectionValue} if the JSON is an array
62+
* <li>{@link ExprDoubleValue} if the JSON is a floating-point number scalar
63+
* <li>{@link ExprIntegerValue} if the JSON is an integral number scalar
64+
* <li>{@link ExprStringValue} if the JSON is a string scalar
65+
* <li>{@link ExprBooleanValue} if the JSON is a boolean scalar
66+
* <li>{@link ExprNullValue} if the JSON is null, empty, or invalid
67+
* </ol>
68+
*/
69+
public static ExprValue castJson(ExprValue json) {
70+
ObjectMapper objectMapper = new ObjectMapper();
71+
JsonNode jsonNode;
72+
try {
73+
jsonNode = objectMapper.readTree(json.stringValue());
74+
} catch (JsonProcessingException e) {
75+
final String errorFormat = "JSON string '%s' is not valid. Error details: %s";
76+
throw new SemanticCheckException(String.format(errorFormat, json, e.getMessage()), e);
77+
}
78+
79+
return processJsonNode(jsonNode);
80+
}
81+
82+
private static ExprValue processJsonNode(JsonNode jsonNode) {
83+
switch (jsonNode.getNodeType()) {
84+
case ARRAY:
85+
List<ExprValue> elements = new LinkedList<>();
86+
for (var iter = jsonNode.iterator(); iter.hasNext(); ) {
87+
jsonNode = iter.next();
88+
elements.add(processJsonNode(jsonNode));
89+
}
90+
return new ExprCollectionValue(elements);
91+
case OBJECT:
92+
Map<String, ExprValue> values = new LinkedHashMap<>();
93+
for (var iter = jsonNode.fields(); iter.hasNext(); ) {
94+
Map.Entry<String, JsonNode> entry = iter.next();
95+
values.put(entry.getKey(), processJsonNode(entry.getValue()));
96+
}
97+
return ExprTupleValue.fromExprValueMap(values);
98+
case STRING:
99+
return new ExprStringValue(jsonNode.asText());
100+
case NUMBER:
101+
if (jsonNode.isFloatingPointNumber()) {
102+
return new ExprDoubleValue(jsonNode.asDouble());
103+
}
104+
return new ExprIntegerValue(jsonNode.asLong());
105+
case BOOLEAN:
106+
return jsonNode.asBoolean() ? LITERAL_TRUE : LITERAL_FALSE;
107+
default:
108+
// in all other cases, return null
109+
return LITERAL_NULL;
29110
}
30111
}
31112
}

0 commit comments

Comments
 (0)