Skip to content
This repository was archived by the owner on Aug 2, 2022. It is now read-only.

Commit 0cb3240

Browse files
authored
Support SELECT DISTINCT in new SQL engine (#833)
* Change grammar * Add UT and AST builder * Pass jacoco * Pass jacoco * Add comparison test * Add doctest * Change doc * Prepare PR * Add doc for distinct * limitation
1 parent 9f65c67 commit 0cb3240

File tree

7 files changed

+98
-9
lines changed

7 files changed

+98
-9
lines changed

docs/user/dql/basics.rst

+13-2
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ Syntax
2020

2121
The syntax of ``SELECT`` statement is as follows::
2222

23-
SELECT [DISTINCT] (* | expression) [[AS] alias] [, ...]
23+
SELECT [ALL | DISTINCT] (* | expression) [[AS] alias] [, ...]
2424
FROM index_name
2525
[WHERE predicates]
2626
[GROUP BY expression [, ...]
@@ -199,7 +199,7 @@ Result set:
199199
Example 4: Selecting Distinct Fields
200200
------------------------------------
201201

202-
``DISTINCT`` is useful when you want to de-duplicate and get unique field value. You can provide one or more field names.
202+
By default, ``SELECT ALL`` is in effect and returns all rows. ``DISTINCT`` is useful when you want to de-duplicate and get unique field values. You can provide one or more field names (``DISTINCT *`` is not supported yet).
203203
204204
SQL query::
205205

@@ -255,6 +255,17 @@ Result set:
255255
| 36|
256256
+---+
257257

258+
In fact, you can use any expression in a ``DISTINCT`` clause as follows::
259+
260+
od> SELECT DISTINCT SUBSTRING(lastname, 1, 1) FROM accounts;
261+
fetched rows / total rows = 3/3
262+
+-----------------------------+
263+
| SUBSTRING(lastname, 1, 1) |
264+
|-----------------------------|
265+
| A |
266+
| B |
267+
| D |
268+
+-----------------------------+
258269

259270
FROM
260271
====

integ-test/src/test/resources/correctness/queries/select.txt

+5
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,8 @@ SELECT AvgTicketPrice, Carrier FROM kibana_sample_data_flights WHERE AvgTicketPr
2020
SELECT AvgTicketPrice, Carrier FROM kibana_sample_data_flights WHERE ABS(AvgTicketPrice * -2) > 1000
2121
SELECT AvgTicketPrice, Carrier FROM kibana_sample_data_flights WHERE Carrier LIKE 'JetBeat_'
2222
SELECT AvgTicketPrice, Carrier FROM kibana_sample_data_flights WHERE Carrier LIKE '%Air%'
23+
SELECT ALL OriginWeather FROM kibana_sample_data_flights
24+
SELECT DISTINCT OriginWeather FROM kibana_sample_data_flights
25+
SELECT DISTINCT OriginWeather, FlightDelay FROM kibana_sample_data_flights
26+
SELECT DISTINCT SUBSTRING(OriginWeather, 1, 1) AS origin FROM kibana_sample_data_flights
27+
SELECT DISTINCT SUBSTRING(OriginWeather, 1, 1) AS origin, FlightDelay FROM kibana_sample_data_flights

sql/src/main/antlr/OpenDistroSQLParser.g4

+5-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,11 @@ querySpecification
6464
;
6565

6666
selectClause
67-
: SELECT selectElements
67+
: SELECT selectSpec? selectElements
68+
;
69+
70+
selectSpec
71+
: (ALL | DISTINCT)
6872
;
6973

7074
selectElements

sql/src/main/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstAggregationBuilder.java

+4-6
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
import com.amazon.opendistroforelasticsearch.sql.ast.tree.UnresolvedPlan;
2828
import com.amazon.opendistroforelasticsearch.sql.common.utils.StringUtils;
2929
import com.amazon.opendistroforelasticsearch.sql.exception.SemanticCheckException;
30-
import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.GroupByClauseContext;
3130
import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParserBaseVisitor;
3231
import com.amazon.opendistroforelasticsearch.sql.sql.parser.context.QuerySpecification;
3332
import java.util.ArrayList;
@@ -42,7 +41,7 @@
4241
* AST aggregation builder that builds AST aggregation node for the following scenarios:
4342
*
4443
* 1. Explicit GROUP BY
45-
* 1.1 Group by column name or scalar expression:
44+
* 1.1 Group by column name or scalar expression (SELECT DISTINCT equivalent):
4645
* SELECT ABS(age) FROM test GROUP BY ABS(age)
4746
* 1.2 Group by alias in SELECT AS clause:
4847
* SELECT state AS s FROM test GROUP BY s
@@ -77,18 +76,17 @@ public class AstAggregationBuilder extends OpenDistroSQLParserBaseVisitor<Unreso
7776

7877
@Override
7978
public UnresolvedPlan visit(ParseTree groupByClause) {
80-
if (groupByClause == null) {
79+
if (querySpec.getGroupByItems().isEmpty()) {
8180
if (isAggregatorNotFoundAnywhere()) {
8281
// Simple select query without GROUP BY and aggregate function in SELECT
8382
return null;
8483
}
8584
return buildImplicitAggregation();
8685
}
87-
return super.visit(groupByClause);
86+
return buildExplicitAggregation();
8887
}
8988

90-
@Override
91-
public UnresolvedPlan visitGroupByClause(GroupByClauseContext ctx) {
89+
private UnresolvedPlan buildExplicitAggregation() {
9290
List<UnresolvedExpression> groupByItems = replaceGroupByItemIfAliasOrOrdinal();
9391
return new Aggregation(
9492
new ArrayList<>(querySpec.getAggregators()),

sql/src/main/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/context/QuerySpecification.java

+17
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.GroupByElementContext;
2020
import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.OrderByElementContext;
21+
import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.SelectClauseContext;
2122
import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.SelectElementContext;
2223
import static com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.SubqueryAsRelationContext;
2324
import static com.amazon.opendistroforelasticsearch.sql.sql.parser.ParserUtils.getTextInQuery;
@@ -34,6 +35,7 @@
3435
import com.amazon.opendistroforelasticsearch.sql.exception.SemanticCheckException;
3536
import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.AggregateFunctionCallContext;
3637
import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.QuerySpecificationContext;
38+
import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParser.SelectSpecContext;
3739
import com.amazon.opendistroforelasticsearch.sql.sql.antlr.parser.OpenDistroSQLParserBaseVisitor;
3840
import com.amazon.opendistroforelasticsearch.sql.sql.parser.AstExpressionBuilder;
3941
import java.util.ArrayList;
@@ -181,6 +183,17 @@ public Void visitSubqueryAsRelation(SubqueryAsRelationContext ctx) {
181183
return null;
182184
}
183185

186+
@Override
187+
public Void visitSelectClause(SelectClauseContext ctx) {
188+
super.visitSelectClause(ctx);
189+
190+
// SELECT DISTINCT is an equivalent and special form of GROUP BY
191+
if (isDistinct(ctx.selectSpec())) {
192+
groupByItems.addAll(selectItems);
193+
}
194+
return null;
195+
}
196+
184197
@Override
185198
public Void visitSelectElement(SelectElementContext ctx) {
186199
UnresolvedExpression expr = visitAstExpression(ctx.expression());
@@ -215,6 +228,10 @@ public Void visitAggregateFunctionCall(AggregateFunctionCallContext ctx) {
215228
return super.visitAggregateFunctionCall(ctx);
216229
}
217230

231+
private boolean isDistinct(SelectSpecContext ctx) {
232+
return (ctx != null) && (ctx.DISTINCT() != null);
233+
}
234+
218235
private SortOrder visitSortOrder(Token ctx) {
219236
if (ctx == null) {
220237
return null;

sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/antlr/SQLSyntaxParserTest.java

+6
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,12 @@ public void canParseGroupByClause() {
116116
assertNotNull(parser.parse("SELECT ABS(balance) FROM test GROUP BY 1"));
117117
}
118118

119+
@Test
120+
public void canParseDistinctClause() {
121+
assertNotNull(parser.parse("SELECT DISTINCT name FROM test"));
122+
assertNotNull(parser.parse("SELECT DISTINCT name, balance FROM test"));
123+
}
124+
119125
@Test
120126
public void canParseCaseStatement() {
121127
assertNotNull(parser.parse("SELECT CASE WHEN age > 30 THEN 'age1' ELSE 'age2' END FROM test"));

sql/src/test/java/com/amazon/opendistroforelasticsearch/sql/sql/parser/AstBuilderTest.java

+48
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,54 @@ public void can_build_order_by_multiple_field_names() {
402402
buildAST("SELECT name, age FROM test ORDER BY name, age DESC"));
403403
}
404404

405+
@Test
406+
public void can_build_select_distinct_clause() {
407+
assertEquals(
408+
project(
409+
agg(
410+
relation("test"),
411+
emptyList(),
412+
emptyList(),
413+
ImmutableList.of(
414+
alias("name", qualifiedName("name")),
415+
alias("age", qualifiedName("age"))),
416+
emptyList()),
417+
alias("name", qualifiedName("name")),
418+
alias("age", qualifiedName("age"))),
419+
buildAST("SELECT DISTINCT name, age FROM test"));
420+
}
421+
422+
@Test
423+
public void can_build_select_distinct_clause_with_function() {
424+
assertEquals(
425+
project(
426+
agg(
427+
relation("test"),
428+
emptyList(),
429+
emptyList(),
430+
ImmutableList.of(
431+
alias("SUBSTRING(name, 1, 2)",
432+
function(
433+
"SUBSTRING",
434+
qualifiedName("name"),
435+
intLiteral(1), intLiteral(2)))),
436+
emptyList()),
437+
alias("SUBSTRING(name, 1, 2)",
438+
function(
439+
"SUBSTRING",
440+
qualifiedName("name"),
441+
intLiteral(1), intLiteral(2)))),
442+
buildAST("SELECT DISTINCT SUBSTRING(name, 1, 2) FROM test"));
443+
}
444+
445+
@Test
446+
public void can_build_select_all_clause() {
447+
assertEquals(
448+
buildAST("SELECT name, age FROM test"),
449+
buildAST("SELECT ALL name, age FROM test")
450+
);
451+
}
452+
405453
@Test
406454
public void can_build_order_by_null_option() {
407455
assertEquals(

0 commit comments

Comments
 (0)