Skip to content

Commit d0093af

Browse files
authored
[BugFix] fix wrong order by scope for distinct query (backport #37910) (#38057)
Signed-off-by: packy92 <[email protected]>
1 parent 5f0c8df commit d0093af

File tree

6 files changed

+168
-26
lines changed

6 files changed

+168
-26
lines changed

fe/fe-core/src/main/java/com/starrocks/qe/SessionVariable.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -432,6 +432,8 @@ public static MaterializedViewRewriteMode parse(String str) {
432432

433433
public static final String CBO_EQ_BASE_TYPE = "cbo_eq_base_type";
434434

435+
public static final String ENABLE_STRICT_ORDER_BY = "enable_strict_order_by";
436+
435437
public static final List<String> DEPRECATED_VARIABLES = ImmutableList.<String>builder()
436438
.add(CODEGEN_LEVEL)
437439
.add(ENABLE_SPILLING)
@@ -1072,6 +1074,9 @@ public String getCboEqBaseType() {
10721074
@VarAttr(name = FOLLOWER_QUERY_FORWARD_MODE, flag = VariableMgr.INVISIBLE | VariableMgr.DISABLE_FORWARD_TO_LEADER)
10731075
private String followerForwardMode = "";
10741076

1077+
@VarAttr(name = ENABLE_STRICT_ORDER_BY)
1078+
private boolean enableStrictOrderBy = true;
1079+
10751080
public void setFollowerQueryForwardMode(String mode) {
10761081
this.followerForwardMode = mode;
10771082
}
@@ -1996,6 +2001,14 @@ public void setCrossJoinCostPenalty(long crossJoinCostPenalty) {
19962001
this.crossJoinCostPenalty = crossJoinCostPenalty;
19972002
}
19982003

2004+
public boolean isEnableStrictOrderBy() {
2005+
return enableStrictOrderBy;
2006+
}
2007+
2008+
public void setEnableStrictOrderBy(boolean enableStrictOrderBy) {
2009+
this.enableStrictOrderBy = enableStrictOrderBy;
2010+
}
2011+
19992012
// Serialize to thrift object
20002013
// used for rest api
20012014
public TQueryOptions toThrift() {

fe/fe-core/src/main/java/com/starrocks/sql/analyzer/AggregationAnalyzer.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import com.starrocks.qe.ConnectContext;
3535
import com.starrocks.qe.SqlModeHelper;
3636
import com.starrocks.sql.ast.AstVisitor;
37+
import com.starrocks.sql.ast.FieldReference;
3738
import com.starrocks.sql.ast.LambdaFunctionExpr;
3839
import com.starrocks.sql.ast.QueryStatement;
3940

@@ -103,9 +104,15 @@ public Boolean visit(ParseNode expr) {
103104
return super.visit(expr);
104105
}
105106

107+
@Override
108+
public Boolean visitFieldReference(FieldReference node, Void context) {
109+
String colInfo = node.getTblName() == null ? "column" : "column of " + node.getTblName().toString();
110+
throw new SemanticException(colInfo + " must appear in the GROUP BY clause or be used in an aggregate function");
111+
}
112+
106113
@Override
107114
public Boolean visitExpression(Expr node, Void context) {
108-
throw new SemanticException("%s is not support in GROUP BY clause", node.toSql());
115+
throw new SemanticException(node.toSql() + " must appear in the GROUP BY clause or be used in an aggregate function");
109116
}
110117

111118
private boolean isGroupingKey(Expr node) {

fe/fe-core/src/main/java/com/starrocks/sql/analyzer/SelectAnalyzer.java

Lines changed: 61 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// This file is licensed under the Elastic License 2.0. Copyright 2021-present, StarRocks Inc.
22
package com.starrocks.sql.analyzer;
33

4+
import com.google.common.base.Preconditions;
45
import com.google.common.collect.ImmutableList;
56
import com.google.common.collect.Lists;
67
import com.google.common.collect.Sets;
@@ -72,10 +73,11 @@ public void analyze(AnalyzeState analyzeState,
7273
analyzeHaving(havingClause, analyzeState, sourceScope, outputScope, outputExpressions);
7374

7475
// Construct sourceAndOutputScope with sourceScope and outputScope
75-
Scope sourceAndOutputScope = computeAndAssignOrderScope(analyzeState, sourceScope, outputScope);
76+
Scope sourceAndOutputScope = computeAndAssignOrderScope(analyzeState, sourceScope, outputScope,
77+
selectList.isDistinct());
7678

7779
List<OrderByElement> orderByElements =
78-
analyzeOrderBy(sortClause, analyzeState, sourceAndOutputScope, outputExpressions);
80+
analyzeOrderBy(sortClause, analyzeState, sourceAndOutputScope, outputExpressions, selectList.isDistinct());
7981
List<Expr> orderByExpressions =
8082
orderByElements.stream().map(OrderByElement::getExpr).collect(Collectors.toList());
8183

@@ -165,7 +167,7 @@ public void analyze(AnalyzeState analyzeState,
165167
.collect(Collectors.toList());
166168

167169
Scope sourceScopeForOrder = new Scope(RelationId.anonymous(), new RelationFields(sourceForOrderFields));
168-
computeAndAssignOrderScope(analyzeState, sourceScopeForOrder, outputScope);
170+
computeAndAssignOrderScope(analyzeState, sourceScopeForOrder, outputScope, selectList.isDistinct());
169171
analyzeState.setOrderSourceExpressions(orderSourceExpressions);
170172
}
171173

@@ -300,7 +302,8 @@ private List<Expr> analyzeSelect(SelectList selectList, Relation fromRelation, b
300302

301303
private List<OrderByElement> analyzeOrderBy(List<OrderByElement> orderByElements, AnalyzeState analyzeState,
302304
Scope orderByScope,
303-
List<Expr> outputExpressions) {
305+
List<Expr> outputExpressions,
306+
boolean isDistinct) {
304307
if (orderByElements == null) {
305308
analyzeState.setOrderBy(Collections.emptyList());
306309
return Collections.emptyList();
@@ -316,20 +319,32 @@ private List<OrderByElement> analyzeOrderBy(List<OrderByElement> orderByElements
316319
if (ordinal < 1 || ordinal > outputExpressions.size()) {
317320
throw new SemanticException("ORDER BY position %s is not in select list", ordinal);
318321
}
322+
// index can ensure no ambiguous, we don't need to re-analyze this output expression
319323
expression = outputExpressions.get((int) ordinal - 1);
320-
}
321-
322-
if (expression instanceof FieldReference) {
323-
// If the expression of order by is a FieldReference, it means that the type of sql is
324+
} else if (expression instanceof FieldReference) {
325+
// If the expression of order by is a FieldReference, and it's not a distinct select,
326+
// it means that the type of sql is
324327
// "select * from t order by 1", then this FieldReference cannot be parsed in OrderByScope,
325328
// but should be parsed in sourceScope
326-
analyzeExpression(expression, analyzeState, orderByScope.getParent());
329+
if (isDistinct) {
330+
analyzeExpression(expression, analyzeState, orderByScope);
331+
} else {
332+
analyzeExpression(expression, analyzeState, orderByScope.getParent());
333+
}
327334
} else {
328335
ExpressionAnalyzer expressionAnalyzer = new ExpressionAnalyzer(session);
329336
expressionAnalyzer.analyzeWithoutUpdateState(expression, analyzeState, orderByScope);
330337
List<Expr> aggregations = Lists.newArrayList();
331338
expression.collectAll(e -> e.isAggregate(), aggregations);
332-
aggregations.forEach(e -> analyzeExpression(e, analyzeState, orderByScope.getParent()));
339+
if (isDistinct && !aggregations.isEmpty()) {
340+
throw new SemanticException("for SELECT DISTINCT, ORDER BY expressions must appear in select list");
341+
}
342+
343+
if (!aggregations.isEmpty()) {
344+
// use parent scope to analyze agg func firstly
345+
Preconditions.checkState(orderByScope.getParent() != null, "parent scope not be set");
346+
aggregations.forEach(e -> analyzeExpression(e, analyzeState, orderByScope.getParent()));
347+
}
333348
analyzeExpression(expression, analyzeState, orderByScope);
334349
}
335350

@@ -643,23 +658,24 @@ public Expr visitSlot(SlotRef slotRef, Void context) {
643658
}
644659
}
645660

646-
private Scope computeAndAssignOrderScope(AnalyzeState analyzeState, Scope sourceScope, Scope outputScope) {
647-
// The Scope used by order by allows parsing of the same column,
648-
// such as 'select v1 as v, v1 as v from t0 order by v'
649-
// but normal parsing does not allow it. So add a de-duplication operation here.
661+
private Scope computeAndAssignOrderScope(AnalyzeState analyzeState, Scope sourceScope, Scope outputScope,
662+
boolean isDistinct) {
663+
664+
List<Field> allFields = Lists.newArrayList();
665+
// order by can only "see" fields from distinct output
666+
if (isDistinct) {
667+
allFields = removeDuplicateField(outputScope.getRelationFields().getAllFields());
668+
Scope orderScope = new Scope(outputScope.getRelationId(), new RelationFields(allFields));
669+
analyzeState.setOrderScope(orderScope);
670+
return orderScope;
671+
}
650672

651-
List<Field> allFields = new ArrayList<>();
652673
for (int i = 0; i < analyzeState.getOutputExprInOrderByScope().size(); ++i) {
653674
Field field = outputScope.getRelationFields()
654675
.getFieldByIndex(analyzeState.getOutputExprInOrderByScope().get(i));
655-
if (field.getName() != null && field.getOriginExpression() != null &&
656-
allFields.stream().anyMatch(f -> f.getOriginExpression() != null
657-
&& f.getName() != null && field.getName().equals(f.getName())
658-
&& field.getOriginExpression().equals(f.getOriginExpression()))) {
659-
continue;
660-
}
661676
allFields.add(field);
662677
}
678+
allFields = removeDuplicateField(allFields);
663679

664680
Scope orderScope = new Scope(outputScope.getRelationId(), new RelationFields(allFields));
665681

@@ -676,4 +692,28 @@ private Scope computeAndAssignOrderScope(AnalyzeState analyzeState, Scope source
676692
private void analyzeExpression(Expr expr, AnalyzeState analyzeState, Scope scope) {
677693
ExpressionAnalyzer.analyzeExpression(expr, analyzeState, scope, session);
678694
}
695+
696+
// The Scope used by order by allows parsing of the same column,
697+
// such as 'select v1 as v, v1 as v from t0 order by v'
698+
// but normal parsing does not allow it. So add a de-duplication operation here.
699+
private List<Field> removeDuplicateField(List<Field> originalFields) {
700+
List<Field> allFields = Lists.newArrayList();
701+
for (Field field : originalFields) {
702+
if (session.getSessionVariable().isEnableStrictOrderBy()) {
703+
if (field.getName() != null && field.getOriginExpression() != null &&
704+
allFields.stream().anyMatch(f -> f.getOriginExpression() != null
705+
&& f.getName() != null && field.getName().equals(f.getName())
706+
&& field.getOriginExpression().equals(f.getOriginExpression()))) {
707+
continue;
708+
}
709+
} else {
710+
if (field.getName() != null &&
711+
allFields.stream().anyMatch(f -> f.getName() != null && field.getName().equals(f.getName()))) {
712+
continue;
713+
}
714+
}
715+
allFields.add(field);
716+
}
717+
return allFields;
718+
}
679719
}

fe/fe-core/src/main/java/com/starrocks/sql/ast/FieldReference.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@ public int getFieldIndex() {
3535
return fieldIndex;
3636
}
3737

38+
public TableName getTblName() {
39+
return tblName;
40+
}
41+
3842
@Override
3943
public boolean equals(Object o) {
4044
if (this == o) {

fe/fe-core/src/test/java/com/starrocks/sql/analyzer/AnalyzeAggregateTest.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,9 +109,11 @@ public void testDistinct() {
109109
analyzeSuccess("select distinct v1, v2 as v from t0 order by v");
110110
analyzeSuccess("select distinct abs(v1) as v from t0 order by v");
111111
analyzeFail("select distinct v1 from t0 order by v2",
112-
"must be an aggregate expression or appear in GROUP BY clause");
112+
"Column 'v2' cannot be resolved");
113113
analyzeFail("select distinct v1 as v from t0 order by v2",
114-
"must be an aggregate expression or appear in GROUP BY clause");
114+
"Column 'v2' cannot be resolved");
115+
analyzeFail("select * from t0 order by max(v2)",
116+
"column must appear in the GROUP BY clause or be used in an aggregate function");
115117

116118
analyzeSuccess("select distinct v1 as v from t0 having v = 1");
117119
analyzeFail("select distinct v1 as v from t0 having v2 = 2",
@@ -131,7 +133,7 @@ public void testDistinct() {
131133
analyzeSuccess("select distinct v1 from t0 having v1 = 1");
132134
analyzeSuccess("select distinct v1 from t0 where v2 = 1");
133135
analyzeFail("select distinct v1,v2 from t0 order by v3");
134-
analyzeSuccess("select distinct v1 from t0 order by sum(v2)");
136+
analyzeFail("select distinct v1 from t0 order by sum(v2)");
135137

136138
analyzeFail("select count(distinct v1), count(distinct v3) from tarray",
137139
"No matching function with signature: multi_distinct_count(ARRAY)");

fe/fe-core/src/test/java/com/starrocks/sql/plan/OrderByTest.java

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,26 @@
22

33
package com.starrocks.sql.plan;
44

5+
import com.google.common.collect.Lists;
56
import com.starrocks.qe.SessionVariable;
7+
import com.starrocks.sql.analyzer.SemanticException;
68
import org.junit.Assert;
7-
import org.junit.Test;
9+
import org.junit.jupiter.api.BeforeAll;
10+
import org.junit.jupiter.api.Test;
11+
import org.junit.jupiter.params.ParameterizedTest;
12+
import org.junit.jupiter.params.provider.Arguments;
13+
import org.junit.jupiter.params.provider.MethodSource;
14+
15+
import java.util.List;
16+
import java.util.stream.Stream;
817

918
public class OrderByTest extends PlanTestBase {
1019

20+
@BeforeAll
21+
public static void beforeClass() throws Exception {
22+
PlanTestBase.beforeClass();
23+
}
24+
1125
@Test
1226
public void testExistOrderBy() throws Exception {
1327
String sql = "SELECT * \n" +
@@ -482,4 +496,66 @@ public void testOrderByWithWindow() throws Exception {
482496
" | output: sum(2: v2)\n" +
483497
" | group by: 1: v1");
484498
}
499+
500+
@ParameterizedTest
501+
@MethodSource("failToStrictSql")
502+
void testFailToStrictOrderByExpression(String sql) {
503+
Assert.assertThrows(SemanticException.class, () -> getFragmentPlan(sql));
504+
}
505+
506+
@ParameterizedTest
507+
@MethodSource("successToStrictSql")
508+
void testSuccessToStrictOrderByExpression(String sql, String expectedPlan) throws Exception {
509+
String plan = getFragmentPlan(sql);
510+
assertContains(plan, expectedPlan);
511+
}
512+
513+
@ParameterizedTest
514+
@MethodSource("allOrderBySql")
515+
void testNotStrictOrderByExpression(String sql, String expectedPlan) throws Exception {
516+
String hint = "select /*+ set_var(enable_strict_order_by = false) */ ";
517+
sql = hint + sql.substring(7);
518+
String plan = getFragmentPlan(sql);
519+
assertContains(plan, expectedPlan);
520+
}
521+
522+
private static Stream<Arguments> allOrderBySql() {
523+
return Stream.concat(successToStrictSql(), failToStrictSql());
524+
}
525+
526+
private static Stream<Arguments> successToStrictSql() {
527+
List<Arguments> list = Lists.newArrayList();
528+
list.add(Arguments.of("select * from t0 order by 1", "order by: <slot 1> 1: v1 ASC"));
529+
list.add(Arguments.of("select abs(v1) v1, * from t0 order by 1", "order by: <slot 4> 4: abs ASC"));
530+
list.add(Arguments.of("select distinct * from t0 order by 1", "order by: <slot 1> 1: v1 ASC"));
531+
list.add(Arguments.of("select * from t0 order by v1", "order by: <slot 1> 1: v1 ASC"));
532+
list.add(Arguments.of("select t0.* from t0 order by t0.v1", "order by: <slot 1> 1: v1 ASC"));
533+
list.add(Arguments.of("select distinct t0.* from t0 order by t0.v1", "order by: <slot 1> 1: v1 ASC"));
534+
535+
536+
list.add(Arguments.of("select *, v1 from t0 order by v1", "order by: <slot 1> 1: v1 ASC"));
537+
list.add(Arguments.of("select *, v1 from t0 order by abs(v1)", "order by: <slot 4> 4: abs ASC"));
538+
list.add(Arguments.of("select v1, * from t0 order by abs(v1)", "order by: <slot 4> 4: abs ASC"));
539+
list.add(Arguments.of("select distinct * from t0 order by v1", "order by: <slot 1> 1: v1 ASC"));
540+
list.add(Arguments.of("select distinct *, v1 from t0 order by abs(v1)", "order by: <slot 4> 4: abs ASC"));
541+
return list.stream();
542+
}
543+
544+
private static Stream<Arguments> failToStrictSql() {
545+
List<Arguments> list = Lists.newArrayList();
546+
list.add(Arguments.of("select *, v1, abs(v1) v1 from t0 order by 1", "order by: <slot 1> 1: v1 ASC"));
547+
list.add(Arguments.of("select distinct *, v1, abs(v1) v1 from t0 order by 1", "order by: <slot 1> 1: v1 ASC"));
548+
list.add(Arguments.of("select distinct abs(v1) v1, * from t0 order by 1", "order by: <slot 4> 4: abs ASC"));
549+
list.add(Arguments.of("select distinct *, v1, abs(v1) v1 from t0 order by v1", "order by: <slot 1> 1: v1 ASC"));
550+
list.add(Arguments.of("select v1, max(v2) v1 from t0 group by v1 order by abs(v1)", "order by: <slot 5> 5: abs ASC"));
551+
list.add(Arguments.of("select max(v2) v1, v1 from t0 group by v1 order by abs(v1)", "order by: <slot 5> 5: abs ASC"));
552+
list.add(Arguments.of("select v2, max(v2) v2 from t0 group by v2 order by max(v2)", "order by: <slot 4> 4: max ASC"));
553+
list.add(Arguments.of("select max(v2) v2, v2 from t0 group by v2 order by max(v2)", "order by: <slot 4> 4: max ASC"));
554+
list.add(Arguments.of("select upper(v1) v1, *, v1 from t0 order by v1", "order by: <slot 4> 4: upper ASC"));
555+
list.add(Arguments.of("select *, v1, upper(v1) v1 from t0 order by v1", "order by: <slot 1> 1: v1 ASC"));
556+
list.add(Arguments.of("select distinct upper(v1) v1, *, v1 from t0 order by v1", "order by: <slot 1> 1: v1 ASC"));
557+
list.add(Arguments.of("select distinct *, v1, upper(v1) v1 from t0 order by v1", "order by: <slot 1> 1: v1 ASC"));
558+
559+
return list.stream();
560+
}
485561
}

0 commit comments

Comments
 (0)