Skip to content

Commit 6e46072

Browse files
authored
[BugFix] fix wrong order by scope for distinct query (backport #37910) (#38086)
Signed-off-by: packy92 <[email protected]>
1 parent fac5bfb commit 6e46072

File tree

6 files changed

+169
-25
lines changed

6 files changed

+169
-25
lines changed

fe/fe-core/src/main/java/com/starrocks/qe/SessionVariable.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,8 @@ public class SessionVariable implements Serializable, Writable, Cloneable {
375375

376376
public static final String LARGE_DECIMAL_UNDERLYING_TYPE = "large_decimal_underlying_type";
377377

378+
public static final String ENABLE_STRICT_ORDER_BY = "enable_strict_order_by";
379+
378380
// Flag to control whether to proxy follower's query statement to leader/follower.
379381
public enum FollowerQueryForwardMode {
380382
DEFAULT, // proxy queries by the follower's replay progress (default)
@@ -1270,6 +1272,9 @@ public String getCboEqBaseType() {
12701272
@VarAttr(name = FOLLOWER_QUERY_FORWARD_MODE, flag = VariableMgr.INVISIBLE | VariableMgr.DISABLE_FORWARD_TO_LEADER)
12711273
private String followerForwardMode = "";
12721274

1275+
@VarAttr(name = ENABLE_STRICT_ORDER_BY)
1276+
private boolean enableStrictOrderBy = true;
1277+
12731278
public void setFollowerQueryForwardMode(String mode) {
12741279
this.followerForwardMode = mode;
12751280
}
@@ -2398,6 +2403,14 @@ public void setCrossJoinCostPenalty(long crossJoinCostPenalty) {
23982403
this.crossJoinCostPenalty = crossJoinCostPenalty;
23992404
}
24002405

2406+
public boolean isEnableStrictOrderBy() {
2407+
return enableStrictOrderBy;
2408+
}
2409+
2410+
public void setEnableStrictOrderBy(boolean enableStrictOrderBy) {
2411+
this.enableStrictOrderBy = enableStrictOrderBy;
2412+
}
2413+
24012414
// Serialize to thrift object
24022415
// used for rest api
24032416
public TQueryOptions toThrift() {

fe/fe-core/src/main/java/com/starrocks/sql/analyzer/AggregationAnalyzer.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import com.starrocks.qe.SqlModeHelper;
4848
import com.starrocks.sql.ast.ArrayExpr;
4949
import com.starrocks.sql.ast.AstVisitor;
50+
import com.starrocks.sql.ast.FieldReference;
5051
import com.starrocks.sql.ast.LambdaFunctionExpr;
5152
import com.starrocks.sql.ast.QueryStatement;
5253

@@ -116,9 +117,16 @@ public Boolean visit(ParseNode expr) {
116117
return super.visit(expr);
117118
}
118119

120+
@Override
121+
public Boolean visitFieldReference(FieldReference node, Void context) {
122+
String colInfo = node.getTblName() == null ? "column" : "column of " + node.getTblName().toString();
123+
throw new SemanticException(colInfo + " must appear in the GROUP BY clause or be used in an aggregate function");
124+
}
125+
119126
@Override
120127
public Boolean visitExpression(Expr node, Void context) {
121-
throw new SemanticException("%s is not support in GROUP BY clause", node.toSql());
128+
throw new SemanticException(node.toSql() +
129+
" must appear in the GROUP BY clause or be used in an aggregate function");
122130
}
123131

124132
private boolean isGroupingKey(Expr node) {

fe/fe-core/src/main/java/com/starrocks/sql/analyzer/SelectAnalyzer.java

Lines changed: 62 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
package com.starrocks.sql.analyzer;
1616

17+
import com.google.common.base.Preconditions;
1718
import com.google.common.collect.ImmutableList;
1819
import com.google.common.collect.Lists;
1920
import com.google.common.collect.Sets;
@@ -85,10 +86,11 @@ public void analyze(AnalyzeState analyzeState,
8586
analyzeHaving(havingClause, analyzeState, sourceScope, outputScope, outputExpressions);
8687

8788
// Construct sourceAndOutputScope with sourceScope and outputScope
88-
Scope sourceAndOutputScope = computeAndAssignOrderScope(analyzeState, sourceScope, outputScope);
89+
Scope sourceAndOutputScope = computeAndAssignOrderScope(analyzeState, sourceScope, outputScope,
90+
selectList.isDistinct());
8991

9092
List<OrderByElement> orderByElements =
91-
analyzeOrderBy(sortClause, analyzeState, sourceAndOutputScope, outputExpressions);
93+
analyzeOrderBy(sortClause, analyzeState, sourceAndOutputScope, outputExpressions, selectList.isDistinct());
9294
List<Expr> orderByExpressions =
9395
orderByElements.stream().map(OrderByElement::getExpr).collect(Collectors.toList());
9496

@@ -182,7 +184,7 @@ public void analyze(AnalyzeState analyzeState,
182184
.collect(Collectors.toList());
183185

184186
Scope sourceScopeForOrder = new Scope(RelationId.anonymous(), new RelationFields(sourceForOrderFields));
185-
computeAndAssignOrderScope(analyzeState, sourceScopeForOrder, outputScope);
187+
computeAndAssignOrderScope(analyzeState, sourceScopeForOrder, outputScope, selectList.isDistinct());
186188
analyzeState.setOrderSourceExpressions(orderSourceExpressions);
187189
}
188190

@@ -316,7 +318,8 @@ private List<Expr> analyzeSelect(SelectList selectList, Relation fromRelation, b
316318

317319
private List<OrderByElement> analyzeOrderBy(List<OrderByElement> orderByElements, AnalyzeState analyzeState,
318320
Scope orderByScope,
319-
List<Expr> outputExpressions) {
321+
List<Expr> outputExpressions,
322+
boolean isDistinct) {
320323
if (orderByElements == null) {
321324
analyzeState.setOrderBy(Collections.emptyList());
322325
return Collections.emptyList();
@@ -332,20 +335,32 @@ private List<OrderByElement> analyzeOrderBy(List<OrderByElement> orderByElements
332335
if (ordinal < 1 || ordinal > outputExpressions.size()) {
333336
throw new SemanticException("ORDER BY position %s is not in select list", ordinal);
334337
}
338+
// index can ensure no ambiguous, we don't need to re-analyze this output expression
335339
expression = outputExpressions.get((int) ordinal - 1);
336-
}
337-
338-
if (expression instanceof FieldReference) {
339-
// If the expression of order by is a FieldReference, it means that the type of sql is
340+
} else if (expression instanceof FieldReference) {
341+
// If the expression of order by is a FieldReference, and it's not a distinct select,
342+
// it means that the type of sql is
340343
// "select * from t order by 1", then this FieldReference cannot be parsed in OrderByScope,
341344
// but should be parsed in sourceScope
342-
analyzeExpression(expression, analyzeState, orderByScope.getParent());
345+
if (isDistinct) {
346+
analyzeExpression(expression, analyzeState, orderByScope);
347+
} else {
348+
analyzeExpression(expression, analyzeState, orderByScope.getParent());
349+
}
343350
} else {
344351
ExpressionAnalyzer expressionAnalyzer = new ExpressionAnalyzer(session);
345352
expressionAnalyzer.analyzeWithoutUpdateState(expression, analyzeState, orderByScope);
346353
List<Expr> aggregations = Lists.newArrayList();
347354
expression.collectAll(e -> e.isAggregate(), aggregations);
348-
aggregations.forEach(e -> analyzeExpression(e, analyzeState, orderByScope.getParent()));
355+
if (isDistinct && !aggregations.isEmpty()) {
356+
throw new SemanticException("for SELECT DISTINCT, ORDER BY expressions must appear in select list");
357+
}
358+
359+
if (!aggregations.isEmpty()) {
360+
// use parent scope to analyze agg func firstly
361+
Preconditions.checkState(orderByScope.getParent() != null, "parent scope not be set");
362+
aggregations.forEach(e -> analyzeExpression(e, analyzeState, orderByScope.getParent()));
363+
}
349364
analyzeExpression(expression, analyzeState, orderByScope);
350365
}
351366

@@ -659,23 +674,24 @@ public Expr visitSlot(SlotRef slotRef, Void context) {
659674
}
660675
}
661676

662-
private Scope computeAndAssignOrderScope(AnalyzeState analyzeState, Scope sourceScope, Scope outputScope) {
663-
// The Scope used by order by allows parsing of the same column,
664-
// such as 'select v1 as v, v1 as v from t0 order by v'
665-
// but normal parsing does not allow it. So add a de-duplication operation here.
677+
private Scope computeAndAssignOrderScope(AnalyzeState analyzeState, Scope sourceScope, Scope outputScope,
678+
boolean isDistinct) {
679+
680+
List<Field> allFields = Lists.newArrayList();
681+
// order by can only "see" fields from distinct output
682+
if (isDistinct) {
683+
allFields = removeDuplicateField(outputScope.getRelationFields().getAllFields());
684+
Scope orderScope = new Scope(outputScope.getRelationId(), new RelationFields(allFields));
685+
analyzeState.setOrderScope(orderScope);
686+
return orderScope;
687+
}
666688

667-
List<Field> allFields = new ArrayList<>();
668689
for (int i = 0; i < analyzeState.getOutputExprInOrderByScope().size(); ++i) {
669690
Field field = outputScope.getRelationFields()
670691
.getFieldByIndex(analyzeState.getOutputExprInOrderByScope().get(i));
671-
if (field.getName() != null && field.getOriginExpression() != null &&
672-
allFields.stream().anyMatch(f -> f.getOriginExpression() != null
673-
&& f.getName() != null && field.getName().equals(f.getName())
674-
&& field.getOriginExpression().equals(f.getOriginExpression()))) {
675-
continue;
676-
}
677692
allFields.add(field);
678693
}
694+
allFields = removeDuplicateField(allFields);
679695

680696
Scope orderScope = new Scope(outputScope.getRelationId(), new RelationFields(allFields));
681697

@@ -692,4 +708,29 @@ private Scope computeAndAssignOrderScope(AnalyzeState analyzeState, Scope source
692708
private void analyzeExpression(Expr expr, AnalyzeState analyzeState, Scope scope) {
693709
ExpressionAnalyzer.analyzeExpression(expr, analyzeState, scope, session);
694710
}
711+
712+
713+
// The Scope used by order by allows parsing of the same column,
714+
// such as 'select v1 as v, v1 as v from t0 order by v'
715+
// but normal parsing does not allow it. So add a de-duplication operation here.
716+
private List<Field> removeDuplicateField(List<Field> originalFields) {
717+
List<Field> allFields = Lists.newArrayList();
718+
for (Field field : originalFields) {
719+
if (session.getSessionVariable().isEnableStrictOrderBy()) {
720+
if (field.getName() != null && field.getOriginExpression() != null &&
721+
allFields.stream().anyMatch(f -> f.getOriginExpression() != null
722+
&& f.getName() != null && field.getName().equals(f.getName())
723+
&& field.getOriginExpression().equals(f.getOriginExpression()))) {
724+
continue;
725+
}
726+
} else {
727+
if (field.getName() != null &&
728+
allFields.stream().anyMatch(f -> f.getName() != null && field.getName().equals(f.getName()))) {
729+
continue;
730+
}
731+
}
732+
allFields.add(field);
733+
}
734+
return allFields;
735+
}
695736
}

fe/fe-core/src/main/java/com/starrocks/sql/ast/FieldReference.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,10 @@ public int getFieldIndex() {
4848
return fieldIndex;
4949
}
5050

51+
public TableName getTblName() {
52+
return tblName;
53+
}
54+
5155
@Override
5256
public boolean equals(Object o) {
5357
if (this == o) {

fe/fe-core/src/test/java/com/starrocks/sql/analyzer/AnalyzeAggregateTest.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,9 +122,11 @@ public void testDistinct() {
122122
analyzeSuccess("select distinct v1, v2 as v from t0 order by v");
123123
analyzeSuccess("select distinct abs(v1) as v from t0 order by v");
124124
analyzeFail("select distinct v1 from t0 order by v2",
125-
"must be an aggregate expression or appear in GROUP BY clause");
125+
"Column 'v2' cannot be resolved");
126126
analyzeFail("select distinct v1 as v from t0 order by v2",
127-
"must be an aggregate expression or appear in GROUP BY clause");
127+
"Column 'v2' cannot be resolved");
128+
analyzeFail("select * from t0 order by max(v2)",
129+
"column must appear in the GROUP BY clause or be used in an aggregate function");
128130

129131
analyzeSuccess("select distinct v1 as v from t0 having v = 1");
130132
analyzeFail("select distinct v1 as v from t0 having v2 = 2",

fe/fe-core/src/test/java/com/starrocks/sql/plan/OrderByTest.java

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,26 @@
1515

1616
package com.starrocks.sql.plan;
1717

18+
import com.google.common.collect.Lists;
1819
import com.starrocks.qe.SessionVariable;
20+
import com.starrocks.sql.analyzer.SemanticException;
1921
import org.junit.Assert;
20-
import org.junit.Test;
22+
import org.junit.jupiter.api.BeforeAll;
23+
import org.junit.jupiter.api.Test;
24+
import org.junit.jupiter.params.ParameterizedTest;
25+
import org.junit.jupiter.params.provider.Arguments;
26+
import org.junit.jupiter.params.provider.MethodSource;
27+
28+
import java.util.List;
29+
import java.util.stream.Stream;
2130

2231
public class OrderByTest extends PlanTestBase {
2332

33+
@BeforeAll
34+
public static void beforeClass() throws Exception {
35+
PlanTestBase.beforeClass();
36+
}
37+
2438
@Test
2539
public void testExistOrderBy() throws Exception {
2640
String sql = "SELECT * \n" +
@@ -602,4 +616,66 @@ public void testTopNFilterWithProject() throws Exception {
602616
" | build runtime filters:\n" +
603617
" | - filter_id = 0, build_expr = (<slot 1> 1: t1a), remote = false");
604618
}
619+
620+
@ParameterizedTest
621+
@MethodSource("failToStrictSql")
622+
void testFailToStrictOrderByExpression(String sql) {
623+
Assert.assertThrows(SemanticException.class, () -> getFragmentPlan(sql));
624+
}
625+
626+
@ParameterizedTest
627+
@MethodSource("successToStrictSql")
628+
void testSuccessToStrictOrderByExpression(String sql, String expectedPlan) throws Exception {
629+
String plan = getFragmentPlan(sql);
630+
assertContains(plan, expectedPlan);
631+
}
632+
633+
@ParameterizedTest
634+
@MethodSource("allOrderBySql")
635+
void testNotStrictOrderByExpression(String sql, String expectedPlan) throws Exception {
636+
String hint = "select /*+ set_var(enable_strict_order_by = false) */ ";
637+
sql = hint + sql.substring(7);
638+
String plan = getFragmentPlan(sql);
639+
assertContains(plan, expectedPlan);
640+
}
641+
642+
private static Stream<Arguments> allOrderBySql() {
643+
return Stream.concat(successToStrictSql(), failToStrictSql());
644+
}
645+
646+
private static Stream<Arguments> successToStrictSql() {
647+
List<Arguments> list = Lists.newArrayList();
648+
list.add(Arguments.of("select * from t0 order by 1", "order by: <slot 1> 1: v1 ASC"));
649+
list.add(Arguments.of("select abs(v1) v1, * from t0 order by 1", "order by: <slot 4> 4: abs ASC"));
650+
list.add(Arguments.of("select distinct * from t0 order by 1", "order by: <slot 1> 1: v1 ASC"));
651+
list.add(Arguments.of("select * from t0 order by v1", "order by: <slot 1> 1: v1 ASC"));
652+
list.add(Arguments.of("select t0.* from t0 order by t0.v1", "order by: <slot 1> 1: v1 ASC"));
653+
list.add(Arguments.of("select distinct t0.* from t0 order by t0.v1", "order by: <slot 1> 1: v1 ASC"));
654+
655+
656+
list.add(Arguments.of("select *, v1 from t0 order by v1", "order by: <slot 1> 1: v1 ASC"));
657+
list.add(Arguments.of("select *, v1 from t0 order by abs(v1)", "order by: <slot 4> 4: abs ASC"));
658+
list.add(Arguments.of("select v1, * from t0 order by abs(v1)", "order by: <slot 4> 4: abs ASC"));
659+
list.add(Arguments.of("select distinct * from t0 order by v1", "order by: <slot 1> 1: v1 ASC"));
660+
list.add(Arguments.of("select distinct *, v1 from t0 order by abs(v1)", "order by: <slot 4> 4: abs ASC"));
661+
return list.stream();
662+
}
663+
664+
private static Stream<Arguments> failToStrictSql() {
665+
List<Arguments> list = Lists.newArrayList();
666+
list.add(Arguments.of("select *, v1, abs(v1) v1 from t0 order by 1", "order by: <slot 1> 1: v1 ASC"));
667+
list.add(Arguments.of("select distinct *, v1, abs(v1) v1 from t0 order by 1", "order by: <slot 1> 1: v1 ASC"));
668+
list.add(Arguments.of("select distinct abs(v1) v1, * from t0 order by 1", "order by: <slot 4> 4: abs ASC"));
669+
list.add(Arguments.of("select distinct *, v1, abs(v1) v1 from t0 order by v1", "order by: <slot 1> 1: v1 ASC"));
670+
list.add(Arguments.of("select v1, max(v2) v1 from t0 group by v1 order by abs(v1)", "order by: <slot 5> 5: abs ASC"));
671+
list.add(Arguments.of("select max(v2) v1, v1 from t0 group by v1 order by abs(v1)", "order by: <slot 5> 5: abs ASC"));
672+
list.add(Arguments.of("select v2, max(v2) v2 from t0 group by v2 order by max(v2)", "order by: <slot 4> 4: max ASC"));
673+
list.add(Arguments.of("select max(v2) v2, v2 from t0 group by v2 order by max(v2)", "order by: <slot 4> 4: max ASC"));
674+
list.add(Arguments.of("select upper(v1) v1, *, v1 from t0 order by v1", "order by: <slot 4> 4: upper ASC"));
675+
list.add(Arguments.of("select *, v1, upper(v1) v1 from t0 order by v1", "order by: <slot 1> 1: v1 ASC"));
676+
list.add(Arguments.of("select distinct upper(v1) v1, *, v1 from t0 order by v1", "order by: <slot 1> 1: v1 ASC"));
677+
list.add(Arguments.of("select distinct *, v1, upper(v1) v1 from t0 order by v1", "order by: <slot 1> 1: v1 ASC"));
678+
679+
return list.stream();
680+
}
605681
}

0 commit comments

Comments
 (0)