Skip to content

Commit 128d83a

Browse files
committed
[BugFix] fix wrong order by scope for distinct query (backport StarRocks#37910)
Signed-off-by: packy92 <[email protected]>
1 parent a32e3ee commit 128d83a

File tree

7 files changed

+194
-48
lines changed

7 files changed

+194
-48
lines changed

fe/fe-core/src/main/java/com/starrocks/qe/SessionVariable.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -423,6 +423,8 @@ public class SessionVariable implements Serializable, Writable, Cloneable {
423423

424424
public static final String ENABLE_PLAN_SERIALIZE_CONCURRENTLY = "enable_plan_serialize_concurrently";
425425

426+
public static final String ENABLE_STRICT_ORDER_BY = "enable_strict_order_by";
427+
426428
// Flag to control whether to proxy follower's query statement to leader/follower.
427429
public enum FollowerQueryForwardMode {
428430
DEFAULT, // proxy queries by the follower's replay progress (default)
@@ -1552,6 +1554,9 @@ public boolean isCboPredicateSubfieldPath() {
15521554
@VarAttr(name = FOLLOWER_QUERY_FORWARD_MODE, flag = VariableMgr.INVISIBLE | VariableMgr.DISABLE_FORWARD_TO_LEADER)
15531555
private String followerForwardMode = "";
15541556

1557+
@VarAttr(name = ENABLE_STRICT_ORDER_BY)
1558+
private boolean enableStrictOrderBy = true;
1559+
15551560
public void setFollowerQueryForwardMode(String mode) {
15561561
this.followerForwardMode = mode;
15571562
}
@@ -2897,6 +2902,14 @@ public void setSkewJoinRandRange(int skewJoinRandRange) {
28972902
this.skewJoinRandRange = skewJoinRandRange;
28982903
}
28992904

2905+
public boolean isEnableStrictOrderBy() {
2906+
return enableStrictOrderBy;
2907+
}
2908+
2909+
public void setEnableStrictOrderBy(boolean enableStrictOrderBy) {
2910+
this.enableStrictOrderBy = enableStrictOrderBy;
2911+
}
2912+
29002913
// Serialize to thrift object
29012914
// used for rest api
29022915
public TQueryOptions toThrift() {

fe/fe-core/src/main/java/com/starrocks/sql/analyzer/AggregationAnalyzer.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import com.starrocks.qe.SqlModeHelper;
4848
import com.starrocks.sql.ast.ArrayExpr;
4949
import com.starrocks.sql.ast.AstVisitor;
50+
import com.starrocks.sql.ast.FieldReference;
5051
import com.starrocks.sql.ast.LambdaFunctionExpr;
5152
import com.starrocks.sql.ast.QueryStatement;
5253

@@ -116,9 +117,16 @@ public Boolean visit(ParseNode expr) {
116117
return super.visit(expr);
117118
}
118119

120+
@Override
121+
public Boolean visitFieldReference(FieldReference node, Void context) {
122+
String colInfo = node.getTblName() == null ? "column" : "column of " + node.getTblName().toString();
123+
throw new SemanticException(colInfo + " must appear in the GROUP BY clause or be used in an aggregate function",
124+
node.getPos());
125+
}
126+
119127
@Override
120128
public Boolean visitExpression(Expr node, Void context) {
121-
throw new SemanticException(PARSER_ERROR_MSG.unsupportedExprWithInfo(node.toSql(), "GROUP BY"),
129+
throw new SemanticException(node.toSql() + " must appear in the GROUP BY clause or be used in an aggregate function",
122130
node.getPos());
123131
}
124132

fe/fe-core/src/main/java/com/starrocks/sql/analyzer/SelectAnalyzer.java

Lines changed: 63 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
package com.starrocks.sql.analyzer;
1616

17+
import com.google.common.base.Preconditions;
1718
import com.google.common.collect.ImmutableList;
1819
import com.google.common.collect.Lists;
1920
import com.google.common.collect.Sets;
@@ -86,10 +87,11 @@ public void analyze(AnalyzeState analyzeState,
8687
analyzeHaving(havingClause, analyzeState, sourceScope, outputScope, outputExpressions);
8788

8889
// Construct sourceAndOutputScope with sourceScope and outputScope
89-
Scope sourceAndOutputScope = computeAndAssignOrderScope(analyzeState, sourceScope, outputScope);
90+
Scope sourceAndOutputScope = computeAndAssignOrderScope(analyzeState, sourceScope, outputScope,
91+
selectList.isDistinct());
9092

9193
List<OrderByElement> orderByElements =
92-
analyzeOrderBy(sortClause, analyzeState, sourceAndOutputScope, outputExpressions);
94+
analyzeOrderBy(sortClause, analyzeState, sourceAndOutputScope, outputExpressions, selectList.isDistinct());
9395
List<Expr> orderByExpressions =
9496
orderByElements.stream().map(OrderByElement::getExpr).collect(Collectors.toList());
9597

@@ -183,7 +185,7 @@ public void analyze(AnalyzeState analyzeState,
183185
.collect(Collectors.toList());
184186

185187
Scope sourceScopeForOrder = new Scope(RelationId.anonymous(), new RelationFields(sourceForOrderFields));
186-
computeAndAssignOrderScope(analyzeState, sourceScopeForOrder, outputScope);
188+
computeAndAssignOrderScope(analyzeState, sourceScopeForOrder, outputScope, selectList.isDistinct());
187189
analyzeState.setOrderSourceExpressions(orderSourceExpressions);
188190
}
189191

@@ -311,7 +313,8 @@ private List<Expr> analyzeSelect(SelectList selectList, Relation fromRelation, b
311313

312314
private List<OrderByElement> analyzeOrderBy(List<OrderByElement> orderByElements, AnalyzeState analyzeState,
313315
Scope orderByScope,
314-
List<Expr> outputExpressions) {
316+
List<Expr> outputExpressions,
317+
boolean isDistinct) {
315318
if (orderByElements == null) {
316319
analyzeState.setOrderBy(Collections.emptyList());
317320
return Collections.emptyList();
@@ -327,20 +330,33 @@ private List<OrderByElement> analyzeOrderBy(List<OrderByElement> orderByElements
327330
if (ordinal < 1 || ordinal > outputExpressions.size()) {
328331
throw new SemanticException("ORDER BY position %s is not in select list", ordinal);
329332
}
333+
// index can ensure no ambiguous, we don't need to re-analyze this output expression
330334
expression = outputExpressions.get((int) ordinal - 1);
331-
}
332-
333-
if (expression instanceof FieldReference) {
334-
// If the expression of order by is a FieldReference, it means that the type of sql is
335+
} else if (expression instanceof FieldReference) {
336+
// If the expression of order by is a FieldReference, and it's not a distinct select,
337+
// it means that the type of sql is
335338
// "select * from t order by 1", then this FieldReference cannot be parsed in OrderByScope,
336339
// but should be parsed in sourceScope
337-
analyzeExpression(expression, analyzeState, orderByScope.getParent());
340+
if (isDistinct) {
341+
analyzeExpression(expression, analyzeState, orderByScope);
342+
} else {
343+
analyzeExpression(expression, analyzeState, orderByScope.getParent());
344+
}
338345
} else {
339346
ExpressionAnalyzer expressionAnalyzer = new ExpressionAnalyzer(session);
340347
expressionAnalyzer.analyzeWithoutUpdateState(expression, analyzeState, orderByScope);
341348
List<Expr> aggregations = Lists.newArrayList();
342349
expression.collectAll(e -> e.isAggregate(), aggregations);
343-
aggregations.forEach(e -> analyzeExpression(e, analyzeState, orderByScope.getParent()));
350+
if (isDistinct && !aggregations.isEmpty()) {
351+
throw new SemanticException("for SELECT DISTINCT, ORDER BY expressions must appear in select list",
352+
expression.getPos());
353+
}
354+
355+
if (!aggregations.isEmpty()) {
356+
// use parent scope to analyze agg func firstly
357+
Preconditions.checkState(orderByScope.getParent() != null, "parent scope not be set");
358+
aggregations.forEach(e -> analyzeExpression(e, analyzeState, orderByScope.getParent()));
359+
}
344360
analyzeExpression(expression, analyzeState, orderByScope);
345361
}
346362

@@ -655,23 +671,24 @@ public Expr visitSlot(SlotRef slotRef, Void context) {
655671
}
656672
}
657673

658-
private Scope computeAndAssignOrderScope(AnalyzeState analyzeState, Scope sourceScope, Scope outputScope) {
659-
// The Scope used by order by allows parsing of the same column,
660-
// such as 'select v1 as v, v1 as v from t0 order by v'
661-
// but normal parsing does not allow it. So add a de-duplication operation here.
674+
private Scope computeAndAssignOrderScope(AnalyzeState analyzeState, Scope sourceScope, Scope outputScope,
675+
boolean isDistinct) {
676+
677+
List<Field> allFields = Lists.newArrayList();
678+
// order by can only "see" fields from distinct output
679+
if (isDistinct) {
680+
allFields = removeDuplicateField(outputScope.getRelationFields().getAllFields());
681+
Scope orderScope = new Scope(outputScope.getRelationId(), new RelationFields(allFields));
682+
analyzeState.setOrderScope(orderScope);
683+
return orderScope;
684+
}
662685

663-
List<Field> allFields = new ArrayList<>();
664686
for (int i = 0; i < analyzeState.getOutputExprInOrderByScope().size(); ++i) {
665687
Field field = outputScope.getRelationFields()
666688
.getFieldByIndex(analyzeState.getOutputExprInOrderByScope().get(i));
667-
if (field.getName() != null && field.getOriginExpression() != null &&
668-
allFields.stream().anyMatch(f -> f.getOriginExpression() != null
669-
&& f.getName() != null && field.getName().equals(f.getName())
670-
&& field.getOriginExpression().equals(f.getOriginExpression()))) {
671-
continue;
672-
}
673689
allFields.add(field);
674690
}
691+
allFields = removeDuplicateField(allFields);
675692

676693
Scope orderScope = new Scope(outputScope.getRelationId(), new RelationFields(allFields));
677694

@@ -688,4 +705,29 @@ private Scope computeAndAssignOrderScope(AnalyzeState analyzeState, Scope source
688705
private void analyzeExpression(Expr expr, AnalyzeState analyzeState, Scope scope) {
689706
ExpressionAnalyzer.analyzeExpression(expr, analyzeState, scope, session);
690707
}
708+
709+
710+
// The Scope used by order by allows parsing of the same column,
711+
// such as 'select v1 as v, v1 as v from t0 order by v'
712+
// but normal parsing does not allow it. So add a de-duplication operation here.
713+
private List<Field> removeDuplicateField(List<Field> originalFields) {
714+
List<Field> allFields = Lists.newArrayList();
715+
for (Field field : originalFields) {
716+
if (session.getSessionVariable().isEnableStrictOrderBy()) {
717+
if (field.getName() != null && field.getOriginExpression() != null &&
718+
allFields.stream().anyMatch(f -> f.getOriginExpression() != null
719+
&& f.getName() != null && field.getName().equals(f.getName())
720+
&& field.getOriginExpression().equals(f.getOriginExpression()))) {
721+
continue;
722+
}
723+
} else {
724+
if (field.getName() != null &&
725+
allFields.stream().anyMatch(f -> f.getName() != null && field.getName().equals(f.getName()))) {
726+
continue;
727+
}
728+
}
729+
allFields.add(field);
730+
}
731+
return allFields;
732+
}
691733
}

fe/fe-core/src/main/java/com/starrocks/sql/ast/FieldReference.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,10 @@ public int getFieldIndex() {
4848
return fieldIndex;
4949
}
5050

51+
public TableName getTblName() {
52+
return tblName;
53+
}
54+
5155
@Override
5256
public boolean equals(Object o) {
5357
if (this == o) {

fe/fe-core/src/test/java/com/starrocks/sql/analyzer/AnalyzeAggregateTest.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,9 +131,11 @@ public void testDistinct() {
131131
analyzeSuccess("select distinct v1, v2 as v from t0 order by v");
132132
analyzeSuccess("select distinct abs(v1) as v from t0 order by v");
133133
analyzeFail("select distinct v1 from t0 order by v2",
134-
"must be an aggregate expression or appear in GROUP BY clause");
134+
"Column 'v2' cannot be resolved");
135135
analyzeFail("select distinct v1 as v from t0 order by v2",
136-
"must be an aggregate expression or appear in GROUP BY clause");
136+
"Column 'v2' cannot be resolved");
137+
analyzeFail("select * from t0 order by max(v2)",
138+
"column must appear in the GROUP BY clause or be used in an aggregate function.");
137139

138140
analyzeSuccess("select distinct v1 as v from t0 having v = 1");
139141
analyzeFail("select distinct v1 as v from t0 having v2 = 2",

fe/fe-core/src/test/java/com/starrocks/sql/parser/ParserTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -403,7 +403,7 @@ void testWrongVariableName() {
403403
assertContains(res, "{'pipeline_dop', 'pipeline_sink_dop', 'pipeline_profile_level'}");
404404

405405
res = VariableMgr.findSimilarVarNames("disable_joinreorder");
406-
assertContains(res, "{'disable_join_reorder', 'disable_colocate_join', 'enable_predicate_reorder'}");
406+
assertContains(res, "{'disable_join_reorder', 'disable_colocate_join'");
407407
}
408408

409409
@Test

0 commit comments

Comments
 (0)