Skip to content

Commit 1442a1f

Browse files
committed
[BugFix] fix wrong order by scope for distinct query (backport StarRocks#37910)
Signed-off-by: packy92 <[email protected]>
1 parent 456a5c7 commit 1442a1f

File tree

7 files changed

+196
-48
lines changed

7 files changed

+196
-48
lines changed

fe/fe-core/src/main/java/com/starrocks/qe/SessionVariable.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -423,6 +423,8 @@ public class SessionVariable implements Serializable, Writable, Cloneable {
423423
public static final String ENABLE_ICEBERG_IDENTITY_COLUMN_OPTIMIZE = "enable_iceberg_identity_column_optimize";
424424
public static final String ENABLE_PIPELINE_LEVEL_SHUFFLE = "enable_pipeline_level_shuffle";
425425

426+
public static final String ENABLE_STRICT_ORDER_BY = "enable_strict_order_by";
427+
426428
// Flag to control whether to proxy follower's query statement to leader/follower.
427429
public enum FollowerQueryForwardMode {
428430
DEFAULT, // proxy queries by the follower's replay progress (default)
@@ -1452,6 +1454,8 @@ public SessionVariableConstants.ChooseInstancesMode getChooseExecuteInstancesMod
14521454
@VarAttr(name = CBO_EQ_BASE_TYPE, flag = VariableMgr.INVISIBLE)
14531455
private String cboEqBaseType = SessionVariableConstants.VARCHAR;
14541456

1457+
1458+
14551459
public boolean isCboDecimalCastStringStrict() {
14561460
return cboDecimalCastStringStrict;
14571461
}
@@ -1489,6 +1493,9 @@ public void setCboDeriveRangeJoinPredicate(boolean cboDeriveRangeJoinPredicate)
14891493
@VarAttr(name = FOLLOWER_QUERY_FORWARD_MODE, flag = VariableMgr.INVISIBLE | VariableMgr.DISABLE_FORWARD_TO_LEADER)
14901494
private String followerForwardMode = "";
14911495

1496+
@VarAttr(name = ENABLE_STRICT_ORDER_BY)
1497+
private boolean enableStrictOrderBy = true;
1498+
14921499
public void setFollowerQueryForwardMode(String mode) {
14931500
this.followerForwardMode = mode;
14941501
}
@@ -2810,6 +2817,14 @@ public void setCrossJoinCostPenalty(long crossJoinCostPenalty) {
28102817
this.crossJoinCostPenalty = crossJoinCostPenalty;
28112818
}
28122819

2820+
public boolean isEnableStrictOrderBy() {
2821+
return enableStrictOrderBy;
2822+
}
2823+
2824+
public void setEnableStrictOrderBy(boolean enableStrictOrderBy) {
2825+
this.enableStrictOrderBy = enableStrictOrderBy;
2826+
}
2827+
28132828
// Serialize to thrift object
28142829
// used for rest api
28152830
public TQueryOptions toThrift() {

fe/fe-core/src/main/java/com/starrocks/sql/analyzer/AggregationAnalyzer.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
import com.starrocks.qe.SqlModeHelper;
4747
import com.starrocks.sql.ast.ArrayExpr;
4848
import com.starrocks.sql.ast.AstVisitor;
49+
import com.starrocks.sql.ast.FieldReference;
4950
import com.starrocks.sql.ast.LambdaFunctionExpr;
5051
import com.starrocks.sql.ast.QueryStatement;
5152

@@ -115,9 +116,16 @@ public Boolean visit(ParseNode expr) {
115116
return super.visit(expr);
116117
}
117118

119+
@Override
120+
public Boolean visitFieldReference(FieldReference node, Void context) {
121+
String colInfo = node.getTblName() == null ? "column" : "column of " + node.getTblName().toString();
122+
throw new SemanticException(colInfo + " must appear in the GROUP BY clause or be used in an aggregate function",
123+
node.getPos());
124+
}
125+
118126
@Override
119127
public Boolean visitExpression(Expr node, Void context) {
120-
throw new SemanticException(PARSER_ERROR_MSG.unsupportedExprWithInfo(node.toSql(), "GROUP BY"),
128+
throw new SemanticException(node.toSql() + " must appear in the GROUP BY clause or be used in an aggregate function",
121129
node.getPos());
122130
}
123131

fe/fe-core/src/main/java/com/starrocks/sql/analyzer/SelectAnalyzer.java

Lines changed: 63 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
package com.starrocks.sql.analyzer;
1616

17+
import com.google.common.base.Preconditions;
1718
import com.google.common.collect.ImmutableList;
1819
import com.google.common.collect.Lists;
1920
import com.google.common.collect.Sets;
@@ -86,10 +87,11 @@ public void analyze(AnalyzeState analyzeState,
8687
analyzeHaving(havingClause, analyzeState, sourceScope, outputScope, outputExpressions);
8788

8889
// Construct sourceAndOutputScope with sourceScope and outputScope
89-
Scope sourceAndOutputScope = computeAndAssignOrderScope(analyzeState, sourceScope, outputScope);
90+
Scope sourceAndOutputScope = computeAndAssignOrderScope(analyzeState, sourceScope, outputScope,
91+
selectList.isDistinct());
9092

9193
List<OrderByElement> orderByElements =
92-
analyzeOrderBy(sortClause, analyzeState, sourceAndOutputScope, outputExpressions);
94+
analyzeOrderBy(sortClause, analyzeState, sourceAndOutputScope, outputExpressions, selectList.isDistinct());
9395
List<Expr> orderByExpressions =
9496
orderByElements.stream().map(OrderByElement::getExpr).collect(Collectors.toList());
9597

@@ -183,7 +185,7 @@ public void analyze(AnalyzeState analyzeState,
183185
.collect(Collectors.toList());
184186

185187
Scope sourceScopeForOrder = new Scope(RelationId.anonymous(), new RelationFields(sourceForOrderFields));
186-
computeAndAssignOrderScope(analyzeState, sourceScopeForOrder, outputScope);
188+
computeAndAssignOrderScope(analyzeState, sourceScopeForOrder, outputScope, selectList.isDistinct());
187189
analyzeState.setOrderSourceExpressions(orderSourceExpressions);
188190
}
189191

@@ -317,7 +319,8 @@ private List<Expr> analyzeSelect(SelectList selectList, Relation fromRelation, b
317319

318320
private List<OrderByElement> analyzeOrderBy(List<OrderByElement> orderByElements, AnalyzeState analyzeState,
319321
Scope orderByScope,
320-
List<Expr> outputExpressions) {
322+
List<Expr> outputExpressions,
323+
boolean isDistinct) {
321324
if (orderByElements == null) {
322325
analyzeState.setOrderBy(Collections.emptyList());
323326
return Collections.emptyList();
@@ -333,20 +336,33 @@ private List<OrderByElement> analyzeOrderBy(List<OrderByElement> orderByElements
333336
if (ordinal < 1 || ordinal > outputExpressions.size()) {
334337
throw new SemanticException("ORDER BY position %s is not in select list", ordinal);
335338
}
339+
// index can ensure no ambiguous, we don't need to re-analyze this output expression
336340
expression = outputExpressions.get((int) ordinal - 1);
337-
}
338-
339-
if (expression instanceof FieldReference) {
340-
// If the expression of order by is a FieldReference, it means that the type of sql is
341+
} else if (expression instanceof FieldReference) {
342+
// If the expression of order by is a FieldReference, and it's not a distinct select,
343+
// it means that the type of sql is
341344
// "select * from t order by 1", then this FieldReference cannot be parsed in OrderByScope,
342345
// but should be parsed in sourceScope
343-
analyzeExpression(expression, analyzeState, orderByScope.getParent());
346+
if (isDistinct) {
347+
analyzeExpression(expression, analyzeState, orderByScope);
348+
} else {
349+
analyzeExpression(expression, analyzeState, orderByScope.getParent());
350+
}
344351
} else {
345352
ExpressionAnalyzer expressionAnalyzer = new ExpressionAnalyzer(session);
346353
expressionAnalyzer.analyzeWithoutUpdateState(expression, analyzeState, orderByScope);
347354
List<Expr> aggregations = Lists.newArrayList();
348355
expression.collectAll(e -> e.isAggregate(), aggregations);
349-
aggregations.forEach(e -> analyzeExpression(e, analyzeState, orderByScope.getParent()));
356+
if (isDistinct && !aggregations.isEmpty()) {
357+
throw new SemanticException("for SELECT DISTINCT, ORDER BY expressions must appear in select list",
358+
expression.getPos());
359+
}
360+
361+
if (!aggregations.isEmpty()) {
362+
// use parent scope to analyze agg func firstly
363+
Preconditions.checkState(orderByScope.getParent() != null, "parent scope not be set");
364+
aggregations.forEach(e -> analyzeExpression(e, analyzeState, orderByScope.getParent()));
365+
}
350366
analyzeExpression(expression, analyzeState, orderByScope);
351367
}
352368

@@ -661,23 +677,24 @@ public Expr visitSlot(SlotRef slotRef, Void context) {
661677
}
662678
}
663679

664-
private Scope computeAndAssignOrderScope(AnalyzeState analyzeState, Scope sourceScope, Scope outputScope) {
665-
// The Scope used by order by allows parsing of the same column,
666-
// such as 'select v1 as v, v1 as v from t0 order by v'
667-
// but normal parsing does not allow it. So add a de-duplication operation here.
680+
private Scope computeAndAssignOrderScope(AnalyzeState analyzeState, Scope sourceScope, Scope outputScope,
681+
boolean isDistinct) {
682+
683+
List<Field> allFields = Lists.newArrayList();
684+
// order by can only "see" fields from distinct output
685+
if (isDistinct) {
686+
allFields = removeDuplicateField(outputScope.getRelationFields().getAllFields());
687+
Scope orderScope = new Scope(outputScope.getRelationId(), new RelationFields(allFields));
688+
analyzeState.setOrderScope(orderScope);
689+
return orderScope;
690+
}
668691

669-
List<Field> allFields = new ArrayList<>();
670692
for (int i = 0; i < analyzeState.getOutputExprInOrderByScope().size(); ++i) {
671693
Field field = outputScope.getRelationFields()
672694
.getFieldByIndex(analyzeState.getOutputExprInOrderByScope().get(i));
673-
if (field.getName() != null && field.getOriginExpression() != null &&
674-
allFields.stream().anyMatch(f -> f.getOriginExpression() != null
675-
&& f.getName() != null && field.getName().equals(f.getName())
676-
&& field.getOriginExpression().equals(f.getOriginExpression()))) {
677-
continue;
678-
}
679695
allFields.add(field);
680696
}
697+
allFields = removeDuplicateField(allFields);
681698

682699
Scope orderScope = new Scope(outputScope.getRelationId(), new RelationFields(allFields));
683700

@@ -694,4 +711,29 @@ private Scope computeAndAssignOrderScope(AnalyzeState analyzeState, Scope source
694711
private void analyzeExpression(Expr expr, AnalyzeState analyzeState, Scope scope) {
695712
ExpressionAnalyzer.analyzeExpression(expr, analyzeState, scope, session);
696713
}
714+
715+
716+
// The Scope used by order by allows parsing of the same column,
717+
// such as 'select v1 as v, v1 as v from t0 order by v'
718+
// but normal parsing does not allow it. So add a de-duplication operation here.
719+
private List<Field> removeDuplicateField(List<Field> originalFields) {
720+
List<Field> allFields = Lists.newArrayList();
721+
for (Field field : originalFields) {
722+
if (session.getSessionVariable().isEnableStrictOrderBy()) {
723+
if (field.getName() != null && field.getOriginExpression() != null &&
724+
allFields.stream().anyMatch(f -> f.getOriginExpression() != null
725+
&& f.getName() != null && field.getName().equals(f.getName())
726+
&& field.getOriginExpression().equals(f.getOriginExpression()))) {
727+
continue;
728+
}
729+
} else {
730+
if (field.getName() != null &&
731+
allFields.stream().anyMatch(f -> f.getName() != null && field.getName().equals(f.getName()))) {
732+
continue;
733+
}
734+
}
735+
allFields.add(field);
736+
}
737+
return allFields;
738+
}
697739
}

fe/fe-core/src/main/java/com/starrocks/sql/ast/FieldReference.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,10 @@ public int getFieldIndex() {
4848
return fieldIndex;
4949
}
5050

51+
public TableName getTblName() {
52+
return tblName;
53+
}
54+
5155
@Override
5256
public boolean equals(Object o) {
5357
if (this == o) {

fe/fe-core/src/test/java/com/starrocks/sql/analyzer/AnalyzeAggregateTest.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,9 +131,11 @@ public void testDistinct() {
131131
analyzeSuccess("select distinct v1, v2 as v from t0 order by v");
132132
analyzeSuccess("select distinct abs(v1) as v from t0 order by v");
133133
analyzeFail("select distinct v1 from t0 order by v2",
134-
"must be an aggregate expression or appear in GROUP BY clause");
134+
"Column 'v2' cannot be resolved");
135135
analyzeFail("select distinct v1 as v from t0 order by v2",
136-
"must be an aggregate expression or appear in GROUP BY clause");
136+
"Column 'v2' cannot be resolved");
137+
analyzeFail("select * from t0 order by max(v2)",
138+
"column must appear in the GROUP BY clause or be used in an aggregate function.");
137139

138140
analyzeSuccess("select distinct v1 as v from t0 having v = 1");
139141
analyzeFail("select distinct v1 as v from t0 having v2 = 2",

fe/fe-core/src/test/java/com/starrocks/sql/parser/ParserTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,7 @@ void testWrongVariableName() {
404404
assertContains(res, "{'pipeline_dop', 'pipeline_sink_dop', 'pipeline_profile_level'}");
405405

406406
res = VariableMgr.findSimilarVarNames("disable_joinreorder");
407-
assertContains(res, "{'disable_join_reorder', 'disable_colocate_join', 'enable_predicate_reorder'}");
407+
assertContains(res, "{'disable_join_reorder', 'disable_colocate_join'");
408408
}
409409

410410
@Test

0 commit comments

Comments
 (0)