Skip to content

Commit 576872a

Browse files
packy92mergify[bot]
authored andcommitted
[BugFix] fix wrong order by scope for distinct query (#37910)
Signed-off-by: packy92 <[email protected]> (cherry picked from commit fa72214) # Conflicts: # fe/fe-core/src/main/java/com/starrocks/qe/SessionVariable.java # fe/fe-core/src/main/java/com/starrocks/sql/analyzer/AggregationAnalyzer.java # fe/fe-core/src/test/java/com/starrocks/sql/parser/ParserTest.java # fe/fe-core/src/test/java/com/starrocks/sql/plan/OrderByTest.java
1 parent fce8784 commit 576872a

File tree

7 files changed

+433
-41
lines changed

7 files changed

+433
-41
lines changed

fe/fe-core/src/main/java/com/starrocks/qe/SessionVariable.java

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,18 @@ public class SessionVariable implements Serializable, Writable, Cloneable {
340340
public static final String ENABLE_MATERIALIZED_VIEW_REWRITE = "enable_materialized_view_rewrite";
341341
public static final String ENABLE_MATERIALIZED_VIEW_UNION_REWRITE = "enable_materialized_view_union_rewrite";
342342

343+
<<<<<<< HEAD
344+
=======
345+
public static final String LARGE_DECIMAL_UNDERLYING_TYPE = "large_decimal_underlying_type";
346+
347+
public static final String ENABLE_ICEBERG_IDENTITY_COLUMN_OPTIMIZE = "enable_iceberg_identity_column_optimize";
348+
public static final String ENABLE_PIPELINE_LEVEL_SHUFFLE = "enable_pipeline_level_shuffle";
349+
350+
public static final String ENABLE_PLAN_SERIALIZE_CONCURRENTLY = "enable_plan_serialize_concurrently";
351+
352+
public static final String ENABLE_STRICT_ORDER_BY = "enable_strict_order_by";
353+
354+
>>>>>>> fa72214349 ([BugFix] fix wrong order by scope for distinct query (#37910))
343355
// Flag to control whether to proxy follower's query statement to leader/follower.
344356
public enum FollowerQueryForwardMode {
345357
DEFAULT, // proxy queries by the follower's replay progress (default)
@@ -1072,6 +1084,9 @@ public String getCboEqBaseType() {
10721084
@VarAttr(name = FOLLOWER_QUERY_FORWARD_MODE, flag = VariableMgr.INVISIBLE | VariableMgr.DISABLE_FORWARD_TO_LEADER)
10731085
private String followerForwardMode = "";
10741086

1087+
@VarAttr(name = ENABLE_STRICT_ORDER_BY)
1088+
private boolean enableStrictOrderBy = true;
1089+
10751090
public void setFollowerQueryForwardMode(String mode) {
10761091
this.followerForwardMode = mode;
10771092
}
@@ -1996,6 +2011,25 @@ public void setCrossJoinCostPenalty(long crossJoinCostPenalty) {
19962011
this.crossJoinCostPenalty = crossJoinCostPenalty;
19972012
}
19982013

2014+
<<<<<<< HEAD
2015+
=======
2016+
public int getSkewJoinRandRange() {
2017+
return skewJoinRandRange;
2018+
}
2019+
2020+
public void setSkewJoinRandRange(int skewJoinRandRange) {
2021+
this.skewJoinRandRange = skewJoinRandRange;
2022+
}
2023+
2024+
public boolean isEnableStrictOrderBy() {
2025+
return enableStrictOrderBy;
2026+
}
2027+
2028+
public void setEnableStrictOrderBy(boolean enableStrictOrderBy) {
2029+
this.enableStrictOrderBy = enableStrictOrderBy;
2030+
}
2031+
2032+
>>>>>>> fa72214349 ([BugFix] fix wrong order by scope for distinct query (#37910))
19992033
// Serialize to thrift object
20002034
// used for rest api
20012035
public TQueryOptions toThrift() {

fe/fe-core/src/main/java/com/starrocks/sql/analyzer/AggregationAnalyzer.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,11 @@
3434
import com.starrocks.qe.ConnectContext;
3535
import com.starrocks.qe.SqlModeHelper;
3636
import com.starrocks.sql.ast.AstVisitor;
37+
<<<<<<< HEAD
38+
=======
39+
import com.starrocks.sql.ast.DictionaryGetExpr;
40+
import com.starrocks.sql.ast.FieldReference;
41+
>>>>>>> fa72214349 ([BugFix] fix wrong order by scope for distinct query (#37910))
3742
import com.starrocks.sql.ast.LambdaFunctionExpr;
3843
import com.starrocks.sql.ast.QueryStatement;
3944

@@ -103,9 +108,21 @@ public Boolean visit(ParseNode expr) {
103108
return super.visit(expr);
104109
}
105110

111+
@Override
112+
public Boolean visitFieldReference(FieldReference node, Void context) {
113+
String colInfo = node.getTblName() == null ? "column" : "column of " + node.getTblName().toString();
114+
throw new SemanticException(colInfo + " must appear in the GROUP BY clause or be used in an aggregate function",
115+
node.getPos());
116+
}
117+
106118
@Override
107119
public Boolean visitExpression(Expr node, Void context) {
120+
<<<<<<< HEAD
108121
throw new SemanticException("%s is not support in GROUP BY clause", node.toSql());
122+
=======
123+
throw new SemanticException(node.toSql() + " must appear in the GROUP BY clause or be used in an aggregate function",
124+
node.getPos());
125+
>>>>>>> fa72214349 ([BugFix] fix wrong order by scope for distinct query (#37910))
109126
}
110127

111128
private boolean isGroupingKey(Expr node) {

fe/fe-core/src/main/java/com/starrocks/sql/analyzer/SelectAnalyzer.java

Lines changed: 63 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// This file is licensed under the Elastic License 2.0. Copyright 2021-present, StarRocks Inc.
22
package com.starrocks.sql.analyzer;
33

4+
import com.google.common.base.Preconditions;
45
import com.google.common.collect.ImmutableList;
56
import com.google.common.collect.Lists;
67
import com.google.common.collect.Sets;
@@ -72,10 +73,11 @@ public void analyze(AnalyzeState analyzeState,
7273
analyzeHaving(havingClause, analyzeState, sourceScope, outputScope, outputExpressions);
7374

7475
// Construct sourceAndOutputScope with sourceScope and outputScope
75-
Scope sourceAndOutputScope = computeAndAssignOrderScope(analyzeState, sourceScope, outputScope);
76+
Scope sourceAndOutputScope = computeAndAssignOrderScope(analyzeState, sourceScope, outputScope,
77+
selectList.isDistinct());
7678

7779
List<OrderByElement> orderByElements =
78-
analyzeOrderBy(sortClause, analyzeState, sourceAndOutputScope, outputExpressions);
80+
analyzeOrderBy(sortClause, analyzeState, sourceAndOutputScope, outputExpressions, selectList.isDistinct());
7981
List<Expr> orderByExpressions =
8082
orderByElements.stream().map(OrderByElement::getExpr).collect(Collectors.toList());
8183

@@ -165,7 +167,7 @@ public void analyze(AnalyzeState analyzeState,
165167
.collect(Collectors.toList());
166168

167169
Scope sourceScopeForOrder = new Scope(RelationId.anonymous(), new RelationFields(sourceForOrderFields));
168-
computeAndAssignOrderScope(analyzeState, sourceScopeForOrder, outputScope);
170+
computeAndAssignOrderScope(analyzeState, sourceScopeForOrder, outputScope, selectList.isDistinct());
169171
analyzeState.setOrderSourceExpressions(orderSourceExpressions);
170172
}
171173

@@ -300,7 +302,8 @@ private List<Expr> analyzeSelect(SelectList selectList, Relation fromRelation, b
300302

301303
private List<OrderByElement> analyzeOrderBy(List<OrderByElement> orderByElements, AnalyzeState analyzeState,
302304
Scope orderByScope,
303-
List<Expr> outputExpressions) {
305+
List<Expr> outputExpressions,
306+
boolean isDistinct) {
304307
if (orderByElements == null) {
305308
analyzeState.setOrderBy(Collections.emptyList());
306309
return Collections.emptyList();
@@ -316,20 +319,33 @@ private List<OrderByElement> analyzeOrderBy(List<OrderByElement> orderByElements
316319
if (ordinal < 1 || ordinal > outputExpressions.size()) {
317320
throw new SemanticException("ORDER BY position %s is not in select list", ordinal);
318321
}
322+
// index can ensure no ambiguous, we don't need to re-analyze this output expression
319323
expression = outputExpressions.get((int) ordinal - 1);
320-
}
321-
322-
if (expression instanceof FieldReference) {
323-
// If the expression of order by is a FieldReference, it means that the type of sql is
324+
} else if (expression instanceof FieldReference) {
325+
// If the expression of order by is a FieldReference, and it's not a distinct select,
326+
// it means that the type of sql is
324327
// "select * from t order by 1", then this FieldReference cannot be parsed in OrderByScope,
325328
// but should be parsed in sourceScope
326-
analyzeExpression(expression, analyzeState, orderByScope.getParent());
329+
if (isDistinct) {
330+
analyzeExpression(expression, analyzeState, orderByScope);
331+
} else {
332+
analyzeExpression(expression, analyzeState, orderByScope.getParent());
333+
}
327334
} else {
328335
ExpressionAnalyzer expressionAnalyzer = new ExpressionAnalyzer(session);
329336
expressionAnalyzer.analyzeWithoutUpdateState(expression, analyzeState, orderByScope);
330337
List<Expr> aggregations = Lists.newArrayList();
331338
expression.collectAll(e -> e.isAggregate(), aggregations);
332-
aggregations.forEach(e -> analyzeExpression(e, analyzeState, orderByScope.getParent()));
339+
if (isDistinct && !aggregations.isEmpty()) {
340+
throw new SemanticException("for SELECT DISTINCT, ORDER BY expressions must appear in select list",
341+
expression.getPos());
342+
}
343+
344+
if (!aggregations.isEmpty()) {
345+
// use parent scope to analyze agg func firstly
346+
Preconditions.checkState(orderByScope.getParent() != null, "parent scope not be set");
347+
aggregations.forEach(e -> analyzeExpression(e, analyzeState, orderByScope.getParent()));
348+
}
333349
analyzeExpression(expression, analyzeState, orderByScope);
334350
}
335351

@@ -643,23 +659,24 @@ public Expr visitSlot(SlotRef slotRef, Void context) {
643659
}
644660
}
645661

646-
private Scope computeAndAssignOrderScope(AnalyzeState analyzeState, Scope sourceScope, Scope outputScope) {
647-
// The Scope used by order by allows parsing of the same column,
648-
// such as 'select v1 as v, v1 as v from t0 order by v'
649-
// but normal parsing does not allow it. So add a de-duplication operation here.
662+
private Scope computeAndAssignOrderScope(AnalyzeState analyzeState, Scope sourceScope, Scope outputScope,
663+
boolean isDistinct) {
664+
665+
List<Field> allFields = Lists.newArrayList();
666+
// order by can only "see" fields from distinct output
667+
if (isDistinct) {
668+
allFields = removeDuplicateField(outputScope.getRelationFields().getAllFields());
669+
Scope orderScope = new Scope(outputScope.getRelationId(), new RelationFields(allFields));
670+
analyzeState.setOrderScope(orderScope);
671+
return orderScope;
672+
}
650673

651-
List<Field> allFields = new ArrayList<>();
652674
for (int i = 0; i < analyzeState.getOutputExprInOrderByScope().size(); ++i) {
653675
Field field = outputScope.getRelationFields()
654676
.getFieldByIndex(analyzeState.getOutputExprInOrderByScope().get(i));
655-
if (field.getName() != null && field.getOriginExpression() != null &&
656-
allFields.stream().anyMatch(f -> f.getOriginExpression() != null
657-
&& f.getName() != null && field.getName().equals(f.getName())
658-
&& field.getOriginExpression().equals(f.getOriginExpression()))) {
659-
continue;
660-
}
661677
allFields.add(field);
662678
}
679+
allFields = removeDuplicateField(allFields);
663680

664681
Scope orderScope = new Scope(outputScope.getRelationId(), new RelationFields(allFields));
665682

@@ -676,4 +693,29 @@ private Scope computeAndAssignOrderScope(AnalyzeState analyzeState, Scope source
676693
private void analyzeExpression(Expr expr, AnalyzeState analyzeState, Scope scope) {
677694
ExpressionAnalyzer.analyzeExpression(expr, analyzeState, scope, session);
678695
}
696+
697+
698+
// The Scope used by order by allows parsing of the same column,
699+
// such as 'select v1 as v, v1 as v from t0 order by v'
700+
// but normal parsing does not allow it. So add a de-duplication operation here.
701+
private List<Field> removeDuplicateField(List<Field> originalFields) {
702+
List<Field> allFields = Lists.newArrayList();
703+
for (Field field : originalFields) {
704+
if (session.getSessionVariable().isEnableStrictOrderBy()) {
705+
if (field.getName() != null && field.getOriginExpression() != null &&
706+
allFields.stream().anyMatch(f -> f.getOriginExpression() != null
707+
&& f.getName() != null && field.getName().equals(f.getName())
708+
&& field.getOriginExpression().equals(f.getOriginExpression()))) {
709+
continue;
710+
}
711+
} else {
712+
if (field.getName() != null &&
713+
allFields.stream().anyMatch(f -> f.getName() != null && field.getName().equals(f.getName()))) {
714+
continue;
715+
}
716+
}
717+
allFields.add(field);
718+
}
719+
return allFields;
720+
}
679721
}

fe/fe-core/src/main/java/com/starrocks/sql/ast/FieldReference.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@ public int getFieldIndex() {
3535
return fieldIndex;
3636
}
3737

38+
public TableName getTblName() {
39+
return tblName;
40+
}
41+
3842
@Override
3943
public boolean equals(Object o) {
4044
if (this == o) {

fe/fe-core/src/test/java/com/starrocks/sql/analyzer/AnalyzeAggregateTest.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,9 +109,11 @@ public void testDistinct() {
109109
analyzeSuccess("select distinct v1, v2 as v from t0 order by v");
110110
analyzeSuccess("select distinct abs(v1) as v from t0 order by v");
111111
analyzeFail("select distinct v1 from t0 order by v2",
112-
"must be an aggregate expression or appear in GROUP BY clause");
112+
"Column 'v2' cannot be resolved");
113113
analyzeFail("select distinct v1 as v from t0 order by v2",
114-
"must be an aggregate expression or appear in GROUP BY clause");
114+
"Column 'v2' cannot be resolved");
115+
analyzeFail("select * from t0 order by max(v2)",
116+
"column must appear in the GROUP BY clause or be used in an aggregate function.");
115117

116118
analyzeSuccess("select distinct v1 as v from t0 having v = 1");
117119
analyzeFail("select distinct v1 as v from t0 having v2 = 2",

fe/fe-core/src/test/java/com/starrocks/sql/parser/ParserTest.java

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,92 @@ void testSettingSqlMode() throws InterruptedException {
180180
exprs[1] instanceof FunctionCallExpr);
181181
}
182182

183+
<<<<<<< HEAD
184+
=======
185+
@ParameterizedTest
186+
@MethodSource("keyWordSqls")
187+
void testNodeReservedWords_3(String sql) {
188+
SessionVariable sessionVariable = new SessionVariable();
189+
try {
190+
SqlParser.parse(sql, sessionVariable).get(0);
191+
} catch (Exception e) {
192+
fail("sql should success. errMsg: " + e.getMessage());
193+
}
194+
}
195+
196+
@ParameterizedTest
197+
@MethodSource("reservedWordSqls")
198+
void testReservedWords(String sql) {
199+
SessionVariable sessionVariable = new SessionVariable();
200+
try {
201+
SqlParser.parse(sql, sessionVariable).get(0);
202+
fail("Not quoting reserved words. sql should fail.");
203+
} catch (Exception e) {
204+
Assert.assertTrue(e instanceof ParsingException);
205+
}
206+
}
207+
208+
@ParameterizedTest
209+
@MethodSource("multipleStatements")
210+
void testMultipleStatements(String sql, boolean isValid) {
211+
SessionVariable sessionVariable = new SessionVariable();
212+
try {
213+
SqlParser.parse(sql, sessionVariable).get(0);
214+
if (!isValid) {
215+
fail("sql should fail.");
216+
}
217+
} catch (Exception e) {
218+
if (isValid) {
219+
fail("sql should success. errMsg: " + e.getMessage());
220+
}
221+
}
222+
}
223+
224+
@ParameterizedTest
225+
@MethodSource("setQuantifierInAggFunc")
226+
void testSetQuantifierInAggFunc(String sql, boolean isValid) {
227+
SessionVariable sessionVariable = new SessionVariable();
228+
try {
229+
SqlParser.parse(sql, sessionVariable).get(0);
230+
if (!isValid) {
231+
fail("sql should fail.");
232+
}
233+
} catch (Exception e) {
234+
if (isValid) {
235+
fail("sql should success. errMsg: " + e.getMessage());
236+
}
237+
}
238+
}
239+
240+
@ParameterizedTest
241+
@MethodSource("unexpectedTokenSqls")
242+
void testUnexpectedTokenSqls(String sql, String expecting) {
243+
SessionVariable sessionVariable = new SessionVariable();
244+
try {
245+
SqlParser.parse(sql, sessionVariable).get(0);
246+
fail("sql should fail.");
247+
} catch (Exception e) {
248+
System.out.println(e.getMessage());
249+
assertContains(e.getMessage(), expecting);
250+
}
251+
}
252+
253+
@Test
254+
void testWrongVariableName() {
255+
String res = VariableMgr.findSimilarVarNames("disable_coloce_join");
256+
assertContains(res, "{'disable_colocate_join', 'disable_join_reorder', 'disable_function_fold_constants'}");
257+
258+
res = VariableMgr.findSimilarVarNames("SQL_AUTO_NULL");
259+
assertContains(res, "{'SQL_AUTO_IS_NULL', 'sql_dialect', 'sql_mode_v2'}");
260+
261+
res = VariableMgr.findSimilarVarNames("pipeline");
262+
assertContains(res, "{'pipeline_dop', 'pipeline_sink_dop', 'pipeline_profile_level'}");
263+
264+
res = VariableMgr.findSimilarVarNames("disable_joinreorder");
265+
assertContains(res, "{'disable_join_reorder', 'disable_colocate_join'");
266+
}
267+
268+
>>>>>>> fa72214349 ([BugFix] fix wrong order by scope for distinct query (#37910))
183269
@Test
184270
void testModOperator() {
185271
String sql = "select 100 MOD 2";

0 commit comments

Comments
 (0)