Skip to content

Commit bfac842

Browse files
packy92mergify[bot]
authored andcommitted
[BugFix] fix wrong order by scope for distinct query (#37910)
Signed-off-by: packy92 <[email protected]> (cherry picked from commit fa72214) # Conflicts: # fe/fe-core/src/main/java/com/starrocks/qe/SessionVariable.java # fe/fe-core/src/main/java/com/starrocks/sql/analyzer/AggregationAnalyzer.java # fe/fe-core/src/test/java/com/starrocks/sql/parser/ParserTest.java # fe/fe-core/src/test/java/com/starrocks/sql/plan/OrderByTest.java
1 parent 14c50ca commit bfac842

File tree

7 files changed

+360
-44
lines changed

7 files changed

+360
-44
lines changed

fe/fe-core/src/main/java/com/starrocks/qe/SessionVariable.java

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,16 @@ public class SessionVariable implements Serializable, Writable, Cloneable {
375375

376376
public static final String LARGE_DECIMAL_UNDERLYING_TYPE = "large_decimal_underlying_type";
377377

378+
<<<<<<< HEAD
379+
=======
380+
public static final String ENABLE_ICEBERG_IDENTITY_COLUMN_OPTIMIZE = "enable_iceberg_identity_column_optimize";
381+
public static final String ENABLE_PIPELINE_LEVEL_SHUFFLE = "enable_pipeline_level_shuffle";
382+
383+
public static final String ENABLE_PLAN_SERIALIZE_CONCURRENTLY = "enable_plan_serialize_concurrently";
384+
385+
public static final String ENABLE_STRICT_ORDER_BY = "enable_strict_order_by";
386+
387+
>>>>>>> fa72214349 ([BugFix] fix wrong order by scope for distinct query (#37910))
378388
// Flag to control whether to proxy follower's query statement to leader/follower.
379389
public enum FollowerQueryForwardMode {
380390
DEFAULT, // proxy queries by the follower's replay progress (default)
@@ -1270,6 +1280,9 @@ public String getCboEqBaseType() {
12701280
@VarAttr(name = FOLLOWER_QUERY_FORWARD_MODE, flag = VariableMgr.INVISIBLE | VariableMgr.DISABLE_FORWARD_TO_LEADER)
12711281
private String followerForwardMode = "";
12721282

1283+
@VarAttr(name = ENABLE_STRICT_ORDER_BY)
1284+
private boolean enableStrictOrderBy = true;
1285+
12731286
public void setFollowerQueryForwardMode(String mode) {
12741287
this.followerForwardMode = mode;
12751288
}
@@ -2398,6 +2411,25 @@ public void setCrossJoinCostPenalty(long crossJoinCostPenalty) {
23982411
this.crossJoinCostPenalty = crossJoinCostPenalty;
23992412
}
24002413

2414+
<<<<<<< HEAD
2415+
=======
2416+
public int getSkewJoinRandRange() {
2417+
return skewJoinRandRange;
2418+
}
2419+
2420+
public void setSkewJoinRandRange(int skewJoinRandRange) {
2421+
this.skewJoinRandRange = skewJoinRandRange;
2422+
}
2423+
2424+
public boolean isEnableStrictOrderBy() {
2425+
return enableStrictOrderBy;
2426+
}
2427+
2428+
public void setEnableStrictOrderBy(boolean enableStrictOrderBy) {
2429+
this.enableStrictOrderBy = enableStrictOrderBy;
2430+
}
2431+
2432+
>>>>>>> fa72214349 ([BugFix] fix wrong order by scope for distinct query (#37910))
24012433
// Serialize to thrift object
24022434
// used for rest api
24032435
public TQueryOptions toThrift() {

fe/fe-core/src/main/java/com/starrocks/sql/analyzer/AggregationAnalyzer.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,11 @@
4747
import com.starrocks.qe.SqlModeHelper;
4848
import com.starrocks.sql.ast.ArrayExpr;
4949
import com.starrocks.sql.ast.AstVisitor;
50+
<<<<<<< HEAD
51+
=======
52+
import com.starrocks.sql.ast.DictionaryGetExpr;
53+
import com.starrocks.sql.ast.FieldReference;
54+
>>>>>>> fa72214349 ([BugFix] fix wrong order by scope for distinct query (#37910))
5055
import com.starrocks.sql.ast.LambdaFunctionExpr;
5156
import com.starrocks.sql.ast.QueryStatement;
5257

@@ -116,9 +121,21 @@ public Boolean visit(ParseNode expr) {
116121
return super.visit(expr);
117122
}
118123

124+
@Override
125+
public Boolean visitFieldReference(FieldReference node, Void context) {
126+
String colInfo = node.getTblName() == null ? "column" : "column of " + node.getTblName().toString();
127+
throw new SemanticException(colInfo + " must appear in the GROUP BY clause or be used in an aggregate function",
128+
node.getPos());
129+
}
130+
119131
@Override
120132
public Boolean visitExpression(Expr node, Void context) {
133+
<<<<<<< HEAD
121134
throw new SemanticException("%s is not support in GROUP BY clause", node.toSql());
135+
=======
136+
throw new SemanticException(node.toSql() + " must appear in the GROUP BY clause or be used in an aggregate function",
137+
node.getPos());
138+
>>>>>>> fa72214349 ([BugFix] fix wrong order by scope for distinct query (#37910))
122139
}
123140

124141
private boolean isGroupingKey(Expr node) {

fe/fe-core/src/main/java/com/starrocks/sql/analyzer/SelectAnalyzer.java

Lines changed: 63 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
package com.starrocks.sql.analyzer;
1616

17+
import com.google.common.base.Preconditions;
1718
import com.google.common.collect.ImmutableList;
1819
import com.google.common.collect.Lists;
1920
import com.google.common.collect.Sets;
@@ -85,10 +86,11 @@ public void analyze(AnalyzeState analyzeState,
8586
analyzeHaving(havingClause, analyzeState, sourceScope, outputScope, outputExpressions);
8687

8788
// Construct sourceAndOutputScope with sourceScope and outputScope
88-
Scope sourceAndOutputScope = computeAndAssignOrderScope(analyzeState, sourceScope, outputScope);
89+
Scope sourceAndOutputScope = computeAndAssignOrderScope(analyzeState, sourceScope, outputScope,
90+
selectList.isDistinct());
8991

9092
List<OrderByElement> orderByElements =
91-
analyzeOrderBy(sortClause, analyzeState, sourceAndOutputScope, outputExpressions);
93+
analyzeOrderBy(sortClause, analyzeState, sourceAndOutputScope, outputExpressions, selectList.isDistinct());
9294
List<Expr> orderByExpressions =
9395
orderByElements.stream().map(OrderByElement::getExpr).collect(Collectors.toList());
9496

@@ -182,7 +184,7 @@ public void analyze(AnalyzeState analyzeState,
182184
.collect(Collectors.toList());
183185

184186
Scope sourceScopeForOrder = new Scope(RelationId.anonymous(), new RelationFields(sourceForOrderFields));
185-
computeAndAssignOrderScope(analyzeState, sourceScopeForOrder, outputScope);
187+
computeAndAssignOrderScope(analyzeState, sourceScopeForOrder, outputScope, selectList.isDistinct());
186188
analyzeState.setOrderSourceExpressions(orderSourceExpressions);
187189
}
188190

@@ -316,7 +318,8 @@ private List<Expr> analyzeSelect(SelectList selectList, Relation fromRelation, b
316318

317319
private List<OrderByElement> analyzeOrderBy(List<OrderByElement> orderByElements, AnalyzeState analyzeState,
318320
Scope orderByScope,
319-
List<Expr> outputExpressions) {
321+
List<Expr> outputExpressions,
322+
boolean isDistinct) {
320323
if (orderByElements == null) {
321324
analyzeState.setOrderBy(Collections.emptyList());
322325
return Collections.emptyList();
@@ -332,20 +335,33 @@ private List<OrderByElement> analyzeOrderBy(List<OrderByElement> orderByElements
332335
if (ordinal < 1 || ordinal > outputExpressions.size()) {
333336
throw new SemanticException("ORDER BY position %s is not in select list", ordinal);
334337
}
338+
// index can ensure no ambiguous, we don't need to re-analyze this output expression
335339
expression = outputExpressions.get((int) ordinal - 1);
336-
}
337-
338-
if (expression instanceof FieldReference) {
339-
// If the expression of order by is a FieldReference, it means that the type of sql is
340+
} else if (expression instanceof FieldReference) {
341+
// If the expression of order by is a FieldReference, and it's not a distinct select,
342+
// it means that the type of sql is
340343
// "select * from t order by 1", then this FieldReference cannot be parsed in OrderByScope,
341344
// but should be parsed in sourceScope
342-
analyzeExpression(expression, analyzeState, orderByScope.getParent());
345+
if (isDistinct) {
346+
analyzeExpression(expression, analyzeState, orderByScope);
347+
} else {
348+
analyzeExpression(expression, analyzeState, orderByScope.getParent());
349+
}
343350
} else {
344351
ExpressionAnalyzer expressionAnalyzer = new ExpressionAnalyzer(session);
345352
expressionAnalyzer.analyzeWithoutUpdateState(expression, analyzeState, orderByScope);
346353
List<Expr> aggregations = Lists.newArrayList();
347354
expression.collectAll(e -> e.isAggregate(), aggregations);
348-
aggregations.forEach(e -> analyzeExpression(e, analyzeState, orderByScope.getParent()));
355+
if (isDistinct && !aggregations.isEmpty()) {
356+
throw new SemanticException("for SELECT DISTINCT, ORDER BY expressions must appear in select list",
357+
expression.getPos());
358+
}
359+
360+
if (!aggregations.isEmpty()) {
361+
// use parent scope to analyze agg func firstly
362+
Preconditions.checkState(orderByScope.getParent() != null, "parent scope not be set");
363+
aggregations.forEach(e -> analyzeExpression(e, analyzeState, orderByScope.getParent()));
364+
}
349365
analyzeExpression(expression, analyzeState, orderByScope);
350366
}
351367

@@ -659,23 +675,24 @@ public Expr visitSlot(SlotRef slotRef, Void context) {
659675
}
660676
}
661677

662-
private Scope computeAndAssignOrderScope(AnalyzeState analyzeState, Scope sourceScope, Scope outputScope) {
663-
// The Scope used by order by allows parsing of the same column,
664-
// such as 'select v1 as v, v1 as v from t0 order by v'
665-
// but normal parsing does not allow it. So add a de-duplication operation here.
678+
private Scope computeAndAssignOrderScope(AnalyzeState analyzeState, Scope sourceScope, Scope outputScope,
679+
boolean isDistinct) {
680+
681+
List<Field> allFields = Lists.newArrayList();
682+
// order by can only "see" fields from distinct output
683+
if (isDistinct) {
684+
allFields = removeDuplicateField(outputScope.getRelationFields().getAllFields());
685+
Scope orderScope = new Scope(outputScope.getRelationId(), new RelationFields(allFields));
686+
analyzeState.setOrderScope(orderScope);
687+
return orderScope;
688+
}
666689

667-
List<Field> allFields = new ArrayList<>();
668690
for (int i = 0; i < analyzeState.getOutputExprInOrderByScope().size(); ++i) {
669691
Field field = outputScope.getRelationFields()
670692
.getFieldByIndex(analyzeState.getOutputExprInOrderByScope().get(i));
671-
if (field.getName() != null && field.getOriginExpression() != null &&
672-
allFields.stream().anyMatch(f -> f.getOriginExpression() != null
673-
&& f.getName() != null && field.getName().equals(f.getName())
674-
&& field.getOriginExpression().equals(f.getOriginExpression()))) {
675-
continue;
676-
}
677693
allFields.add(field);
678694
}
695+
allFields = removeDuplicateField(allFields);
679696

680697
Scope orderScope = new Scope(outputScope.getRelationId(), new RelationFields(allFields));
681698

@@ -692,4 +709,29 @@ private Scope computeAndAssignOrderScope(AnalyzeState analyzeState, Scope source
692709
private void analyzeExpression(Expr expr, AnalyzeState analyzeState, Scope scope) {
693710
ExpressionAnalyzer.analyzeExpression(expr, analyzeState, scope, session);
694711
}
712+
713+
714+
// The Scope used by order by allows parsing of the same column,
715+
// such as 'select v1 as v, v1 as v from t0 order by v'
716+
// but normal parsing does not allow it. So add a de-duplication operation here.
717+
private List<Field> removeDuplicateField(List<Field> originalFields) {
718+
List<Field> allFields = Lists.newArrayList();
719+
for (Field field : originalFields) {
720+
if (session.getSessionVariable().isEnableStrictOrderBy()) {
721+
if (field.getName() != null && field.getOriginExpression() != null &&
722+
allFields.stream().anyMatch(f -> f.getOriginExpression() != null
723+
&& f.getName() != null && field.getName().equals(f.getName())
724+
&& field.getOriginExpression().equals(f.getOriginExpression()))) {
725+
continue;
726+
}
727+
} else {
728+
if (field.getName() != null &&
729+
allFields.stream().anyMatch(f -> f.getName() != null && field.getName().equals(f.getName()))) {
730+
continue;
731+
}
732+
}
733+
allFields.add(field);
734+
}
735+
return allFields;
736+
}
695737
}

fe/fe-core/src/main/java/com/starrocks/sql/ast/FieldReference.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,10 @@ public int getFieldIndex() {
4848
return fieldIndex;
4949
}
5050

51+
public TableName getTblName() {
52+
return tblName;
53+
}
54+
5155
@Override
5256
public boolean equals(Object o) {
5357
if (this == o) {

fe/fe-core/src/test/java/com/starrocks/sql/analyzer/AnalyzeAggregateTest.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,9 +122,11 @@ public void testDistinct() {
122122
analyzeSuccess("select distinct v1, v2 as v from t0 order by v");
123123
analyzeSuccess("select distinct abs(v1) as v from t0 order by v");
124124
analyzeFail("select distinct v1 from t0 order by v2",
125-
"must be an aggregate expression or appear in GROUP BY clause");
125+
"Column 'v2' cannot be resolved");
126126
analyzeFail("select distinct v1 as v from t0 order by v2",
127-
"must be an aggregate expression or appear in GROUP BY clause");
127+
"Column 'v2' cannot be resolved");
128+
analyzeFail("select * from t0 order by max(v2)",
129+
"column must appear in the GROUP BY clause or be used in an aggregate function.");
128130

129131
analyzeSuccess("select distinct v1 as v from t0 having v = 1");
130132
analyzeFail("select distinct v1 as v from t0 having v2 = 2",

fe/fe-core/src/test/java/com/starrocks/sql/parser/ParserTest.java

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,126 @@ void testSettingSqlMode() throws InterruptedException {
273273
exprs[1] instanceof FunctionCallExpr);
274274
}
275275

276+
<<<<<<< HEAD
277+
=======
278+
@ParameterizedTest
279+
@MethodSource("keyWordSqls")
280+
void testNodeReservedWords_3(String sql) {
281+
SessionVariable sessionVariable = new SessionVariable();
282+
try {
283+
SqlParser.parse(sql, sessionVariable).get(0);
284+
} catch (Exception e) {
285+
fail("sql should success. errMsg: " + e.getMessage());
286+
}
287+
}
288+
289+
@ParameterizedTest
290+
@MethodSource("reservedWordSqls")
291+
void testReservedWords(String sql) {
292+
SessionVariable sessionVariable = new SessionVariable();
293+
try {
294+
SqlParser.parse(sql, sessionVariable).get(0);
295+
fail("Not quoting reserved words. sql should fail.");
296+
} catch (Exception e) {
297+
Assert.assertTrue(e instanceof ParsingException);
298+
}
299+
}
300+
301+
@ParameterizedTest
302+
@MethodSource("multipleStatements")
303+
void testMultipleStatements(String sql, boolean isValid) {
304+
SessionVariable sessionVariable = new SessionVariable();
305+
try {
306+
SqlParser.parse(sql, sessionVariable).get(0);
307+
if (!isValid) {
308+
fail("sql should fail.");
309+
}
310+
} catch (Exception e) {
311+
if (isValid) {
312+
fail("sql should success. errMsg: " + e.getMessage());
313+
}
314+
}
315+
}
316+
317+
@ParameterizedTest
318+
@MethodSource("setQuantifierInAggFunc")
319+
void testSetQuantifierInAggFunc(String sql, boolean isValid) {
320+
SessionVariable sessionVariable = new SessionVariable();
321+
try {
322+
SqlParser.parse(sql, sessionVariable).get(0);
323+
if (!isValid) {
324+
fail("sql should fail.");
325+
}
326+
} catch (Exception e) {
327+
if (isValid) {
328+
fail("sql should success. errMsg: " + e.getMessage());
329+
}
330+
}
331+
}
332+
333+
@ParameterizedTest
334+
@MethodSource("unexpectedTokenSqls")
335+
void testUnexpectedTokenSqls(String sql, String expecting) {
336+
SessionVariable sessionVariable = new SessionVariable();
337+
try {
338+
SqlParser.parse(sql, sessionVariable).get(0);
339+
fail("sql should fail.");
340+
} catch (Exception e) {
341+
System.out.println(e.getMessage());
342+
assertContains(e.getMessage(), expecting);
343+
}
344+
}
345+
346+
@Test
347+
void testWrongVariableName() {
348+
String res = VariableMgr.findSimilarVarNames("disable_coloce_join");
349+
assertContains(res, "{'disable_colocate_join', 'disable_join_reorder', 'disable_function_fold_constants'}");
350+
351+
res = VariableMgr.findSimilarVarNames("SQL_AUTO_NULL");
352+
assertContains(res, "{'SQL_AUTO_IS_NULL', 'sql_dialect', 'sql_mode_v2'}");
353+
354+
res = VariableMgr.findSimilarVarNames("pipeline");
355+
assertContains(res, "{'pipeline_dop', 'pipeline_sink_dop', 'pipeline_profile_level'}");
356+
357+
res = VariableMgr.findSimilarVarNames("disable_joinreorder");
358+
assertContains(res, "{'disable_join_reorder', 'disable_colocate_join'");
359+
}
360+
361+
@Test
362+
void testModOperator() {
363+
String sql = "select 100 MOD 2";
364+
List<StatementBase> stmts = SqlParser.parse(sql, new SessionVariable());
365+
String newSql = AstToSQLBuilder.toSQL(stmts.get(0));
366+
assertEquals("SELECT 100 % 2", newSql);
367+
}
368+
369+
private static Stream<Arguments> keyWordSqls() {
370+
List<String> sqls = Lists.newArrayList();
371+
sqls.add("select current_role()");
372+
sqls.add("select current_role");
373+
sqls.add("SHOW ALL AUTHENTICATION ");
374+
sqls.add("CANCEL BACKUP from tbl");
375+
sqls.add("select current_role() from tbl");
376+
sqls.add("grant all privileges on DATABASE db1 to test");
377+
sqls.add("revoke export on DATABASE db1 from test");
378+
sqls.add("ALTER SYSTEM MODIFY BACKEND HOST '1' to '1'");
379+
sqls.add("SHOW COMPUTE NODES");
380+
sqls.add("trace times select 1");
381+
sqls.add("select anti from t1 left anti join t2 on true");
382+
sqls.add("select anti, semi from t1 left semi join t2 on true");
383+
sqls.add("select * from tbl1 MINUS select * from tbl2");
384+
return sqls.stream().map(e -> Arguments.of(e));
385+
}
386+
387+
388+
private static Stream<Arguments> reservedWordSqls() {
389+
List<String> sqls = Lists.newArrayList();
390+
sqls.add("select * from current_role ");
391+
sqls.add("select * from full full join anti anti on anti.col join t1 on true");
392+
return sqls.stream().map(e -> Arguments.of(e));
393+
}
394+
395+
>>>>>>> fa72214349 ([BugFix] fix wrong order by scope for distinct query (#37910))
276396
private static Stream<Arguments> multipleStatements() {
277397
List<Pair<String, Boolean>> sqls = Lists.newArrayList();
278398
sqls.add(Pair.create("select 1;;;;;;select 2", true));

0 commit comments

Comments
 (0)