[BugFix] fix wrong order by scope for distinct query (#37910)

packy92 · mergify[bot] · commit 576872ae1eb0 · 2023-12-28T06:56:40.000Z
Signed-off-by: packy92 <wangchao@starrocks.com> (cherry picked from commit fa72214) # Conflicts: # fe/fe-core/src/main/java/com/starrocks/qe/SessionVariable.java # fe/fe-core/src/main/java/com/starrocks/sql/analyzer/AggregationAnalyzer.java # fe/fe-core/src/test/java/com/starrocks/sql/parser/ParserTest.java # fe/fe-core/src/test/java/com/starrocks/sql/plan/OrderByTest.java
diff --git a/fe/fe-core/src/main/java/com/starrocks/qe/SessionVariable.java b/fe/fe-core/src/main/java/com/starrocks/qe/SessionVariable.java
@@ -340,6 +340,9 @@ public class SessionVariable implements Serializable, Writable, Cloneable {
     public static final String ENABLE_MATERIALIZED_VIEW_REWRITE = "enable_materialized_view_rewrite";
     public static final String ENABLE_MATERIALIZED_VIEW_UNION_REWRITE = "enable_materialized_view_union_rewrite";
 
+    // Name of the session variable backing enableStrictOrderBy.
+    public static final String ENABLE_STRICT_ORDER_BY = "enable_strict_order_by";
+
     // Flag to control whether to proxy follower's query statement to leader/follower.
     public enum FollowerQueryForwardMode {
         DEFAULT,    // proxy queries by the follower's replay progress (default)
@@ -1072,6 +1084,9 @@ public String getCboEqBaseType() {
     @VarAttr(name = FOLLOWER_QUERY_FORWARD_MODE, flag = VariableMgr.INVISIBLE | VariableMgr.DISABLE_FORWARD_TO_LEADER)
     private String followerForwardMode = "";
 
+    @VarAttr(name = ENABLE_STRICT_ORDER_BY)
+    private boolean enableStrictOrderBy = true;
+
     public void setFollowerQueryForwardMode(String mode) {
         this.followerForwardMode = mode;
     }
@@ -1996,6 +2011,15 @@ public void setCrossJoinCostPenalty(long crossJoinCostPenalty) {
         this.crossJoinCostPenalty = crossJoinCostPenalty;
     }
 
+    // Accessors for the enable_strict_order_by session variable.
+    public boolean isEnableStrictOrderBy() {
+        return enableStrictOrderBy;
+    }
+
+    public void setEnableStrictOrderBy(boolean enableStrictOrderBy) {
+        this.enableStrictOrderBy = enableStrictOrderBy;
+    }
+
     // Serialize to thrift object
     // used for rest api
     public TQueryOptions toThrift() {
diff --git a/fe/fe-core/src/main/java/com/starrocks/sql/analyzer/AggregationAnalyzer.java b/fe/fe-core/src/main/java/com/starrocks/sql/analyzer/AggregationAnalyzer.java
@@ -34,6 +34,7 @@
 import com.starrocks.qe.ConnectContext;
 import com.starrocks.qe.SqlModeHelper;
 import com.starrocks.sql.ast.AstVisitor;
+import com.starrocks.sql.ast.FieldReference;
 import com.starrocks.sql.ast.LambdaFunctionExpr;
 import com.starrocks.sql.ast.QueryStatement;
 
@@ -103,9 +108,18 @@ public Boolean visit(ParseNode expr) {
             return super.visit(expr);
         }
 
+        @Override
+        public Boolean visitFieldReference(FieldReference node, Void context) {
+            String colInfo = node.getTblName() == null ? "column" : "column of " + node.getTblName().toString();
+            throw new SemanticException(colInfo + " must appear in the GROUP BY clause or be used in an aggregate function",
+                    node.getPos());
+        }
+
         @Override
         public Boolean visitExpression(Expr node, Void context) {
-            throw new SemanticException("%s is not support in GROUP BY clause", node.toSql());
+            // Same wording as visitFieldReference so both fallbacks report the violation consistently.
+            throw new SemanticException(node.toSql() + " must appear in the GROUP BY clause or be used in an aggregate function",
+                    node.getPos());
         }
 
         private boolean isGroupingKey(Expr node) {
diff --git a/fe/fe-core/src/main/java/com/starrocks/sql/analyzer/SelectAnalyzer.java b/fe/fe-core/src/main/java/com/starrocks/sql/analyzer/SelectAnalyzer.java
@@ -1,6 +1,7 @@
 // This file is licensed under the Elastic License 2.0. Copyright 2021-present, StarRocks Inc.
 package com.starrocks.sql.analyzer;
 
+import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
@@ -72,10 +73,11 @@ public void analyze(AnalyzeState analyzeState,
         analyzeHaving(havingClause, analyzeState, sourceScope, outputScope, outputExpressions);
 
         // Construct sourceAndOutputScope with sourceScope and outputScope
-        Scope sourceAndOutputScope = computeAndAssignOrderScope(analyzeState, sourceScope, outputScope);
+        Scope sourceAndOutputScope = computeAndAssignOrderScope(analyzeState, sourceScope, outputScope,
+                selectList.isDistinct());
 
         List<OrderByElement> orderByElements =
-                analyzeOrderBy(sortClause, analyzeState, sourceAndOutputScope, outputExpressions);
+                analyzeOrderBy(sortClause, analyzeState, sourceAndOutputScope, outputExpressions, selectList.isDistinct());
         List<Expr> orderByExpressions =
                 orderByElements.stream().map(OrderByElement::getExpr).collect(Collectors.toList());
 
@@ -165,7 +167,7 @@ public void analyze(AnalyzeState analyzeState,
                     .collect(Collectors.toList());
 
             Scope sourceScopeForOrder = new Scope(RelationId.anonymous(), new RelationFields(sourceForOrderFields));
-            computeAndAssignOrderScope(analyzeState, sourceScopeForOrder, outputScope);
+            computeAndAssignOrderScope(analyzeState, sourceScopeForOrder, outputScope, selectList.isDistinct());
             analyzeState.setOrderSourceExpressions(orderSourceExpressions);
         }
 
@@ -300,7 +302,8 @@ private List<Expr> analyzeSelect(SelectList selectList, Relation fromRelation, b
 
     private List<OrderByElement> analyzeOrderBy(List<OrderByElement> orderByElements, AnalyzeState analyzeState,
                                                 Scope orderByScope,
-                                                List<Expr> outputExpressions) {
+                                                List<Expr> outputExpressions,
+                                                boolean isDistinct) {
         if (orderByElements == null) {
             analyzeState.setOrderBy(Collections.emptyList());
             return Collections.emptyList();
@@ -316,20 +319,33 @@ private List<OrderByElement> analyzeOrderBy(List<OrderByElement> orderByElements
                 if (ordinal < 1 || ordinal > outputExpressions.size()) {
                     throw new SemanticException("ORDER BY position %s is not in select list", ordinal);
                 }
+                // An ordinal is unambiguous, so the referenced output expression does not need to be re-analyzed
                 expression = outputExpressions.get((int) ordinal - 1);
-            }
-
-            if (expression instanceof FieldReference) {
-                // If the expression of order by is a FieldReference, it means that the type of sql is
+            } else if (expression instanceof FieldReference) {
+                // If the expression of order by is a FieldReference, and it's not a distinct select,
+                // it means that the type of sql is
                 // "select * from t order by 1", then this FieldReference cannot be parsed in OrderByScope,
                 // but should be parsed in sourceScope
-                analyzeExpression(expression, analyzeState, orderByScope.getParent());
+                if (isDistinct) {
+                    analyzeExpression(expression, analyzeState, orderByScope);
+                } else {
+                    analyzeExpression(expression, analyzeState, orderByScope.getParent());
+                }
             } else {
                 ExpressionAnalyzer expressionAnalyzer = new ExpressionAnalyzer(session);
                 expressionAnalyzer.analyzeWithoutUpdateState(expression, analyzeState, orderByScope);
                 List<Expr> aggregations = Lists.newArrayList();
                 expression.collectAll(e -> e.isAggregate(), aggregations);
-                aggregations.forEach(e -> analyzeExpression(e, analyzeState, orderByScope.getParent()));
+                if (isDistinct && !aggregations.isEmpty()) {
+                    throw new SemanticException("for SELECT DISTINCT, ORDER BY expressions must appear in select list",
+                            expression.getPos());
+                }
+
+                if (!aggregations.isEmpty()) {
+                    // use parent scope to analyze agg func firstly
+                    Preconditions.checkState(orderByScope.getParent() != null, "parent scope not be set");
+                    aggregations.forEach(e -> analyzeExpression(e, analyzeState, orderByScope.getParent()));
+                }
                 analyzeExpression(expression, analyzeState, orderByScope);
             }
 
@@ -643,23 +659,24 @@ public Expr visitSlot(SlotRef slotRef, Void context) {
         }
     }
 
-    private Scope computeAndAssignOrderScope(AnalyzeState analyzeState, Scope sourceScope, Scope outputScope) {
-        // The Scope used by order by allows parsing of the same column,
-        // such as 'select v1 as v, v1 as v from t0 order by v'
-        // but normal parsing does not allow it. So add a de-duplication operation here.
+    private Scope computeAndAssignOrderScope(AnalyzeState analyzeState, Scope sourceScope, Scope outputScope,
+                                             boolean isDistinct) {
+
+        List<Field> allFields = Lists.newArrayList();
+        // order by can only "see" fields from distinct output
+        if (isDistinct) {
+            allFields = removeDuplicateField(outputScope.getRelationFields().getAllFields());
+            Scope orderScope = new Scope(outputScope.getRelationId(), new RelationFields(allFields));
+            analyzeState.setOrderScope(orderScope);
+            return orderScope;
+        }
 
-        List<Field> allFields = new ArrayList<>();
         for (int i = 0; i < analyzeState.getOutputExprInOrderByScope().size(); ++i) {
             Field field = outputScope.getRelationFields()
                     .getFieldByIndex(analyzeState.getOutputExprInOrderByScope().get(i));
-            if (field.getName() != null && field.getOriginExpression() != null &&
-                    allFields.stream().anyMatch(f -> f.getOriginExpression() != null
-                            && f.getName() != null && field.getName().equals(f.getName())
-                            && field.getOriginExpression().equals(f.getOriginExpression()))) {
-                continue;
-            }
             allFields.add(field);
         }
+        allFields = removeDuplicateField(allFields);
 
         Scope orderScope = new Scope(outputScope.getRelationId(), new RelationFields(allFields));
 
@@ -676,4 +693,37 @@ private Scope computeAndAssignOrderScope(AnalyzeState analyzeState, Scope source
     private void analyzeExpression(Expr expr, AnalyzeState analyzeState, Scope scope) {
         ExpressionAnalyzer.analyzeExpression(expr, analyzeState, scope, session);
     }
+
+    /**
+     * Removes repeated fields so ORDER BY can resolve a select list that repeats a column,
+     * such as 'select v1 as v, v1 as v from t0 order by v', which normal parsing does not allow.
+     * <p>
+     * When enable_strict_order_by is on, a field is dropped only if an already kept field has both
+     * the same name and an equal origin expression. When it is off, an equal name alone is enough.
+     * The first occurrence is kept; a field with a null name (or, in strict mode, a null origin
+     * expression) is never dropped.
+     *
+     * @param originalFields fields to filter; the list itself is not modified
+     * @return a new list with the duplicates removed, in the original order
+     */
+    private List<Field> removeDuplicateField(List<Field> originalFields) {
+        // Loop-invariant: read the session flag once instead of once per field.
+        boolean strictOrderBy = session.getSessionVariable().isEnableStrictOrderBy();
+        List<Field> allFields = Lists.newArrayList();
+        for (Field field : originalFields) {
+            boolean duplicated;
+            if (strictOrderBy) {
+                duplicated = field.getName() != null && field.getOriginExpression() != null &&
+                        allFields.stream().anyMatch(f -> f.getOriginExpression() != null
+                                && f.getName() != null && field.getName().equals(f.getName())
+                                && field.getOriginExpression().equals(f.getOriginExpression()));
+            } else {
+                duplicated = field.getName() != null &&
+                        allFields.stream().anyMatch(f -> f.getName() != null && field.getName().equals(f.getName()));
+            }
+            if (!duplicated) {
+                allFields.add(field);
+            }
+        }
+        return allFields;
+    }
 }
diff --git a/fe/fe-core/src/main/java/com/starrocks/sql/ast/FieldReference.java b/fe/fe-core/src/main/java/com/starrocks/sql/ast/FieldReference.java
@@ -35,6 +35,10 @@ public int getFieldIndex() {
         return fieldIndex;
     }
 
+    public TableName getTblName() {
+        return tblName;
+    }
+
     @Override
     public boolean equals(Object o) {
         if (this == o) {
diff --git a/fe/fe-core/src/test/java/com/starrocks/sql/analyzer/AnalyzeAggregateTest.java b/fe/fe-core/src/test/java/com/starrocks/sql/analyzer/AnalyzeAggregateTest.java
@@ -109,9 +109,11 @@ public void testDistinct() {
         analyzeSuccess("select distinct v1, v2 as v from t0 order by v");
         analyzeSuccess("select distinct abs(v1) as v from t0 order by v");
         analyzeFail("select distinct v1 from t0 order by v2",
-                "must be an aggregate expression or appear in GROUP BY clause");
+                "Column 'v2' cannot be resolved");
         analyzeFail("select distinct v1 as v from t0 order by v2",
-                "must be an aggregate expression or appear in GROUP BY clause");
+                "Column 'v2' cannot be resolved");
+        analyzeFail("select * from t0 order by max(v2)",
+                "column must appear in the GROUP BY clause or be used in an aggregate function.");
 
         analyzeSuccess("select distinct v1 as v from t0 having v = 1");
         analyzeFail("select distinct v1 as v from t0 having v2 = 2",
diff --git a/fe/fe-core/src/test/java/com/starrocks/sql/parser/ParserTest.java b/fe/fe-core/src/test/java/com/starrocks/sql/parser/ParserTest.java
@@ -180,6 +180,6 @@ void testSettingSqlMode() throws InterruptedException {
                 exprs[1] instanceof FunctionCallExpr);
     }
 
     @Test
     void testModOperator() {
         String sql = "select 100 MOD 2";
diff --git a/fe/fe-core/src/test/java/com/starrocks/sql/plan/OrderByTest.java b/fe/fe-core/src/test/java/com/starrocks/sql/plan/OrderByTest.java