1
1
// This file is licensed under the Elastic License 2.0. Copyright 2021-present, StarRocks Inc.
2
2
package com .starrocks .sql .analyzer ;
3
3
4
+ import com .google .common .base .Preconditions ;
4
5
import com .google .common .collect .ImmutableList ;
5
6
import com .google .common .collect .Lists ;
6
7
import com .google .common .collect .Sets ;
@@ -72,10 +73,11 @@ public void analyze(AnalyzeState analyzeState,
72
73
analyzeHaving (havingClause , analyzeState , sourceScope , outputScope , outputExpressions );
73
74
74
75
// Construct sourceAndOutputScope with sourceScope and outputScope
75
- Scope sourceAndOutputScope = computeAndAssignOrderScope (analyzeState , sourceScope , outputScope );
76
+ Scope sourceAndOutputScope = computeAndAssignOrderScope (analyzeState , sourceScope , outputScope ,
77
+ selectList .isDistinct ());
76
78
77
79
List <OrderByElement > orderByElements =
78
- analyzeOrderBy (sortClause , analyzeState , sourceAndOutputScope , outputExpressions );
80
+ analyzeOrderBy (sortClause , analyzeState , sourceAndOutputScope , outputExpressions , selectList . isDistinct () );
79
81
List <Expr > orderByExpressions =
80
82
orderByElements .stream ().map (OrderByElement ::getExpr ).collect (Collectors .toList ());
81
83
@@ -165,7 +167,7 @@ public void analyze(AnalyzeState analyzeState,
165
167
.collect (Collectors .toList ());
166
168
167
169
Scope sourceScopeForOrder = new Scope (RelationId .anonymous (), new RelationFields (sourceForOrderFields ));
168
- computeAndAssignOrderScope (analyzeState , sourceScopeForOrder , outputScope );
170
+ computeAndAssignOrderScope (analyzeState , sourceScopeForOrder , outputScope , selectList . isDistinct () );
169
171
analyzeState .setOrderSourceExpressions (orderSourceExpressions );
170
172
}
171
173
@@ -300,7 +302,8 @@ private List<Expr> analyzeSelect(SelectList selectList, Relation fromRelation, b
300
302
301
303
private List <OrderByElement > analyzeOrderBy (List <OrderByElement > orderByElements , AnalyzeState analyzeState ,
302
304
Scope orderByScope ,
303
- List <Expr > outputExpressions ) {
305
+ List <Expr > outputExpressions ,
306
+ boolean isDistinct ) {
304
307
if (orderByElements == null ) {
305
308
analyzeState .setOrderBy (Collections .emptyList ());
306
309
return Collections .emptyList ();
@@ -316,20 +319,33 @@ private List<OrderByElement> analyzeOrderBy(List<OrderByElement> orderByElements
316
319
if (ordinal < 1 || ordinal > outputExpressions .size ()) {
317
320
throw new SemanticException ("ORDER BY position %s is not in select list" , ordinal );
318
321
}
322
+ // An ordinal position is unambiguous, so this output expression does not need to be re-analyzed
319
323
expression = outputExpressions .get ((int ) ordinal - 1 );
320
- }
321
-
322
- if (expression instanceof FieldReference ) {
323
- // If the expression of order by is a FieldReference, it means that the type of sql is
324
+ } else if (expression instanceof FieldReference ) {
325
+ // If the expression of order by is a FieldReference, and it's not a distinct select,
326
+ // it means that the type of sql is
324
327
// "select * from t order by 1", then this FieldReference cannot be parsed in OrderByScope,
325
328
// but should be parsed in sourceScope
326
- analyzeExpression (expression , analyzeState , orderByScope .getParent ());
329
+ if (isDistinct ) {
330
+ analyzeExpression (expression , analyzeState , orderByScope );
331
+ } else {
332
+ analyzeExpression (expression , analyzeState , orderByScope .getParent ());
333
+ }
327
334
} else {
328
335
ExpressionAnalyzer expressionAnalyzer = new ExpressionAnalyzer (session );
329
336
expressionAnalyzer .analyzeWithoutUpdateState (expression , analyzeState , orderByScope );
330
337
List <Expr > aggregations = Lists .newArrayList ();
331
338
expression .collectAll (e -> e .isAggregate (), aggregations );
332
- aggregations .forEach (e -> analyzeExpression (e , analyzeState , orderByScope .getParent ()));
339
+ if (isDistinct && !aggregations .isEmpty ()) {
340
+ throw new SemanticException ("for SELECT DISTINCT, ORDER BY expressions must appear in select list" ,
341
+ expression .getPos ());
342
+ }
343
+
344
+ if (!aggregations .isEmpty ()) {
345
+ // Analyze the aggregate functions in the parent scope first
346
+ Preconditions .checkState (orderByScope .getParent () != null , "parent scope not be set" );
347
+ aggregations .forEach (e -> analyzeExpression (e , analyzeState , orderByScope .getParent ()));
348
+ }
333
349
analyzeExpression (expression , analyzeState , orderByScope );
334
350
}
335
351
@@ -643,23 +659,24 @@ public Expr visitSlot(SlotRef slotRef, Void context) {
643
659
}
644
660
}
645
661
646
- private Scope computeAndAssignOrderScope (AnalyzeState analyzeState , Scope sourceScope , Scope outputScope ) {
647
- // The Scope used by order by allows parsing of the same column,
648
- // such as 'select v1 as v, v1 as v from t0 order by v'
649
- // but normal parsing does not allow it. So add a de-duplication operation here.
662
+ private Scope computeAndAssignOrderScope (AnalyzeState analyzeState , Scope sourceScope , Scope outputScope ,
663
+ boolean isDistinct ) {
664
+
665
+ List <Field > allFields = Lists .newArrayList ();
666
+ // order by can only "see" fields from distinct output
667
+ if (isDistinct ) {
668
+ allFields = removeDuplicateField (outputScope .getRelationFields ().getAllFields ());
669
+ Scope orderScope = new Scope (outputScope .getRelationId (), new RelationFields (allFields ));
670
+ analyzeState .setOrderScope (orderScope );
671
+ return orderScope ;
672
+ }
650
673
651
- List <Field > allFields = new ArrayList <>();
652
674
for (int i = 0 ; i < analyzeState .getOutputExprInOrderByScope ().size (); ++i ) {
653
675
Field field = outputScope .getRelationFields ()
654
676
.getFieldByIndex (analyzeState .getOutputExprInOrderByScope ().get (i ));
655
- if (field .getName () != null && field .getOriginExpression () != null &&
656
- allFields .stream ().anyMatch (f -> f .getOriginExpression () != null
657
- && f .getName () != null && field .getName ().equals (f .getName ())
658
- && field .getOriginExpression ().equals (f .getOriginExpression ()))) {
659
- continue ;
660
- }
661
677
allFields .add (field );
662
678
}
679
+ allFields = removeDuplicateField (allFields );
663
680
664
681
Scope orderScope = new Scope (outputScope .getRelationId (), new RelationFields (allFields ));
665
682
@@ -676,4 +693,29 @@ private Scope computeAndAssignOrderScope(AnalyzeState analyzeState, Scope source
676
693
private void analyzeExpression (Expr expr , AnalyzeState analyzeState , Scope scope ) {
677
694
ExpressionAnalyzer .analyzeExpression (expr , analyzeState , scope , session );
678
695
}
696
+
697
+
698
+ // The Scope used by order by allows parsing of the same column,
699
+ // such as 'select v1 as v, v1 as v from t0 order by v'
700
+ // but normal parsing does not allow it. So add a de-duplication operation here.
701
+ private List <Field > removeDuplicateField (List <Field > originalFields ) {
702
+ List <Field > allFields = Lists .newArrayList ();
703
+ for (Field field : originalFields ) {
704
+ if (session .getSessionVariable ().isEnableStrictOrderBy ()) {
705
+ if (field .getName () != null && field .getOriginExpression () != null &&
706
+ allFields .stream ().anyMatch (f -> f .getOriginExpression () != null
707
+ && f .getName () != null && field .getName ().equals (f .getName ())
708
+ && field .getOriginExpression ().equals (f .getOriginExpression ()))) {
709
+ continue ;
710
+ }
711
+ } else {
712
+ if (field .getName () != null &&
713
+ allFields .stream ().anyMatch (f -> f .getName () != null && field .getName ().equals (f .getName ()))) {
714
+ continue ;
715
+ }
716
+ }
717
+ allFields .add (field );
718
+ }
719
+ return allFields ;
720
+ }
679
721
}
0 commit comments