1
1
// This file is licensed under the Elastic License 2.0. Copyright 2021-present, StarRocks Inc.
2
2
package com .starrocks .sql .analyzer ;
3
3
4
+ import com .google .common .base .Preconditions ;
4
5
import com .google .common .collect .ImmutableList ;
5
6
import com .google .common .collect .Lists ;
6
7
import com .google .common .collect .Sets ;
@@ -72,10 +73,11 @@ public void analyze(AnalyzeState analyzeState,
72
73
analyzeHaving (havingClause , analyzeState , sourceScope , outputScope , outputExpressions );
73
74
74
75
// Construct sourceAndOutputScope with sourceScope and outputScope
75
- Scope sourceAndOutputScope = computeAndAssignOrderScope (analyzeState , sourceScope , outputScope );
76
+ Scope sourceAndOutputScope = computeAndAssignOrderScope (analyzeState , sourceScope , outputScope ,
77
+ selectList .isDistinct ());
76
78
77
79
List <OrderByElement > orderByElements =
78
- analyzeOrderBy (sortClause , analyzeState , sourceAndOutputScope , outputExpressions );
80
+ analyzeOrderBy (sortClause , analyzeState , sourceAndOutputScope , outputExpressions , selectList . isDistinct () );
79
81
List <Expr > orderByExpressions =
80
82
orderByElements .stream ().map (OrderByElement ::getExpr ).collect (Collectors .toList ());
81
83
@@ -165,7 +167,7 @@ public void analyze(AnalyzeState analyzeState,
165
167
.collect (Collectors .toList ());
166
168
167
169
Scope sourceScopeForOrder = new Scope (RelationId .anonymous (), new RelationFields (sourceForOrderFields ));
168
- computeAndAssignOrderScope (analyzeState , sourceScopeForOrder , outputScope );
170
+ computeAndAssignOrderScope (analyzeState , sourceScopeForOrder , outputScope , selectList . isDistinct () );
169
171
analyzeState .setOrderSourceExpressions (orderSourceExpressions );
170
172
}
171
173
@@ -300,7 +302,8 @@ private List<Expr> analyzeSelect(SelectList selectList, Relation fromRelation, b
300
302
301
303
private List <OrderByElement > analyzeOrderBy (List <OrderByElement > orderByElements , AnalyzeState analyzeState ,
302
304
Scope orderByScope ,
303
- List <Expr > outputExpressions ) {
305
+ List <Expr > outputExpressions ,
306
+ boolean isDistinct ) {
304
307
if (orderByElements == null ) {
305
308
analyzeState .setOrderBy (Collections .emptyList ());
306
309
return Collections .emptyList ();
@@ -316,20 +319,32 @@ private List<OrderByElement> analyzeOrderBy(List<OrderByElement> orderByElements
316
319
if (ordinal < 1 || ordinal > outputExpressions .size ()) {
317
320
throw new SemanticException ("ORDER BY position %s is not in select list" , ordinal );
318
321
}
322
+ // A positional (ordinal) reference is unambiguous, so the selected output expression does not need to be re-analyzed
319
323
expression = outputExpressions .get ((int ) ordinal - 1 );
320
- }
321
-
322
- if (expression instanceof FieldReference ) {
323
- // If the expression of order by is a FieldReference, it means that the type of sql is
324
+ } else if (expression instanceof FieldReference ) {
325
+ // If the expression of order by is a FieldReference, and it's not a distinct select,
326
+ // it means that the type of sql is
324
327
// "select * from t order by 1", then this FieldReference cannot be parsed in OrderByScope,
325
328
// but should be parsed in sourceScope
326
- analyzeExpression (expression , analyzeState , orderByScope .getParent ());
329
+ if (isDistinct ) {
330
+ analyzeExpression (expression , analyzeState , orderByScope );
331
+ } else {
332
+ analyzeExpression (expression , analyzeState , orderByScope .getParent ());
333
+ }
327
334
} else {
328
335
ExpressionAnalyzer expressionAnalyzer = new ExpressionAnalyzer (session );
329
336
expressionAnalyzer .analyzeWithoutUpdateState (expression , analyzeState , orderByScope );
330
337
List <Expr > aggregations = Lists .newArrayList ();
331
338
expression .collectAll (e -> e .isAggregate (), aggregations );
332
- aggregations .forEach (e -> analyzeExpression (e , analyzeState , orderByScope .getParent ()));
339
+ if (isDistinct && !aggregations .isEmpty ()) {
340
+ throw new SemanticException ("for SELECT DISTINCT, ORDER BY expressions must appear in select list" );
341
+ }
342
+
343
+ if (!aggregations .isEmpty ()) {
344
+ // Analyze the aggregate functions against the parent scope first
345
+ Preconditions .checkState (orderByScope .getParent () != null , "parent scope not be set" );
346
+ aggregations .forEach (e -> analyzeExpression (e , analyzeState , orderByScope .getParent ()));
347
+ }
333
348
analyzeExpression (expression , analyzeState , orderByScope );
334
349
}
335
350
@@ -643,23 +658,24 @@ public Expr visitSlot(SlotRef slotRef, Void context) {
643
658
}
644
659
}
645
660
646
- private Scope computeAndAssignOrderScope (AnalyzeState analyzeState , Scope sourceScope , Scope outputScope ) {
647
- // The Scope used by order by allows parsing of the same column,
648
- // such as 'select v1 as v, v1 as v from t0 order by v'
649
- // but normal parsing does not allow it. So add a de-duplication operation here.
661
+ private Scope computeAndAssignOrderScope (AnalyzeState analyzeState , Scope sourceScope , Scope outputScope ,
662
+ boolean isDistinct ) {
663
+
664
+ List <Field > allFields = Lists .newArrayList ();
665
+ // order by can only "see" fields from distinct output
666
+ if (isDistinct ) {
667
+ allFields = removeDuplicateField (outputScope .getRelationFields ().getAllFields ());
668
+ Scope orderScope = new Scope (outputScope .getRelationId (), new RelationFields (allFields ));
669
+ analyzeState .setOrderScope (orderScope );
670
+ return orderScope ;
671
+ }
650
672
651
- List <Field > allFields = new ArrayList <>();
652
673
for (int i = 0 ; i < analyzeState .getOutputExprInOrderByScope ().size (); ++i ) {
653
674
Field field = outputScope .getRelationFields ()
654
675
.getFieldByIndex (analyzeState .getOutputExprInOrderByScope ().get (i ));
655
- if (field .getName () != null && field .getOriginExpression () != null &&
656
- allFields .stream ().anyMatch (f -> f .getOriginExpression () != null
657
- && f .getName () != null && field .getName ().equals (f .getName ())
658
- && field .getOriginExpression ().equals (f .getOriginExpression ()))) {
659
- continue ;
660
- }
661
676
allFields .add (field );
662
677
}
678
+ allFields = removeDuplicateField (allFields );
663
679
664
680
Scope orderScope = new Scope (outputScope .getRelationId (), new RelationFields (allFields ));
665
681
@@ -676,4 +692,28 @@ private Scope computeAndAssignOrderScope(AnalyzeState analyzeState, Scope source
676
692
// Convenience wrapper: analyzes expr within the given scope by delegating to the static
// ExpressionAnalyzer.analyzeExpression, passing analyzeState and `session` along unchanged.
private void analyzeExpression (Expr expr , AnalyzeState analyzeState , Scope scope ) {
677
693
ExpressionAnalyzer .analyzeExpression (expr , analyzeState , scope , session );
678
694
}
695
+
696
+ // The Scope used by order by allows parsing of the same column,
697
+ // such as 'select v1 as v, v1 as v from t0 order by v'
698
+ // but normal parsing does not allow it. So add a de-duplication operation here.
699
+ private List <Field > removeDuplicateField (List <Field > originalFields ) {
700
+ List <Field > allFields = Lists .newArrayList ();
701
+ for (Field field : originalFields ) {
702
+ if (session .getSessionVariable ().isEnableStrictOrderBy ()) {
703
+ if (field .getName () != null && field .getOriginExpression () != null &&
704
+ allFields .stream ().anyMatch (f -> f .getOriginExpression () != null
705
+ && f .getName () != null && field .getName ().equals (f .getName ())
706
+ && field .getOriginExpression ().equals (f .getOriginExpression ()))) {
707
+ continue ;
708
+ }
709
+ } else {
710
+ if (field .getName () != null &&
711
+ allFields .stream ().anyMatch (f -> f .getName () != null && field .getName ().equals (f .getName ()))) {
712
+ continue ;
713
+ }
714
+ }
715
+ allFields .add (field );
716
+ }
717
+ return allFields ;
718
+ }
679
719
}
0 commit comments