-
Notifications
You must be signed in to change notification settings - Fork 2k
[BugFix] fix wrong order by scope for distinct query #37910
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8ed3338
cba1a3a
5eb931e
3f7a91a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,7 @@ | |
|
||
package com.starrocks.sql.analyzer; | ||
|
||
import com.google.common.base.Preconditions; | ||
import com.google.common.collect.ImmutableList; | ||
import com.google.common.collect.Lists; | ||
import com.google.common.collect.Sets; | ||
|
@@ -86,10 +87,11 @@ public void analyze(AnalyzeState analyzeState, | |
analyzeHaving(havingClause, analyzeState, sourceScope, outputScope, outputExpressions); | ||
|
||
// Construct sourceAndOutputScope with sourceScope and outputScope | ||
Scope sourceAndOutputScope = computeAndAssignOrderScope(analyzeState, sourceScope, outputScope); | ||
Scope sourceAndOutputScope = computeAndAssignOrderScope(analyzeState, sourceScope, outputScope, | ||
selectList.isDistinct()); | ||
|
||
List<OrderByElement> orderByElements = | ||
analyzeOrderBy(sortClause, analyzeState, sourceAndOutputScope, outputExpressions); | ||
analyzeOrderBy(sortClause, analyzeState, sourceAndOutputScope, outputExpressions, selectList.isDistinct()); | ||
List<Expr> orderByExpressions = | ||
orderByElements.stream().map(OrderByElement::getExpr).collect(Collectors.toList()); | ||
|
||
|
@@ -183,7 +185,7 @@ public void analyze(AnalyzeState analyzeState, | |
.collect(Collectors.toList()); | ||
|
||
Scope sourceScopeForOrder = new Scope(RelationId.anonymous(), new RelationFields(sourceForOrderFields)); | ||
computeAndAssignOrderScope(analyzeState, sourceScopeForOrder, outputScope); | ||
computeAndAssignOrderScope(analyzeState, sourceScopeForOrder, outputScope, selectList.isDistinct()); | ||
analyzeState.setOrderSourceExpressions(orderSourceExpressions); | ||
} | ||
|
||
|
@@ -311,7 +313,8 @@ private List<Expr> analyzeSelect(SelectList selectList, Relation fromRelation, b | |
|
||
private List<OrderByElement> analyzeOrderBy(List<OrderByElement> orderByElements, AnalyzeState analyzeState, | ||
Scope orderByScope, | ||
List<Expr> outputExpressions) { | ||
List<Expr> outputExpressions, | ||
boolean isDistinct) { | ||
if (orderByElements == null) { | ||
analyzeState.setOrderBy(Collections.emptyList()); | ||
return Collections.emptyList(); | ||
|
@@ -327,20 +330,33 @@ private List<OrderByElement> analyzeOrderBy(List<OrderByElement> orderByElements | |
if (ordinal < 1 || ordinal > outputExpressions.size()) { | ||
throw new SemanticException("ORDER BY position %s is not in select list", ordinal); | ||
} | ||
// index can ensure no ambiguous, we don't need to re-analyze this output expression | ||
expression = outputExpressions.get((int) ordinal - 1); | ||
} | ||
|
||
if (expression instanceof FieldReference) { | ||
// If the expression of order by is a FieldReference, it means that the type of sql is | ||
} else if (expression instanceof FieldReference) { | ||
// If the expression of order by is a FieldReference, and it's not a distinct select, | ||
// it means that the type of sql is | ||
// "select * from t order by 1", then this FieldReference cannot be parsed in OrderByScope, | ||
// but should be parsed in sourceScope | ||
analyzeExpression(expression, analyzeState, orderByScope.getParent()); | ||
if (isDistinct) { | ||
analyzeExpression(expression, analyzeState, orderByScope); | ||
} else { | ||
analyzeExpression(expression, analyzeState, orderByScope.getParent()); | ||
} | ||
} else { | ||
ExpressionAnalyzer expressionAnalyzer = new ExpressionAnalyzer(session); | ||
expressionAnalyzer.analyzeWithoutUpdateState(expression, analyzeState, orderByScope); | ||
List<Expr> aggregations = Lists.newArrayList(); | ||
expression.collectAll(e -> e.isAggregate(), aggregations); | ||
aggregations.forEach(e -> analyzeExpression(e, analyzeState, orderByScope.getParent())); | ||
if (isDistinct && !aggregations.isEmpty()) { | ||
throw new SemanticException("for SELECT DISTINCT, ORDER BY expressions must appear in select list", | ||
expression.getPos()); | ||
} | ||
|
||
if (!aggregations.isEmpty()) { | ||
// use parent scope to analyze agg func firstly | ||
Preconditions.checkState(orderByScope.getParent() != null, "parent scope not be set"); | ||
aggregations.forEach(e -> analyzeExpression(e, analyzeState, orderByScope.getParent())); | ||
} | ||
analyzeExpression(expression, analyzeState, orderByScope); | ||
} | ||
|
||
|
@@ -655,23 +671,24 @@ public Expr visitSlot(SlotRef slotRef, Void context) { | |
} | ||
} | ||
|
||
private Scope computeAndAssignOrderScope(AnalyzeState analyzeState, Scope sourceScope, Scope outputScope) { | ||
// The Scope used by order by allows parsing of the same column, | ||
// such as 'select v1 as v, v1 as v from t0 order by v' | ||
// but normal parsing does not allow it. So add a de-duplication operation here. | ||
private Scope computeAndAssignOrderScope(AnalyzeState analyzeState, Scope sourceScope, Scope outputScope, | ||
boolean isDistinct) { | ||
|
||
List<Field> allFields = Lists.newArrayList(); | ||
// order by can only "see" fields from distinct output | ||
if (isDistinct) { | ||
allFields = removeDuplicateField(outputScope.getRelationFields().getAllFields()); | ||
Scope orderScope = new Scope(outputScope.getRelationId(), new RelationFields(allFields)); | ||
analyzeState.setOrderScope(orderScope); | ||
return orderScope; | ||
} | ||
|
||
List<Field> allFields = new ArrayList<>(); | ||
for (int i = 0; i < analyzeState.getOutputExprInOrderByScope().size(); ++i) { | ||
Field field = outputScope.getRelationFields() | ||
.getFieldByIndex(analyzeState.getOutputExprInOrderByScope().get(i)); | ||
if (field.getName() != null && field.getOriginExpression() != null && | ||
allFields.stream().anyMatch(f -> f.getOriginExpression() != null | ||
&& f.getName() != null && field.getName().equals(f.getName()) | ||
&& field.getOriginExpression().equals(f.getOriginExpression()))) { | ||
continue; | ||
} | ||
allFields.add(field); | ||
} | ||
allFields = removeDuplicateField(allFields); | ||
|
||
Scope orderScope = new Scope(outputScope.getRelationId(), new RelationFields(allFields)); | ||
|
||
|
@@ -688,4 +705,29 @@ private Scope computeAndAssignOrderScope(AnalyzeState analyzeState, Scope source | |
private void analyzeExpression(Expr expr, AnalyzeState analyzeState, Scope scope) { | ||
ExpressionAnalyzer.analyzeExpression(expr, analyzeState, scope, session); | ||
} | ||
|
||
|
||
// The Scope used by order by allows parsing of the same column, | ||
// such as 'select v1 as v, v1 as v from t0 order by v' | ||
// but normal parsing does not allow it. So add a de-duplication operation here. | ||
private List<Field> removeDuplicateField(List<Field> originalFields) { | ||
List<Field> allFields = Lists.newArrayList(); | ||
for (Field field : originalFields) { | ||
if (session.getSessionVariable().isEnableStrictOrderBy()) { | ||
if (field.getName() != null && field.getOriginExpression() != null && | ||
allFields.stream().anyMatch(f -> f.getOriginExpression() != null | ||
&& f.getName() != null && field.getName().equals(f.getName()) | ||
&& field.getOriginExpression().equals(f.getOriginExpression()))) { | ||
continue; | ||
} | ||
} else { | ||
if (field.getName() != null && | ||
allFields.stream().anyMatch(f -> f.getName() != null && field.getName().equals(f.getName()))) { | ||
continue; | ||
} | ||
} | ||
allFields.add(field); | ||
} | ||
return allFields; | ||
} | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The most risky bug in this code is: You can modify the code like this: // In analyzeOrderBy method, adjust the part where aggregations are checked within a SELECT DISTINCT context
if (isDistinct && !aggregations.isEmpty()) {
// Here we need to not only throw an exception if there are aggregations,
// but also ensure that columns in the ORDER BY clause are present in the SELECT list.
boolean orderByNotInSelectList = !outputExpressions.containsAll(expression.toExprList());
if(orderByNotInSelectList) {
throw new SemanticException("for SELECT DISTINCT, ORDER BY expressions must appear in the select list",
expression.getPos());
}
}
// Later in the same method, we also need to ensure that the ORDER BY fields are consistently validated
// against the correct scope when SELECT DISTINCT is used.
if (!isDistinct) {
analyzeExpression(expression, analyzeState, orderByScope);
} else {
analyzeExpression(expression, analyzeState, orderByScope.getParent());
} This addresses the issue where the ORDER BY clause might reference expressions not in the SELECT list when using DISTINCT, which should result in an error as implemented in this proposed fix. Additionally, when SELECT DISTINCT is true, the ORDER BY expressions would correctly be analyzed against the parent scope rather than the possibly incorrect orderByScope. |
Uh oh!
There was an error while loading. Please reload this page.