Skip to content

Reuse alias if possible #14781

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 10 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 36 additions & 2 deletions datafusion/expr/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,20 @@ impl Alias {
}
}

/// Create an alias with an optional schema/field qualifier.
pub fn new_boxed(
expr: Box<Expr>,
relation: Option<impl Into<TableReference>>,
name: impl Into<String>,
) -> Self {
Self {
expr,
relation: relation.map(|r| r.into()),
name: name.into(),
metadata: None,
}
}

pub fn with_metadata(
mut self,
metadata: Option<std::collections::HashMap<String, String>>,
Expand Down Expand Up @@ -1385,8 +1399,23 @@ impl Expr {
}

/// Return `self AS name` alias expression
///
/// # Example
/// ```
/// # use datafusion_expr::col;
/// let expr = col("foo").alias("bar");
/// assert_eq!(expr.to_string(), "foo AS bar");
///
/// // when aliasing over the exising alias, the previous one is removed
/// let expr = col("foo").alias("bar").alias("baz");
/// assert_eq!(expr.to_string(), "foo AS baz");
pub fn alias(self, name: impl Into<String>) -> Expr {
Expr::Alias(Alias::new(self, None::<&str>, name.into()))
if let Expr::Alias(Alias { expr, .. }) = self {
// reuse the existing layer if possible
Expr::Alias(Alias::new_boxed(expr, None::<&str>, name.into()))
} else {
Expr::Alias(Alias::new(self, None::<&str>, name.into()))
}
}

/// Return `self AS name` alias expression with metadata
Expand Down Expand Up @@ -1416,7 +1445,12 @@ impl Expr {
relation: Option<impl Into<TableReference>>,
name: impl Into<String>,
) -> Expr {
Expr::Alias(Alias::new(self, relation, name.into()))
if let Expr::Alias(Alias { expr, .. }) = self {
// reuse the existing layer if possible
Expr::Alias(Alias::new_boxed(expr, relation, name.into()))
} else {
Expr::Alias(Alias::new(self, relation, name.into()))
}
}

/// Return `self AS name` alias expression with a specific qualifier and metadata
Expand Down
2 changes: 1 addition & 1 deletion datafusion/optimizer/tests/optimizer_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ fn eliminate_redundant_null_check_on_count() {
GROUP BY col_int32
HAVING c IS NOT NULL";
let plan = test_sql(sql).unwrap();
let expected = "Projection: test.col_int32, count(Int64(1)) AS count(*) AS c\
let expected = "Projection: test.col_int32, count(Int64(1)) AS c\
\n Aggregate: groupBy=[[test.col_int32]], aggr=[[count(Int64(1))]]\
\n TableScan: test projection=[col_int32]";
assert_eq!(expected, format!("{plan}"));
Expand Down
48 changes: 31 additions & 17 deletions datafusion/sql/src/unparser/plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -371,29 +371,33 @@ impl Unparser<'_> {
// Projection can be top-level plan for unnest relation
// The projection generated by the `RecursiveUnnestRewriter` from a UNNEST relation will have
// only one expression, which is the placeholder column generated by the rewriter.
let unnest_input_type = if p.expr.len() == 1 {
let unnest_params = if p.expr.len() == 1 {
Self::check_unnest_placeholder_with_outer_ref(&p.expr[0])
} else {
None
};
if self.dialect.unnest_as_table_factor() && unnest_input_type.is_some() {
if let LogicalPlan::Unnest(unnest) = &p.input.as_ref() {
if let Some(unnest_relation) =
self.try_unnest_to_table_factor_sql(unnest)?
{
relation.unnest(unnest_relation);
return self.select_to_sql_recursively(
p.input.as_ref(),
query,
select,
relation,
);
if self.dialect.unnest_as_table_factor() {
if let Some((_, unnest_alias)) = &unnest_params {
if let LogicalPlan::Unnest(unnest) = &p.input.as_ref() {
if let Some(unnest_relation) =
self.try_unnest_to_table_factor_sql(unnest, unnest_alias)?
{
relation.unnest(unnest_relation);
return self.select_to_sql_recursively(
p.input.as_ref(),
query,
select,
relation,
);
}
}
}
}

// Projection can be top-level plan for derived table
if select.already_projected() {
let unnest_input_type = unnest_params.map(|(t, _)| t);

return self.derive_with_dialect_alias(
"derived_projection",
plan,
Expand Down Expand Up @@ -906,15 +910,22 @@ impl Unparser<'_> {
/// - If the column is not a placeholder column, return [None].
///
/// `outer_ref` is the display result of [Expr::OuterReferenceColumn]
fn check_unnest_placeholder_with_outer_ref(expr: &Expr) -> Option<UnnestInputType> {
if let Expr::Alias(Alias { expr, .. }) = expr {
fn check_unnest_placeholder_with_outer_ref(
expr: &Expr,
) -> Option<(UnnestInputType, &str)> {
if let Expr::Alias(Alias {
expr,
name: alias_name,
..
}) = expr
{
if let Expr::Column(Column { name, .. }) = expr.as_ref() {
if let Some(prefix) = name.strip_prefix(UNNEST_PLACEHOLDER) {
if prefix.starts_with(&format!("({}(", OUTER_REFERENCE_COLUMN_PREFIX))
{
return Some(UnnestInputType::OuterReference);
return Some((UnnestInputType::OuterReference, alias_name));
}
return Some(UnnestInputType::Scalar);
return Some((UnnestInputType::Scalar, alias_name));
}
}
}
Expand All @@ -924,12 +935,15 @@ impl Unparser<'_> {
fn try_unnest_to_table_factor_sql(
&self,
unnest: &Unnest,
alias: &str,
) -> Result<Option<UnnestRelationBuilder>> {
let mut unnest_relation = UnnestRelationBuilder::default();
let LogicalPlan::Projection(projection) = unnest.input.as_ref() else {
return Ok(None);
};

unnest_relation.alias(Some(self.new_table_alias(alias.to_string(), vec![])));

if !matches!(projection.input.as_ref(), LogicalPlan::EmptyRelation(_)) {
// It may be possible that UNNEST is used as a source for the query.
// However, at this point, we don't yet know if it is just a single expression
Expand Down
10 changes: 5 additions & 5 deletions datafusion/sql/tests/cases/plan_to_sql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -572,7 +572,7 @@ fn roundtrip_statement_with_dialect() -> Result<()> {
},
TestStatementWithDialect {
sql: "SELECT * FROM UNNEST([1,2,3])",
expected: r#"SELECT UNNEST(make_array(Int64(1),Int64(2),Int64(3))) FROM UNNEST([1, 2, 3])"#,
expected: r#"SELECT UNNEST(make_array(Int64(1),Int64(2),Int64(3))) FROM UNNEST([1, 2, 3]) AS UNNEST(make_array(Int64(1),Int64(2),Int64(3)))"#,
parser_dialect: Box::new(GenericDialect {}),
unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()),
},
Expand All @@ -590,7 +590,7 @@ fn roundtrip_statement_with_dialect() -> Result<()> {
},
TestStatementWithDialect {
sql: "SELECT * FROM UNNEST([1,2,3]), j1",
expected: r#"SELECT UNNEST(make_array(Int64(1),Int64(2),Int64(3))), j1.j1_id, j1.j1_string FROM UNNEST([1, 2, 3]) CROSS JOIN j1"#,
expected: r#"SELECT UNNEST(make_array(Int64(1),Int64(2),Int64(3))), j1.j1_id, j1.j1_string FROM UNNEST([1, 2, 3]) AS UNNEST(make_array(Int64(1),Int64(2),Int64(3))) CROSS JOIN j1"#,
parser_dialect: Box::new(GenericDialect {}),
unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()),
},
Expand All @@ -608,7 +608,7 @@ fn roundtrip_statement_with_dialect() -> Result<()> {
},
TestStatementWithDialect {
sql: "SELECT UNNEST([1,2,3])",
expected: r#"SELECT * FROM UNNEST([1, 2, 3])"#,
expected: r#"SELECT * FROM UNNEST([1, 2, 3]) AS UNNEST(make_array(Int64(1),Int64(2),Int64(3)))"#,
parser_dialect: Box::new(GenericDialect {}),
unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()),
},
Expand All @@ -626,7 +626,7 @@ fn roundtrip_statement_with_dialect() -> Result<()> {
},
TestStatementWithDialect {
sql: "SELECT * FROM unnest_table u, UNNEST(u.array_col)",
expected: r#"SELECT u.array_col, u.struct_col, UNNEST(outer_ref(u.array_col)) FROM unnest_table AS u CROSS JOIN UNNEST(u.array_col)"#,
expected: r#"SELECT u.array_col, u.struct_col, UNNEST(outer_ref(u.array_col)) FROM unnest_table AS u CROSS JOIN UNNEST(u.array_col) AS UNNEST(outer_ref(u.array_col))"#,
parser_dialect: Box::new(GenericDialect {}),
unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()),
},
Expand All @@ -638,7 +638,7 @@ fn roundtrip_statement_with_dialect() -> Result<()> {
},
TestStatementWithDialect {
sql: "SELECT unnest([1, 2, 3, 4]) from unnest([1, 2, 3]);",
expected: r#"SELECT UNNEST([1, 2, 3, 4]) AS UNNEST(make_array(Int64(1),Int64(2),Int64(3),Int64(4))) FROM UNNEST([1, 2, 3])"#,
expected: r#"SELECT UNNEST([1, 2, 3, 4]) AS UNNEST(make_array(Int64(1),Int64(2),Int64(3),Int64(4))) FROM UNNEST([1, 2, 3]) AS UNNEST(make_array(Int64(1),Int64(2),Int64(3)))"#,
parser_dialect: Box::new(GenericDialect {}),
unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()),
},
Expand Down
4 changes: 2 additions & 2 deletions datafusion/sqllogictest/test_files/count_star_rule.slt
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ query TT
EXPLAIN SELECT t1.a, COUNT() AS cnt FROM t1 GROUP BY t1.a HAVING COUNT() > 0;
----
logical_plan
01)Projection: t1.a, count(Int64(1)) AS count() AS cnt
01)Projection: t1.a, count(Int64(1)) AS cnt
02)--Filter: count(Int64(1)) > Int64(0)
03)----Aggregate: groupBy=[[t1.a]], aggr=[[count(Int64(1))]]
04)------TableScan: t1 projection=[a]
Expand All @@ -83,7 +83,7 @@ query TT
EXPLAIN SELECT a, COUNT() OVER (PARTITION BY a) AS count_a FROM t1;
----
logical_plan
01)Projection: t1.a, count(Int64(1)) PARTITION BY [t1.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS count() PARTITION BY [t1.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS count_a
01)Projection: t1.a, count(Int64(1)) PARTITION BY [t1.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS count_a
02)--WindowAggr: windowExpr=[[count(Int64(1)) PARTITION BY [t1.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]
03)----TableScan: t1 projection=[a]
physical_plan
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/group_by.slt
Original file line number Diff line number Diff line change
Expand Up @@ -4188,7 +4188,7 @@ EXPLAIN SELECT SUM(DISTINCT CAST(x AS DOUBLE)), MAX(DISTINCT CAST(x AS DOUBLE))
logical_plan
01)Projection: sum(alias1) AS sum(DISTINCT t1.x), max(alias1) AS max(DISTINCT t1.x)
02)--Aggregate: groupBy=[[t1.y]], aggr=[[sum(alias1), max(alias1)]]
03)----Aggregate: groupBy=[[t1.y, __common_expr_1 AS t1.x AS alias1]], aggr=[[]]
03)----Aggregate: groupBy=[[t1.y, __common_expr_1 AS alias1]], aggr=[[]]
04)------Projection: CAST(t1.x AS Float64) AS __common_expr_1, t1.y
05)--------TableScan: t1 projection=[x, y]
physical_plan
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/insert.slt
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ logical_plan
01)Dml: op=[Insert Into] table=[table_without_values]
02)--Projection: a1 AS a1, a2 AS a2
03)----Sort: aggregate_test_100.c1 ASC NULLS LAST
04)------Projection: sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING AS a1, count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING AS count(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING AS a2, aggregate_test_100.c1
04)------Projection: sum(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING AS a1, count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING AS a2, aggregate_test_100.c1
05)--------WindowAggr: windowExpr=[[sum(CAST(aggregate_test_100.c4 AS Int64)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, count(Int64(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING]]
06)----------TableScan: aggregate_test_100 projection=[c1, c4, c9]
physical_plan
Expand Down
4 changes: 2 additions & 2 deletions datafusion/sqllogictest/test_files/union.slt
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,7 @@ EXPLAIN
logical_plan
01)Limit: skip=0, fetch=3
02)--Union
03)----Projection: count(Int64(1)) AS count(*) AS cnt
03)----Projection: count(Int64(1)) AS cnt
04)------Limit: skip=0, fetch=3
05)--------Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]]
06)----------SubqueryAlias: a
Expand Down Expand Up @@ -650,7 +650,7 @@ select x, y from (select 1 as x , max(10) as y) b
----
logical_plan
01)Union
02)--Projection: count(Int64(1)) AS count(*) AS count, a.n
02)--Projection: count(Int64(1)) AS count, a.n
03)----Aggregate: groupBy=[[a.n]], aggr=[[count(Int64(1))]]
04)------SubqueryAlias: a
05)--------Projection: Int64(5) AS n
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/unnest.slt
Original file line number Diff line number Diff line change
Expand Up @@ -595,7 +595,7 @@ explain select unnest(unnest(column3)), column3 from recursive_unnest_table;
----
logical_plan
01)Unnest: lists[] structs[__unnest_placeholder(UNNEST(recursive_unnest_table.column3))]
02)--Projection: __unnest_placeholder(recursive_unnest_table.column3,depth=1) AS UNNEST(recursive_unnest_table.column3) AS __unnest_placeholder(UNNEST(recursive_unnest_table.column3)), recursive_unnest_table.column3
02)--Projection: __unnest_placeholder(recursive_unnest_table.column3,depth=1) AS __unnest_placeholder(UNNEST(recursive_unnest_table.column3)), recursive_unnest_table.column3
03)----Unnest: lists[__unnest_placeholder(recursive_unnest_table.column3)|depth=1] structs[]
04)------Projection: recursive_unnest_table.column3 AS __unnest_placeholder(recursive_unnest_table.column3), recursive_unnest_table.column3
05)--------TableScan: recursive_unnest_table projection=[column3]
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/window.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1764,7 +1764,7 @@ EXPLAIN SELECT count(*) as global_count FROM
ORDER BY c1 ) AS a
----
logical_plan
01)Projection: count(Int64(1)) AS count(*) AS global_count
01)Projection: count(Int64(1)) AS global_count
02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]]
03)----SubqueryAlias: a
04)------Projection:
Expand Down
Loading