Skip to content

Commit 2330b0c

Browse files
cpcloudjcrist
authored andcommitted
feat(datafusion): pivot_longer
1 parent a706f54 commit 2330b0c

File tree

5 files changed

+100
-5
lines changed

5 files changed

+100
-5
lines changed

ibis/backends/sql/compilers/datafusion.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -478,3 +478,10 @@ def visit_Aggregate(self, op, *, parent, groups, metrics):
478478
sel = sel.group_by(*by_names_quoted)
479479

480480
return sel
481+
482+
def visit_StructColumn(self, op, *, names, values):
483+
args = []
484+
for name, value in zip(names, values):
485+
args.append(sge.convert(name))
486+
args.append(value)
487+
return self.f.named_struct(*args)
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
WITH "t5" AS (
2+
SELECT
3+
"t4"."field_of_study",
4+
FIRST_VALUE("t4"."diff") FILTER(WHERE
5+
NOT "t4"."diff" IS NULL) AS "diff"
6+
FROM (
7+
SELECT
8+
"t4"."years",
9+
"t4"."degrees",
10+
"t4"."earliest_degrees",
11+
"t4"."latest_degrees",
12+
"t4"."diff",
13+
"t4"."field_of_study"
14+
FROM (
15+
SELECT
16+
"t3"."field_of_study",
17+
"t3"."years",
18+
"t3"."degrees",
19+
"t3"."earliest_degrees",
20+
"t3"."latest_degrees",
21+
"t3"."latest_degrees" - "t3"."earliest_degrees" AS "diff"
22+
FROM (
23+
SELECT
24+
"t2"."field_of_study",
25+
"t2"."years",
26+
"t2"."degrees",
27+
FIRST_VALUE("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "earliest_degrees",
28+
LAST_VALUE("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "latest_degrees"
29+
FROM (
30+
SELECT
31+
"t1"."field_of_study",
32+
"t1"."__pivoted__"."years" AS "years",
33+
"t1"."__pivoted__"."degrees" AS "degrees"
34+
FROM (
35+
SELECT
36+
"t0"."field_of_study",
37+
UNNEST(
38+
MAKE_ARRAY(
39+
NAMED_STRUCT('years', '1970-71', 'degrees', "t0"."1970-71"),
40+
NAMED_STRUCT('years', '1975-76', 'degrees', "t0"."1975-76"),
41+
NAMED_STRUCT('years', '1980-81', 'degrees', "t0"."1980-81"),
42+
NAMED_STRUCT('years', '1985-86', 'degrees', "t0"."1985-86"),
43+
NAMED_STRUCT('years', '1990-91', 'degrees', "t0"."1990-91"),
44+
NAMED_STRUCT('years', '1995-96', 'degrees', "t0"."1995-96"),
45+
NAMED_STRUCT('years', '2000-01', 'degrees', "t0"."2000-01"),
46+
NAMED_STRUCT('years', '2005-06', 'degrees', "t0"."2005-06"),
47+
NAMED_STRUCT('years', '2010-11', 'degrees', "t0"."2010-11"),
48+
NAMED_STRUCT('years', '2011-12', 'degrees', "t0"."2011-12"),
49+
NAMED_STRUCT('years', '2012-13', 'degrees', "t0"."2012-13"),
50+
NAMED_STRUCT('years', '2013-14', 'degrees', "t0"."2013-14"),
51+
NAMED_STRUCT('years', '2014-15', 'degrees', "t0"."2014-15"),
52+
NAMED_STRUCT('years', '2015-16', 'degrees', "t0"."2015-16"),
53+
NAMED_STRUCT('years', '2016-17', 'degrees', "t0"."2016-17"),
54+
NAMED_STRUCT('years', '2017-18', 'degrees', "t0"."2017-18"),
55+
NAMED_STRUCT('years', '2018-19', 'degrees', "t0"."2018-19"),
56+
NAMED_STRUCT('years', '2019-20', 'degrees', "t0"."2019-20")
57+
)
58+
) AS "__pivoted__"
59+
FROM "humanities" AS "t0"
60+
) AS "t1"
61+
) AS "t2"
62+
) AS "t3"
63+
) AS "t4"
64+
) AS t4
65+
GROUP BY
66+
"t4"."field_of_study"
67+
)
68+
SELECT
69+
*
70+
FROM (
71+
SELECT
72+
*
73+
FROM "t5" AS "t6"
74+
ORDER BY
75+
"t6"."diff" DESC NULLS LAST
76+
LIMIT 10
77+
) AS "t9"
78+
UNION ALL
79+
SELECT
80+
*
81+
FROM (
82+
SELECT
83+
*
84+
FROM "t5" AS "t6"
85+
WHERE
86+
"t6"."diff" < 0
87+
ORDER BY
88+
"t6"."diff" ASC
89+
LIMIT 10
90+
) AS "t10"

ibis/backends/tests/test_array.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -901,9 +901,7 @@ def test_zip_null(con, fn):
901901
@builtin_array
902902
@pytest.mark.notimpl(["postgres"], raises=PsycoPg2SyntaxError)
903903
@pytest.mark.notimpl(["risingwave"], raises=PsycoPg2ProgrammingError)
904-
@pytest.mark.notimpl(
905-
["datafusion"], raises=Exception, reason="probably generating invalid SQL"
906-
)
904+
@pytest.mark.notimpl(["datafusion"], raises=Exception, reason="not yet supported")
907905
@pytest.mark.notimpl(
908906
["polars"],
909907
raises=com.OperationNotDefinedError,

ibis/backends/tests/test_generic.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1403,7 +1403,7 @@ def query(t, group_cols):
14031403
@pytest.mark.notimpl(["oracle", "exasol"], raises=com.OperationNotDefinedError)
14041404
@pytest.mark.notimpl(["druid"], raises=AssertionError)
14051405
@pytest.mark.notyet(
1406-
["datafusion", "impala", "mssql", "mysql", "sqlite"],
1406+
["impala", "mssql", "mysql", "sqlite"],
14071407
reason="backend doesn't support arrays and we don't implement pivot_longer with unions yet",
14081408
raises=com.OperationNotDefinedError,
14091409
)

ibis/backends/tests/test_sql.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ def test_isin_bug(con, snapshot):
102102
raises=NotImplementedError,
103103
)
104104
@pytest.mark.notyet(
105-
["datafusion", "exasol", "oracle", "flink"],
105+
["exasol", "oracle", "flink"],
106106
reason="no unnest support",
107107
raises=exc.OperationNotDefinedError,
108108
)

0 commit comments

Comments
 (0)