diff --git a/README.md b/README.md index c0b30408..7cce7446 100644 --- a/README.md +++ b/README.md @@ -257,6 +257,40 @@ Arguments: - then_value: Value to use if comparison succeeds, default is 1 - else_value: Value to use if comparison fails, default is 0 +#### unpivot ([source](macros/sql/unpivot.sql)) +This macro "un-pivots" a table from wide format to long format. Functionality is similar to pandas [melt](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.melt.html) function. + +Usage: +``` +{{ dbt_utils.unpivot(table=ref('table_name'), cast_to='datatype', exclude=[]) }} +``` + +Example: + + Input: orders + + | date | size | color | status | + |------------|------|-------|------------| + | 2017-01-01 | S | red | complete | + | 2017-03-01 | S | red | processing | + + {{ dbt_utils.unpivot(ref('orders'), cast_to='varchar', exclude=['date','status']) }} + + Output: + + | date | status | field_name | value | + |------------|------------|------------|-------| + | 2017-01-01 | complete | size | S | + | 2017-01-01 | complete | color | red | + | 2017-03-01 | processing | size | S | + | 2017-03-01 | processing | color | red | + +Arguments: + + - table: Table name, required + - cast_to: The data type to cast the unpivoted values to, default is varchar + - exclude: A list of columns to exclude from the unpivot. + --- ### Web #### get_url_parameter ([source](macros/web/get_url_parameter.sql)) diff --git a/dbt_project.yml b/dbt_project.yml index 1ef4029f..30af29df 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -4,4 +4,4 @@ version: '0.1.0' target-path: "target" clean-targets: ["target", "dbt_modules"] macro-paths: ["macros"] -log-path: "logs" \ No newline at end of file +log-path: "logs" diff --git a/integration_tests/data/sql/data_unpivot.csv b/integration_tests/data/sql/data_unpivot.csv new file mode 100644 index 00000000..d9b712dc --- /dev/null +++ b/integration_tests/data/sql/data_unpivot.csv @@ -0,0 +1,4 @@ +customer_id,created_at,status,segment +123,2017-01-01,active,tier 1 +234,2017-02-01,active,tier 3 +567,2017-03-01,churned,tier 2 diff --git a/integration_tests/data/sql/data_unpivot_expected.csv b/integration_tests/data/sql/data_unpivot_expected.csv new file mode 100644 index 00000000..f972c1f1 --- /dev/null +++ b/integration_tests/data/sql/data_unpivot_expected.csv @@ -0,0 +1,7 @@ +customer_id,created_at,field_name,value +123,2017-01-01,status,active +123,2017-01-01,segment,tier 1 +234,2017-02-01,status,active +234,2017-02-01,segment,tier 3 +567,2017-03-01,status,churned +567,2017-03-01,segment,tier 2 diff --git a/integration_tests/models/sql/schema.yml b/integration_tests/models/sql/schema.yml index e7d1c401..ec89f16f 100644 --- a/integration_tests/models/sql/schema.yml +++ b/integration_tests/models/sql/schema.yml @@ -41,6 +41,11 @@ test_pivot: dbt_utils.equality: - ref('data_pivot_expected') +test_unpivot: + constraints: + dbt_utils.equality: + - ref('data_unpivot_expected') + test_star: constraints: diff --git a/integration_tests/models/sql/test_unpivot.sql b/integration_tests/models/sql/test_unpivot.sql new file mode 100644 index 00000000..7364dd09 --- /dev/null +++ b/integration_tests/models/sql/test_unpivot.sql @@ -0,0 +1 @@ +{{ dbt_utils.unpivot(table=ref('data_unpivot'), cast_to='varchar', exclude=['customer_id','created_at']) }} diff --git a/macros/sql/unpivot.sql b/macros/sql/unpivot.sql new file mode 100644 index 00000000..0ba489b8 --- /dev/null +++ b/macros/sql/unpivot.sql @@ -0,0 +1,43 @@ +{# +Pivot values from columns to rows. + +Example Usage: {{ dbt_utils.unpivot(table=ref('users'), cast_to='integer', exclude=['id','created_at']) }} + +Arguments: + table: Table name, required. + cast_to: The datatype to cast all unpivoted columns to. Default is varchar. + exclude: A list of columns to exclude from the unpivot operation. Default is none. +#} + +{% macro unpivot(table, cast_to='varchar', exclude=none) -%} + + {%- set exclude = exclude if exclude is not none else [] %} + + {%- set table_columns = {} %} + + {%- set _ = table_columns.update({table: []}) %} + + {%- if table.name -%} + {%- set schema, table_name = table.schema, table.name -%} + {%- else -%} + {%- set schema, table_name = (table | string).split(".") -%} + {%- endif -%} + + {%- set cols = adapter.get_columns_in_table(schema, table_name) %} + + {%- for col in cols -%} + + {%- if col.column not in exclude -%} + select + {%- for exclude_col in exclude %} + {{ exclude_col }}, + {%- endfor %} + cast('{{ col.column }}' as varchar) as field_name, + {{ dbt_utils.safe_cast(field=col.column, type=cast_to) }} as value + from {{ table }} + {% if not loop.last -%} + union all + {% endif -%} + {%- endif -%} + {%- endfor -%} +{%- endmacro %}