Commit 801e5ce

Add polars with_columns
Support for the Polars with_columns API, for both eager and lazy execution.
1 parent e04df19

File tree

15 files changed: +1925 additions, −2 deletions


docs/reference/decorators/with_columns.rst

Lines changed: 10 additions & 0 deletions

@@ -13,6 +13,16 @@ We have a ``with_columns`` option to run operations on columns of a Pandas dataframe
    :special-members: __init__


+Polars
+--------------
+
+We have a ``with_columns`` decorator to run operations on columns of a Polars dataframe or lazyframe and append the results as new columns.
+
+**Reference Documentation**
+
+.. autoclass:: hamilton.plugins.h_polars.with_columns
+   :special-members: __init__
+
 PySpark
 --------------
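For orientation, here is a minimal sketch of how the new decorator might be applied, assuming it mirrors the h_pandas signature shown later in this commit (columns_to_pass / select); the module, function, and column names are illustrative assumptions, not taken from the docs:

import polars as pl

from hamilton.plugins.h_polars import with_columns

import my_functions  # illustrative module defining spend_per_signup, spend_zero_mean, ...


@with_columns(
    my_functions,
    columns_to_pass=["spend", "signups"],  # columns extracted from the input frame
    select=["spend_per_signup", "spend_zero_mean"],  # end nodes appended as new columns
)
def final_df(initial_df: pl.DataFrame) -> pl.DataFrame:
    """The input dataframe with the selected columns appended."""
    return initial_df

As with the pandas variant, `select` names the subdag sink nodes to append; the decorated function itself stays a pass-through.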

Binary image added (116 KB) — diff not rendered.

examples/polars/with_columns/README

Lines changed: 7 additions & 0 deletions

# Using with_columns with Polars

We show how to use the familiar `with_columns` API from `polars`. Both `pl.DataFrame` and `pl.LazyFrame` are supported.

To see the example, look at the notebook.

![image info](./dag.png)
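A hypothetical end-to-end run with the Hamilton driver; the notebook is the authoritative version — the `pipeline` module and node names here are assumptions:

import polars as pl

from hamilton import driver

import pipeline  # assumed module containing a with_columns-decorated `final_df`

dr = driver.Builder().with_modules(pipeline).with_config({"case": "millions"}).build()
df = pl.DataFrame({"spend": [10.0, 20.0, 30.0], "signups": [1, 2, 3]})
result = dr.execute(["final_df"], inputs={"initial_df": df})
print(result)  # shape of the result depends on the configured result builder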
Lines changed: 51 additions & 0 deletions

import polars as pl

from hamilton.function_modifiers import config

"""
Notes:
1. This file is used for all the [ray|dask|spark]/hello_world examples.
2. It therefore showcases how you can write something once and not only scale it, but port it
   to different frameworks with ease!
"""


@config.when(case="millions")
def avg_3wk_spend__millions(spend: pl.Series) -> pl.Series:
    """Rolling 3 week average spend."""
    return (
        spend.to_frame("spend").select(pl.col("spend").rolling_mean(window_size=3) / 1e6)
    ).to_series(0)


@config.when(case="thousands")
def avg_3wk_spend__thousands(spend: pl.Series) -> pl.Series:
    """Rolling 3 week average spend."""
    return (
        spend.to_frame("spend").select(pl.col("spend").rolling_mean(window_size=3) / 1e3)
    ).to_series(0)


def spend_per_signup(spend: pl.Series, signups: pl.Series) -> pl.Series:
    """The cost per signup in relation to spend."""
    return spend / signups


def spend_mean(spend: pl.Series) -> float:
    """Shows a function creating a scalar. In this case it computes the mean of the entire column."""
    return spend.mean()


def spend_zero_mean(spend: pl.Series, spend_mean: float) -> pl.Series:
    """Shows a function that takes a scalar. In this case it is used to zero-mean spend."""
    return spend - spend_mean


def spend_std_dev(spend: pl.Series) -> float:
    """Function that computes the standard deviation of the spend column."""
    return spend.std()


def spend_zero_mean_unit_variance(spend_zero_mean: pl.Series, spend_std_dev: float) -> pl.Series:
    """Function showing one way to make spend have zero mean and unit variance."""
    return spend_zero_mean / spend_std_dev
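To make the eager semantics concrete, a small illustrative check of two of these functions on toy data (assuming the functions above are in scope; the values are made up):

import polars as pl

# Illustrative only: exercise two of the eager transforms directly.
spend = pl.Series("spend", [10.0, 20.0, 30.0, 40.0])
signups = pl.Series("signups", [1, 2, 4, 5])

print(spend_per_signup(spend, signups))           # element-wise spend / signups
print(spend_zero_mean(spend, spend_mean(spend)))  # spend centered around zero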
Lines changed: 47 additions & 0 deletions

import polars as pl

from hamilton.function_modifiers import config

"""
Notes:
1. This file is used for all the [ray|dask|spark]/hello_world examples.
2. It therefore showcases how you can write something once and not only scale it, but port it
   to different frameworks with ease!
"""


@config.when(case="millions")
def avg_3wk_spend__millions(spend: pl.Expr) -> pl.Expr:
    """Rolling 3 week average spend."""
    return spend.rolling_mean(window_size=3) / 1e6


@config.when(case="thousands")
def avg_3wk_spend__thousands(spend: pl.Expr) -> pl.Expr:
    """Rolling 3 week average spend."""
    return spend.rolling_mean(window_size=3) / 1e3


def spend_per_signup(spend: pl.Expr, signups: pl.Expr) -> pl.Expr:
    """The cost per signup in relation to spend."""
    return spend / signups


def spend_mean(spend: pl.Expr) -> float:
    """Shows a function creating a scalar. In this case it computes the mean of the entire column."""
    return spend.mean()


def spend_zero_mean(spend: pl.Expr, spend_mean: float) -> pl.Expr:
    """Shows a function that takes a scalar. In this case it is used to zero-mean spend."""
    return spend - spend_mean


def spend_std_dev(spend: pl.Expr) -> float:
    """Function that computes the standard deviation of the spend column."""
    return spend.std()


def spend_zero_mean_unit_variance(spend_zero_mean: pl.Expr, spend_std_dev: float) -> pl.Expr:
    """Function showing one way to make spend have zero mean and unit variance."""
    return spend_zero_mean / spend_std_dev
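Because these variants return pl.Expr, they only build up a query plan. A minimal sketch — plain polars, no Hamilton, toy data assumed — of how such expressions execute lazily:

import polars as pl

# Illustrative only: the expression builders above slot into a LazyFrame plan.
lf = pl.LazyFrame({"spend": [10.0, 20.0, 30.0], "signups": [1, 2, 3]})
out = lf.with_columns(
    spend_per_signup(pl.col("spend"), pl.col("signups")).alias("spend_per_signup")
).collect()  # nothing executes until collect()
print(out)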

examples/polars/with_columns/notebook.ipynb

Lines changed: 1219 additions & 0 deletions (large diff not rendered by default)

hamilton/plugins/h_pandas.py

Lines changed: 3 additions & 0 deletions

@@ -144,6 +144,8 @@ def __init__(
         :param pass_dataframe_as: The name of the dataframe that we're modifying, as known to the subdag.
             If you pass this in, you are responsible for extracting columns out. If not provided, you have
             to pass columns_to_pass in, and we will extract the columns out for you.
+        :param select: The end nodes that represent columns to be appended to the original dataframe
+            via with_columns. Existing columns will be overridden.
         :param namespace: The namespace of the nodes, so they don't clash with the global namespace
             and so this can be reused. If it's left out, there will be no namespace (in which case you'll want
             to be careful about repeating it/reusing the nodes in other parts of the DAG.)
@@ -153,6 +155,7 @@ def __init__(

         self.subdag_functions = subdag.collect_functions(load_from)

+        # TODO: select=None should append all nodes, like h_spark does
         if select is None:
             raise ValueError("Please specify at least one column to append or update.")
         else:
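Per the new guard, `select` is required for now (the TODO flags spark-style append-all as future work). A hedged sketch of the failure mode — the `my_functions` module is a stand-in, not from this commit:

from hamilton.plugins import h_pandas

import my_functions  # stand-in module of column-level transform functions

try:
    # Omitting `select` trips the ValueError added in this hunk.
    h_pandas.with_columns(my_functions, columns_to_pass=["spend", "signups"])
except ValueError as err:
    print(err)  # Please specify at least one column to append or update.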
