posit-dev · amol- · Apr 1, 2025 · Mar 28, 2025 · Mar 28, 2025 · Mar 28, 2025
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -0,0 +1,32 @@
+name: Run tests
+
+on:
+  push:
+    branches:
+      - main  # pushes to main only
+  pull_request:
+    branches:
+      - '**'  # pull requests targeting any branch
+
+jobs:
+  tests:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4  # v4 runs on Node 20; v3 used the deprecated Node 16 runtime
+
+      - name: Set up Python
+        uses: actions/setup-python@v5  # no python-version input: uses .python-version if present, else the runner's Python
+
+      - name: Install uv
+        run: |
+          pip install uv
+
+      - name: Install dependencies
+        run: |
+          uv sync --dev
+
+      - name: Run Test Suite
+        run: |
+          uv run pytest -v --tb=short --disable-warnings --maxfail=1 --cov=mustela
diff --git a/examples/_debug_pipeline_tree_regressor.py b/examples/_debug_pipeline_tree_regressor.py
diff --git a/examples/pipeline_boosted_tree_classifier.py b/examples/pipeline_boosted_tree_classifier.py
@@ -103,12 +103,18 @@ def categorize_price(price: float) -> str:
 print(mustela_pipeline)
 
 # Translate the pipeline to a query
-ibis_expression = mustela.translate(ibis.memtable(data_sample), mustela_pipeline)
-con = ibis.duckdb.connect()
+ibis_table = ibis.memtable(data_sample, name="DATA_TABLE")
+ibis_expression = mustela.translate(ibis_table, mustela_pipeline)
 
+con = ibis.duckdb.connect()
 if PRINT_SQL:
+    sql = mustela.export_sql("DATA_TABLE", mustela_pipeline, dialect="duckdb")
     print("\nGenerated Query for DuckDB:")
-    print(con.compile(ibis_expression))
+    print(sql)
+    print("\nPrediction with SQL")
+    # The exported SQL references DATA_TABLE by name, so materialize the memtable in this connection first.
+    con.create_table(ibis_table.get_name(), obj=ibis_table)
+    print(con.raw_sql(sql).df())
 
 print("\nPrediction with SKLearn")
 target = model.predict(data_sample)
@@ -120,4 +126,4 @@ def categorize_price(price: float) -> str:
 
 if ASSERT:
     assert np.array_equal(target, ibis_target["output_label"]), "Predictions do not match!"
-    print("\tPredictions match!")
+    print("\nPredictions match!")
diff --git a/examples/pipeline_boosted_tree_regressor.py b/examples/pipeline_boosted_tree_regressor.py
@@ -91,13 +91,19 @@
 mustela_pipeline = mustela.parse_pipeline(model, features=features)
 print(mustela_pipeline)
 
-ibis_expression = mustela.translate(ibis.memtable(data_sample), mustela_pipeline)
+ibis_table = ibis.memtable(data_sample, name="DATA_TABLE")
+ibis_expression = mustela.translate(ibis_table, mustela_pipeline)
 con = ibis.duckdb.connect()
 
 if PRINT_SQL:
+    sql = mustela.export_sql("DATA_TABLE", mustela_pipeline, dialect="duckdb")
     print("\nGenerated Query for DuckDB:")
-    print(con.compile(ibis_expression))
+    print(sql)
+    print("\nPrediction with SQL")
+    # The exported SQL references DATA_TABLE by name, so materialize the memtable in this connection first.
+    con.create_table(ibis_table.get_name(), obj=ibis_table)
+    print(con.raw_sql(sql).df())
 
 print("\nPrediction with SKLearn")
 target = model.predict(data_sample)
 print(target)
@@ -109,4 +115,9 @@
 #       which seems to allow DuckDB to complete the query as probably the DuckDB
 #       optimizer has less work to do in that case.
 print("\nPrediction with Ibis")
-print(con.execute(ibis_expression))
+ibis_target = con.execute(ibis_expression)["variable"].to_numpy()
+print(ibis_target)
+
+if ASSERT:
+    assert np.allclose(target, ibis_target), "Predictions do not match!"
+    print("\nPredictions match!")
diff --git a/examples/pipeline_decision_tree_classifier.py b/examples/pipeline_decision_tree_classifier.py
@@ -99,17 +99,28 @@ def categorize_area(a: float) -> str:
     }
 )
 
-ibis_expression = mustela.translate(ibis.memtable(example_data), mustela_pipeline)
+con = ibis.duckdb.connect()
+ibis_table = ibis.memtable(example_data, name="DATA_TABLE")
+ibis_expression = mustela.translate(ibis_table, mustela_pipeline)
 
 if PRINT_SQL:
-    print("\nGenerated Query:")
-    con = ibis.duckdb.connect()
-    print(con.compile(ibis_expression))
+    sql = mustela.export_sql("DATA_TABLE", mustela_pipeline, dialect="duckdb")
+    print("\nGenerated Query for DuckDB:")
+    print(sql)
+    print("\nPrediction with SQL")
+    # The exported SQL references DATA_TABLE by name, so materialize the memtable in this connection first.
+    con.create_table(ibis_table.get_name(), obj=ibis_table)
+    print(con.raw_sql(sql).df())
 
 print("\nPrediction with Ibis")
-print(ibis_expression.execute())
+ibis_target = ibis_expression.execute()
+print(ibis_target)
 
 print("\nPrediction with SKLearn")
 test_df = example_data.to_pandas()
-pred = pipeline.predict(test_df)
-print(pred)
+target = pipeline.predict(test_df)
+print(target)
+
+if ASSERT:
+    assert np.array_equal(target, ibis_target["output_label"]), "Predictions do not match!"
+    print("\nPredictions match!")
diff --git a/examples/pipeline_decision_tree_regressor.py b/examples/pipeline_decision_tree_regressor.py
@@ -82,17 +82,29 @@
 )
 
-# Genera la query SQL con Mustela
+# Generate the SQL query with Mustela
-ibis_expression = mustela.translate(ibis.memtable(example_data), mustela_pipeline)
+ibis_table = ibis.memtable(example_data, name="DATA_TABLE")
+ibis_expression = mustela.translate(ibis_table, mustela_pipeline)
 
+con = ibis.duckdb.connect()
 if PRINT_SQL:
-    print("\nGenerated Query:")
-    con = ibis.duckdb.connect()
-    print(con.compile(ibis_expression))
+    sql = mustela.export_sql("DATA_TABLE", mustela_pipeline, dialect="duckdb")
+    print("\nGenerated Query for DuckDB:")
+    print(sql)
+    print("\nPrediction with SQL")
+    # The exported SQL references DATA_TABLE by name, so materialize the memtable in this connection first.
+    con.create_table(ibis_table.get_name(), obj=ibis_table)
+    print(con.raw_sql(sql).df())
 
-print("\nPrediction with Ibis")
-print(ibis_expression.execute())
 
 print("\nPrediction with SKLearn")
 test_df = example_data.to_pandas()
-pred = pipeline.predict(test_df)
-print(pred)
+target = pipeline.predict(test_df)
+print(target)
+
+print("\nPrediction with Ibis")
+ibis_target = con.execute(ibis_expression)["variable"].to_numpy()
+print(ibis_target)
+
+if ASSERT:
+    assert np.allclose(target, ibis_target), "Predictions do not match!"
+    print("\nPredictions match!")
diff --git a/examples/pipeline_elasticnet.py b/examples/pipeline_elasticnet.py
@@ -1,6 +1,7 @@
 import os
 import logging
 import ibis
+import numpy as np
 import pyarrow as pa
 from sklearn.compose import ColumnTransformer
 from sklearn.datasets import load_iris
@@ -61,10 +62,10 @@
 )
 
 # Generate a query expression using Mustela
-ibis_expression = mustela.translate(ibis.memtable(example_data), mustela_pipeline)
+ibis_table = ibis.memtable(example_data, name="DATA_TABLE")
+ibis_expression = mustela.translate(ibis_table, mustela_pipeline)
 
 con = ibis.duckdb.connect()
-
 if PRINT_SQL:
     sql = mustela.export_sql("DATA_TABLE", mustela_pipeline, dialect="duckdb")
     print("\nGenerated Query for DuckDB:")
@@ -75,8 +76,13 @@
     print(con.raw_sql(sql).df())
 
 print("\nPrediction with Ibis")
-print(ibis_expression.execute())
+ibis_target = ibis_expression.execute()["variable"].to_numpy()
+print(ibis_target)
 
 print("\nPrediction with SKLearn")
-predictions = pipeline.predict(example_data.to_pandas())
-print(predictions)
+target = pipeline.predict(example_data.to_pandas())
+print(target)
+
+if ASSERT:
+    assert np.allclose(target, ibis_target), "Predictions do not match!"
+    print("\nPredictions match!")