Skip to content

Introduce E2E tests #33

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Apr 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
name: Run tests

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - '**'

jobs:
  tests:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4

      - name: Install uv
        run: |
          pip install uv

      - name: Install dependencies
        run: |
          uv sync --dev

      - name: Run Test Suite
        run: |
          uv run pytest -v --tb=short --disable-warnings --maxfail=1 --cov=mustela
167 changes: 0 additions & 167 deletions examples/_debug_pipeline_tree_regressor.py

This file was deleted.

14 changes: 10 additions & 4 deletions examples/pipeline_boosted_tree_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,12 +103,18 @@ def categorize_price(price: float) -> str:
print(mustela_pipeline)

# Translate the pipeline to a query
ibis_expression = mustela.translate(ibis.memtable(data_sample), mustela_pipeline)
con = ibis.duckdb.connect()
ibis_table = ibis.memtable(data_sample, name="DATA_TABLE")
ibis_expression = mustela.translate(ibis_table, mustela_pipeline)

con = ibis.duckdb.connect()
if PRINT_SQL:
sql = mustela.export_sql("DATA_TABLE", mustela_pipeline, dialect="duckdb")
print("\nGenerated Query for DuckDB:")
print(con.compile(ibis_expression))
print(sql)
print("\nPrediction with SQL")
# We need to create the table for the SQL to query it.
con.create_table(ibis_table.get_name(), obj=ibis_table)
print(con.raw_sql(sql).df())

print("\nPrediction with SKLearn")
target = model.predict(data_sample)
Expand All @@ -120,4 +126,4 @@ def categorize_price(price: float) -> str:

if ASSERT:
assert np.array_equal(target, ibis_target["output_label"]), "Predictions do not match!"
print("\tPredictions match!")
print("\nPredictions match!")
20 changes: 17 additions & 3 deletions examples/pipeline_boosted_tree_regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,13 +91,22 @@
mustela_pipeline = mustela.parse_pipeline(model, features=features)
print(mustela_pipeline)

ibis_expression = mustela.translate(ibis.memtable(data_sample), mustela_pipeline)
ibis_table = ibis.memtable(data_sample, name="DATA_TABLE")
ibis_expression = mustela.translate(ibis_table, mustela_pipeline)
con = ibis.duckdb.connect()

if PRINT_SQL:
print("\nGenerated Query for DuckDB:")
con = ibis.duckdb.connect()
print(con.compile(ibis_expression))

sql = mustela.export_sql("DATA_TABLE", mustela_pipeline, dialect="duckdb")
print("\nGenerated Query for DuckDB:")
print(sql)
print("\nPrediction with SQL")
# We need to create the table for the SQL to query it.
con.create_table(ibis_table.get_name(), obj=ibis_table)
print(con.raw_sql(sql).df())

print("\nPrediction with SKLearn")
target = model.predict(data_sample)
print(target)
Expand All @@ -109,4 +118,9 @@
# which seems to allow DuckDB to complete the query as probably the DuckDB
# optimizer has less work to do in that case.
print("\nPrediction with Ibis")
print(con.execute(ibis_expression))
ibis_target = con.execute(ibis_expression)["variable"].to_numpy()
print(ibis_target)

if ASSERT:
assert np.allclose(target, ibis_target), "Predictions do not match!"
print("\nPredictions match!")
25 changes: 18 additions & 7 deletions examples/pipeline_decision_tree_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,17 +99,28 @@ def categorize_area(a: float) -> str:
}
)

ibis_expression = mustela.translate(ibis.memtable(example_data), mustela_pipeline)
con = ibis.duckdb.connect()
ibis_table = ibis.memtable(example_data, name="DATA_TABLE")
ibis_expression = mustela.translate(ibis_table, mustela_pipeline)

if PRINT_SQL:
print("\nGenerated Query:")
con = ibis.duckdb.connect()
print(con.compile(ibis_expression))
sql = mustela.export_sql("DATA_TABLE", mustela_pipeline, dialect="duckdb")
print("\nGenerated Query for DuckDB:")
print(sql)
print("\nPrediction with SQL")
# We need to create the table for the SQL to query it.
con.create_table(ibis_table.get_name(), obj=ibis_table)
print(con.raw_sql(sql).df())

print("\nPrediction with Ibis")
print(ibis_expression.execute())
ibis_target = ibis_expression.execute()
print(ibis_target)

print("\nPrediction with SKLearn")
test_df = example_data.to_pandas()
pred = pipeline.predict(test_df)
print(pred)
target = pipeline.predict(test_df)
print(target)

if ASSERT:
assert np.array_equal(target, ibis_target["output_label"]), "Predictions do not match!"
print("\nPredictions match!")
28 changes: 20 additions & 8 deletions examples/pipeline_decision_tree_regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,17 +82,29 @@
)

# Generate the SQL query with Mustela
ibis_expression = mustela.translate(ibis.memtable(example_data), mustela_pipeline)
ibis_table = ibis.memtable(example_data, name="DATA_TABLE")
ibis_expression = mustela.translate(ibis_table, mustela_pipeline)

con = ibis.duckdb.connect()
if PRINT_SQL:
print("\nGenerated Query:")
con = ibis.duckdb.connect()
print(con.compile(ibis_expression))
sql = mustela.export_sql("DATA_TABLE", mustela_pipeline, dialect="duckdb")
print("\nGenerated Query for DuckDB:")
print(sql)
print("\nPrediction with SQL")
# We need to create the table for the SQL to query it.
con.create_table(ibis_table.get_name(), obj=ibis_table)
print(con.raw_sql(sql).df())

print("\nPrediction with Ibis")
print(ibis_expression.execute())

print("\nPrediction with SKLearn")
test_df = example_data.to_pandas()
pred = pipeline.predict(test_df)
print(pred)
target = pipeline.predict(test_df)
print(target)

print("\nPrediction with Ibis")
ibis_target = con.execute(ibis_expression)["variable"].to_numpy()
print(ibis_target)

if ASSERT:
assert np.allclose(target, ibis_target), "Predictions do not match!"
print("\nPredictions match!")
16 changes: 11 additions & 5 deletions examples/pipeline_elasticnet.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import logging
import ibis
import numpy as np
import pyarrow as pa
from sklearn.compose import ColumnTransformer
from sklearn.datasets import load_iris
Expand Down Expand Up @@ -61,10 +62,10 @@
)

# Generate a query expression using Mustela
ibis_expression = mustela.translate(ibis.memtable(example_data), mustela_pipeline)
ibis_table = ibis.memtable(example_data, name="DATA_TABLE")
ibis_expression = mustela.translate(ibis_table, mustela_pipeline)

con = ibis.duckdb.connect()

if PRINT_SQL:
sql = mustela.export_sql("DATA_TABLE", mustela_pipeline, dialect="duckdb")
print("\nGenerated Query for DuckDB:")
Expand All @@ -75,8 +76,13 @@
print(con.raw_sql(sql).df())

print("\nPrediction with Ibis")
print(ibis_expression.execute())
ibis_target = ibis_expression.execute()["variable"].to_numpy()
print(ibis_target)

print("\nPrediction with SKLearn")
predictions = pipeline.predict(example_data.to_pandas())
print(predictions)
target = pipeline.predict(example_data.to_pandas())
print(target)

if ASSERT:
assert np.allclose(target, ibis_target), "Predictions do not match!"
print("\nPredictions match!")
Loading
Loading