|
| 1 | +import csv |
| 2 | +from enum import Enum |
| 3 | +from pathlib import Path |
| 4 | + |
| 5 | +import pandas as pd |
| 6 | +import requests |
| 7 | +import streamlit as st |
| 8 | + |
# Request the "wide" page layout; this is the first Streamlit call in the script.
st.set_page_config(layout="wide")
| 10 | + |
| 11 | + |
@st.cache
def load_ops_data():
    """Download the ibis backend support matrix and return it as row dicts.

    The ``Backends`` column of the CSV is treated as the fully qualified
    operation name; every other cell is turned into a ``bool`` that is
    ``True`` only when the cell is exactly the string ``"True"``.

    Returns:
        A list with one dict per CSV row. Each dict holds the boolean
        columns followed by three derived keys:

        * ``FullOperation``  - the value of the ``Backends`` column,
        * ``ShortOperation`` - the last dot-separated component of it,
        * ``OpCategory``     - the second-to-last component of it.

    Raises:
        requests.HTTPError: the server answered with an error status.
        requests.Timeout: no response arrived within the timeout.
        ValueError: an operation name contains no ``"."`` separator.
    """
    # NOTE(review): newer Streamlit releases appear to deprecate ``st.cache``
    # in favour of ``st.cache_data`` - confirm against the pinned version
    # before switching decorators.
    response = requests.get(
        'https://ibis-project.org/docs/dev/backends/raw_support_matrix.csv',
        # Without a timeout a stalled connection would block the app forever.
        timeout=30,
    )
    response.raise_for_status()

    data = []
    for raw_row in csv.DictReader(response.text.splitlines()):
        full_name = raw_row.pop("Backends")
        row = {column: cell == "True" for column, cell in raw_row.items()}
        # Take the trailing two components instead of fixed left positions,
        # so the result does not depend on how deep the module path is.
        # Identical to the former [3]/[4] for five-component names.
        category, short_name = full_name.rsplit(".", 2)[-2:]
        row["FullOperation"] = full_name
        row['ShortOperation'] = short_name
        row['OpCategory'] = category
        data.append(row)
    return data
| 30 | + |
| 31 | + |
# Loaded at module level; the operation-category sidebar helper and the
# DataFrame construction further down both read this list.
ops_data = load_ops_data()
| 33 | + |
| 34 | + |
class BackendCategory(Enum):
    """Categories used to tag each backend in ``BACKENDS``.

    The member *names* (not the values) are what the sidebar offers as
    "Backend category" options.
    """

    # Declaration order is the enum's iteration order; keep it stable.
    string = "STRING"
    sqlalchemy = "SQLALCHEMY"
    dataframe = "DATAFRAME"
    sql = "SQL"
| 40 | + |
| 41 | + |
# Backend name -> categories it is tagged with. The keys are also used as
# column names when the support-matrix DataFrame is narrowed to the selected
# backends, and this insertion order is the fallback order when no category
# filter is active.
BACKENDS = {
    "bigquery": [BackendCategory.string, BackendCategory.sql],
    "clickhouse": [BackendCategory.string, BackendCategory.sql],
    "dask": [BackendCategory.dataframe],
    "datafusion": [BackendCategory.dataframe],
    "duckdb": [BackendCategory.sqlalchemy, BackendCategory.sql],
    "impala": [BackendCategory.string, BackendCategory.sql],
    "mssql": [BackendCategory.sqlalchemy, BackendCategory.sql],
    "mysql": [BackendCategory.sqlalchemy, BackendCategory.sql],
    "pandas": [BackendCategory.dataframe],
    "polars": [BackendCategory.dataframe],
    "postgres": [BackendCategory.sqlalchemy, BackendCategory.sql],
    "pyspark": [BackendCategory.dataframe],
    "snowflake": [BackendCategory.sqlalchemy, BackendCategory.sql],
    "sqlite": [BackendCategory.sqlalchemy, BackendCategory.sql],
    "trino": [BackendCategory.sqlalchemy, BackendCategory.sql],
}
| 59 | + |
| 60 | + |
def get_selected_backend_name():
    """Render the "Backend category" multiselect and resolve it to backends.

    Returns:
        A list of backend names, in ``BACKENDS`` order, that carry at least
        one of the categories picked in the sidebar. When nothing is picked
        (or nothing matches) every backend name is returned.
    """
    category_names = sorted(category.name for category in BackendCategory)
    chosen_names = st.sidebar.multiselect(
        'Backend category',
        options=category_names,
        default=None,
    )
    chosen_categories = {BackendCategory[name] for name in chosen_names}

    # Build a list rather than a set so the return type is the same on every
    # path and the order is deterministic (it follows BACKENDS).
    selected_backend_names = [
        backend_name
        for backend_name, backend_types in BACKENDS.items()
        if not chosen_categories.isdisjoint(backend_types)
    ]
    # An empty selection means "no filter", not "no backends".
    return selected_backend_names or list(BACKENDS)
| 80 | + |
| 81 | + |
def get_selected_operation_categories():
    """Render the "Operation category" multiselect and return the choice.

    Returns:
        A list of operation-category names. When the user picks nothing,
        every category found in ``ops_data`` is returned (sorted), so the
        result is a list on both paths.
    """
    all_ops_categories = sorted({row['OpCategory'] for row in ops_data})
    selected_ops_categories = st.sidebar.multiselect(
        'Operation category',
        options=all_ops_categories,
        default=None,
    )
    # An empty selection means "no filter".
    return selected_ops_categories or all_ops_categories
| 92 | + |
| 93 | + |
# ---- Sidebar controls -------------------------------------------------------
current_backend_names = get_selected_backend_name()
current_ops_categories = get_selected_operation_categories()
# Despite its name, this holds one of the three mode strings listed below,
# not a boolean.
hide_supported_by_all_backends = st.sidebar.selectbox(
    'Operation compatibility',
    ['Show all', 'Show supported by all backends', 'Hide supported by all backends'],
    0,
)
show_full_ops_name = st.sidebar.checkbox('Show full operation name', False)

# ---- Build the support matrix ----------------------------------------------
df = pd.DataFrame(ops_data)
index_column = "FullOperation" if show_full_ops_name else "ShortOperation"
df = df.set_index(index_column).sort_index()

# Show only selected operation categories
df = df[df['OpCategory'].isin(current_ops_categories)]

# Show only selected backends; this also drops the helper columns
# (OpCategory and the unused name column), leaving only boolean columns.
df = df[list(current_backend_names)]

# One mask, computed once: True for rows where every visible backend
# supports the operation.
supported_by_all = df.all(axis=1)
if hide_supported_by_all_backends == 'Show supported by all backends':
    df = df[supported_by_all]
elif hide_supported_by_all_backends == 'Hide supported by all backends':
    df = df[~supported_by_all]

# ---- Render -----------------------------------------------------------------
all_visible_ops_count = len(df.index)
if all_visible_ops_count:
    # Compute coverage: per backend, the number of supported visible
    # operations and that number as a rounded percentage, shown as the
    # first row of the table.
    coverage = (
        df.sum()
        .sort_values(ascending=False)
        .map(lambda n: f"{n} ({round(100 * n / all_visible_ops_count)}%)")
        .to_frame(name="API Coverage")
        .T
    )

    table = pd.concat(
        [
            coverage,
            df.replace(
                {
                    True: "✔️",
                    False: "✖",
                }
            ),
        ]
    )
    st.table(table)
else:
    st.write("No data")

with st.expander("Source code"):
    # This file contains non-ASCII literals, so read it as UTF-8 explicitly
    # instead of relying on the platform's default encoding.
    st.code(Path(__file__).read_text(encoding="utf-8"))
0 commit comments