Skip to content

Commit 3228f64

Browse files
committed
docs: add streamlit app for showing the backend operation matrix
1 parent 9b9b315 commit 3228f64

File tree

1 file changed

+148
-0
lines changed

1 file changed

+148
-0
lines changed

docs/backend_info_app.py

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
import csv
2+
from enum import Enum
3+
from pathlib import Path
4+
5+
import pandas as pd
6+
import requests
7+
import streamlit as st
8+
9+
# Use Streamlit's wide page layout for this app.
st.set_page_config(layout="wide")
10+
11+
12+
# NOTE(review): newer Streamlit releases deprecate ``st.cache`` in favour of
# ``st.cache_data`` -- confirm the pinned Streamlit version before switching.
@st.cache
def load_ops_data():
    """Download the raw backend support matrix and return it as row dicts.

    The CSV is fetched from the ibis documentation site. Its ``Backends``
    column holds each operation's dotted name; every other column is treated
    as a backend whose cell is the literal string ``"True"`` when supported.

    Returns
    -------
    list[dict]
        One dict per CSV row with a boolean entry per backend column, plus
        ``FullOperation`` (the dotted name), ``ShortOperation`` (its last
        component) and ``OpCategory`` (its second-to-last component).

    Raises
    ------
    requests.RequestException
        If the download fails, times out, or returns an HTTP error status.
    KeyError
        If the CSV has no ``Backends`` column.
    IndexError
        If an operation name has no ``.`` separator at all.
    """
    response = requests.get(
        'https://ibis-project.org/docs/dev/backends/raw_support_matrix.csv',
        # Without a timeout an unresponsive server would hang the app forever.
        timeout=30,
    )
    response.raise_for_status()
    reader = csv.DictReader(response.text.splitlines())

    data = []
    for raw_row in reader:
        full_name = raw_row['Backends']
        row = {
            column: cell == "True"
            for column, cell in raw_row.items()
            if column != "Backends"
        }
        # Take the trailing components instead of fixed positions so the
        # lookup does not depend on how deeply the module path is nested.
        name_parts = full_name.rsplit(".", 2)
        row["FullOperation"] = full_name
        row['ShortOperation'] = name_parts[-1]
        row['OpCategory'] = name_parts[-2]
        data.append(row)
    return data
30+
31+
32+
# Module-level rows read by the operation-category filter and the table code.
ops_data = load_ops_data()
33+
34+
35+
class BackendCategory(Enum):
    """Tags used to classify backends.

    The member *names* are what the sidebar offers as "Backend category"
    filter options; the upper-case values are not read elsewhere in this
    script.
    """

    string = "STRING"
    sqlalchemy = "SQLALCHEMY"
    dataframe = "DATAFRAME"
    sql = "SQL"
40+
41+
42+
# Maps each backend name (a column of the support matrix) to the categories
# it belongs to; a backend is shown when any of its categories is selected.
BACKENDS = {
    "bigquery": [BackendCategory.string, BackendCategory.sql],
    "clickhouse": [BackendCategory.string, BackendCategory.sql],
    "dask": [BackendCategory.dataframe],
    "datafusion": [BackendCategory.dataframe],
    "duckdb": [BackendCategory.sqlalchemy, BackendCategory.sql],
    "impala": [BackendCategory.string, BackendCategory.sql],
    "mssql": [BackendCategory.sqlalchemy, BackendCategory.sql],
    "mysql": [BackendCategory.sqlalchemy, BackendCategory.sql],
    "pandas": [BackendCategory.dataframe],
    "polars": [BackendCategory.dataframe],
    "postgres": [BackendCategory.sqlalchemy, BackendCategory.sql],
    "pyspark": [BackendCategory.dataframe],
    "snowflake": [BackendCategory.sqlalchemy, BackendCategory.sql],
    "sqlite": [BackendCategory.sqlalchemy, BackendCategory.sql],
    "trino": [BackendCategory.sqlalchemy, BackendCategory.sql],
}
59+
60+
61+
def get_selected_backend_name():
    """Return the backends matching the sidebar's "Backend category" filter.

    Renders a multiselect of ``BackendCategory`` member names in the sidebar.
    A backend matches when it belongs to at least one selected category. When
    nothing is selected, or nothing matches, every backend is returned.

    Returns
    -------
    list[str]
        Backend names, always in ``BACKENDS`` declaration order.
    """
    selected_categories_names = st.sidebar.multiselect(
        'Backend category',
        options=sorted(cat.name for cat in BackendCategory),
        default=None,
    )
    selected_categories = {
        BackendCategory[name] for name in selected_categories_names
    }

    # Build a list in BACKENDS order rather than a set: iteration order of a
    # set of strings is not stable across processes, which would shuffle the
    # table's columns, and callers get the same type on both code paths.
    selected_backend_names = [
        backend_name
        for backend_name, backend_types in BACKENDS.items()
        if not selected_categories.isdisjoint(backend_types)
    ]
    return selected_backend_names or list(BACKENDS)
80+
81+
82+
def get_selected_operation_categories():
    """Return the operation categories chosen in the sidebar.

    Renders an "Operation category" multiselect whose options are the sorted
    distinct ``OpCategory`` values found in ``ops_data``.

    Returns
    -------
    The widget's selection when it is non-empty; otherwise the full set of
    categories, so an empty selection means "no filter".
    """
    every_category = {entry['OpCategory'] for entry in ops_data}
    chosen = st.sidebar.multiselect(
        'Operation category',
        options=sorted(every_category),
        default=None,
    )
    return chosen if chosen else every_category
92+
93+
94+
# Read the sidebar filter state that drives the table further down.
current_backend_names = get_selected_backend_name()
current_ops_categories = get_selected_operation_categories()
# Despite its name, this holds the selected option's label, not a boolean.
hide_supported_by_all_backends = st.sidebar.selectbox(
    'Operation compatibility',
    options=[
        'Show all',
        'Show supported by all backends',
        'Hide supported by all backends',
    ],
    index=0,
)
show_full_ops_name = st.sidebar.checkbox(
    'Show full operation name',
    value=False,
)
102+
103+
# Index the rows by the operation name, in the form chosen in the sidebar.
df = (
    pd.DataFrame(ops_data)
    .set_index("FullOperation" if show_full_ops_name else "ShortOperation")
    .sort_index()
)

# Keep only the selected operation categories (rows) and backends (columns).
df = df.loc[
    df['OpCategory'].isin(current_ops_categories),
    list(current_backend_names),
]

# The remaining columns hold per-backend support flags, so a row sum equal to
# the column count means every visible backend supports that operation.
if hide_supported_by_all_backends == 'Show supported by all backends':
    df = df[df.sum(axis=1) == len(df.columns)]
elif hide_supported_by_all_backends == 'Hide supported by all backends':
    df = df[df.sum(axis=1) != len(df.columns)]
120+
121+
all_visible_ops_count = len(df.index)
if not all_visible_ops_count:
    st.write("No data")
else:
    # One-row frame giving, per backend, the number of visible operations it
    # supports and that number as a rounded percentage of the visible rows.
    coverage = (
        df.sum()
        .sort_values(ascending=False)
        .map(lambda n: f"{n} ({round(100 * n / all_visible_ops_count)}%)")
        .to_frame(name="API Coverage")
        .T
    )

    # Coverage row first, then the matrix with flags rendered as symbols.
    table = pd.concat([coverage, df.replace({True: "✔️", False: "✖"})])
    st.table(table)
146+
147+
# Show this script's own source. Decode explicitly as UTF-8: the file contains
# non-ASCII symbols and the platform's default encoding may not be UTF-8.
with st.expander("Source code"):
    st.code(Path(__file__).read_text(encoding="utf-8"))

0 commit comments

Comments
 (0)