Skip to content

Commit 5be3385

Browse files
authored
feat(ibis): introduce the relationship validation (#815)
* implement relationship validation * fix tests for other source * fix the method argument * fix test * fix test * address comment
1 parent f792e27 commit 5be3385

File tree

14 files changed

+314
-25
lines changed

14 files changed

+314
-25
lines changed

ibis-server/app/model/validator.py

+117-4
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,31 @@
11
from __future__ import annotations
22

3+
import base64
4+
import json
5+
36
from app.mdl.rewriter import Rewriter
47
from app.model import UnprocessableEntityError
58
from app.model.connector import Connector
69

7-
rules = ["column_is_valid"]
10+
rules = ["column_is_valid", "relationship_is_valid"]
811

912

1013
class Validator:
1114
def __init__(self, connector: Connector, rewriter: Rewriter):
1215
self.connector = connector
1316
self.rewriter = rewriter
1417

15-
def validate(self, rule: str, parameters: dict[str, str]):
18+
def validate(self, rule: str, parameters: dict[str, str], manifest_str: str):
1619
if rule not in rules:
1720
raise RuleNotFoundError(rule)
1821
try:
19-
getattr(self, f"_validate_{rule}")(parameters)
22+
getattr(self, f"_validate_{rule}")(parameters, manifest_str)
2023
except ValidationError as e:
2124
raise e
2225
except Exception as e:
2326
raise ValidationError(f"Unknown exception: {type(e)}, message: {e!s}")
2427

25-
def _validate_column_is_valid(self, parameters: dict[str, str]):
28+
def _validate_column_is_valid(self, parameters: dict[str, str], manifest_str: str):
2629
model_name = parameters.get("modelName")
2730
column_name = parameters.get("columnName")
2831
if model_name is None:
@@ -37,6 +40,116 @@ def _validate_column_is_valid(self, parameters: dict[str, str]):
3740
except Exception as e:
3841
raise ValidationError(f"Exception: {type(e)}, message: {e!s}")
3942

43+
def _validate_relationship_is_valid(
44+
self, parameters: dict[str, str], manifest_str: str
45+
):
46+
relationship_name = parameters.get("relationshipName")
47+
if relationship_name is None:
48+
raise MissingRequiredParameterError("relationship")
49+
decoded_manifest = base64.b64decode(manifest_str).decode("utf-8")
50+
manifest = json.loads(decoded_manifest)
51+
52+
relationship = list(
53+
filter(lambda r: r["name"] == relationship_name, manifest["relationships"])
54+
)
55+
56+
if len(relationship) == 0:
57+
raise ValidationError(
58+
f"Relationship {relationship_name} not found in manifest"
59+
)
60+
61+
left_model = self._get_model(manifest, relationship[0]["models"][0])
62+
right_model = self._get_model(manifest, relationship[0]["models"][1])
63+
relationship_type = relationship[0]["joinType"].lower()
64+
condition = relationship[0]["condition"]
65+
columns = condition.split("=")
66+
left_column = columns[0].strip().split(".")[1]
67+
right_column = columns[1].strip().split(".")[1]
68+
69+
def generate_column_is_unique_sql(model_name, column_name):
70+
return f'SELECT count(*) = count(distinct {column_name}) AS result FROM "{model_name}"'
71+
72+
def generate_is_exist_join_sql(
73+
left_model, right_model, left_column, right_column
74+
):
75+
return f'SELECT count(*) > 0 AS result FROM "{left_model}" JOIN "{right_model}" ON "{left_model}"."{left_column}" = "{right_model}"."{right_column}"'
76+
77+
def generate_sql_from_type(
78+
relationship_type, left_model, right_model, left_column, right_column
79+
):
80+
if relationship_type == "one_to_one":
81+
return f"""WITH
82+
lefttable AS ({generate_column_is_unique_sql(left_model, left_column)}),
83+
righttable AS ({generate_column_is_unique_sql(right_model, right_column)}),
84+
joinexist AS ({generate_is_exist_join_sql(left_model, right_model, left_column, right_column)})
85+
SELECT lefttable.result AND righttable.result AND joinexist.result result,
86+
lefttable.result left_table_unique,
87+
righttable.result right_table_unique,
88+
joinexist.result is_related
89+
FROM lefttable, righttable, joinexist"""
90+
elif relationship_type == "many_to_one":
91+
return f"""WITH
92+
righttable AS ({generate_column_is_unique_sql(right_model, right_column)}),
93+
joinexist AS ({generate_is_exist_join_sql(left_model, right_model, left_column, right_column)})
94+
SELECT righttable.result AND joinexist.result result,
95+
righttable.result right_table_unique,
96+
joinexist.result is_related
97+
FROM righttable, joinexist"""
98+
elif relationship_type == "one_to_many":
99+
return f"""WITH
100+
lefttable AS ({generate_column_is_unique_sql(left_model, left_column)}),
101+
joinexist AS ({generate_is_exist_join_sql(left_model, right_model, left_column, right_column)})
102+
SELECT lefttable.result AND joinexist.result result,
103+
lefttable.result left_table_unique,
104+
joinexist.result is_related
105+
FROM lefttable, joinexist"""
106+
elif relationship_type == "many_to_many":
107+
return f"""WITH
108+
joinexist AS ({generate_is_exist_join_sql(left_model, right_model, left_column, right_column)})
109+
SELECT joinexist.result result,
110+
joinexist.result is_related
111+
FROM joinexist"""
112+
else:
113+
raise ValidationError(f"Unknown relationship type: {relationship_type}")
114+
115+
def format_result(result):
116+
output = {}
117+
output["result"] = str(result.get("result").get(0))
118+
output["is_related"] = str(result.get("is_related").get(0))
119+
if result.get("left_table_unique") is not None:
120+
output["left_table_unique"] = str(
121+
result.get("left_table_unique").get(0)
122+
)
123+
if result.get("right_table_unique") is not None:
124+
output["right_table_unique"] = str(
125+
result.get("right_table_unique").get(0)
126+
)
127+
return output
128+
129+
sql = generate_sql_from_type(
130+
relationship_type,
131+
left_model["name"],
132+
right_model["name"],
133+
left_column,
134+
right_column,
135+
)
136+
try:
137+
rewritten_sql = self.rewriter.rewrite(sql)
138+
result = self.connector.query(rewritten_sql, limit=1)
139+
if not result.get("result").get(0):
140+
raise ValidationError(
141+
f"Relationship {relationship_name} is not valid: {format_result(result)}"
142+
)
143+
144+
except Exception as e:
145+
raise ValidationError(f"Exception: {type(e)}, message: {e!s}")
146+
147+
def _get_model(self, manifest, model_name):
148+
models = list(filter(lambda m: m["name"] == model_name, manifest["models"]))
149+
if len(models) == 0:
150+
raise ValidationError(f"Model {model_name} not found in manifest")
151+
return models[0]
152+
40153

41154
class ValidationError(UnprocessableEntityError):
42155
pass

ibis-server/app/routers/v2/connector.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def validate(data_source: DataSource, rule_name: str, dto: ValidateDTO) -> Respo
4141
Connector(data_source, dto.connection_info, dto.manifest_str),
4242
Rewriter(dto.manifest_str, data_source=data_source),
4343
)
44-
validator.validate(rule_name, dto.parameters)
44+
validator.validate(rule_name, dto.parameters, dto.manifest_str)
4545
return Response(status_code=204)
4646

4747

ibis-server/app/routers/v3/connector.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -53,5 +53,5 @@ def validate(data_source: DataSource, rule_name: str, dto: ValidateDTO) -> Respo
5353
Connector(data_source, dto.connection_info, dto.manifest_str),
5454
Rewriter(dto.manifest_str, data_source=data_source, experiment=True),
5555
)
56-
validator.validate(rule_name, dto.parameters)
56+
validator.validate(rule_name, dto.parameters, dto.manifest_str)
5757
return Response(status_code=204)

ibis-server/tests/routers/v2/connector/test_bigquery.py

+17-2
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from fastapi.testclient import TestClient
77

88
from app.main import app
9+
from app.model.validator import rules
910

1011
pytestmark = pytest.mark.bigquery
1112

@@ -185,6 +186,20 @@ def test_query_with_dry_run_and_invalid_sql():
185186
assert response.text is not None
186187

187188

189+
def test_query_values():
190+
response = client.post(
191+
url=f"{base_url}/query",
192+
params={"dryRun": True},
193+
json={
194+
"connectionInfo": connection_info,
195+
"manifestStr": manifest_str,
196+
"sql": "SELECT * FROM (VALUES (1, 2), (3, 4))",
197+
},
198+
)
199+
200+
assert response.status_code == 204
201+
202+
188203
def test_validate_with_unknown_rule():
189204
response = client.post(
190205
url=f"{base_url}/validate/unknown_rule",
@@ -194,10 +209,10 @@ def test_validate_with_unknown_rule():
194209
"parameters": {"modelName": "Orders", "columnName": "orderkey"},
195210
},
196211
)
212+
197213
assert response.status_code == 422
198214
assert (
199-
response.text
200-
== "The rule `unknown_rule` is not in the rules, rules: ['column_is_valid']"
215+
response.text == f"The rule `unknown_rule` is not in the rules, rules: {rules}"
201216
)
202217

203218

ibis-server/tests/routers/v2/connector/test_clickhouse.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from testcontainers.clickhouse import ClickHouseContainer
99

1010
from app.main import app
11+
from app.model.validator import rules
1112
from tests.confest import file_path
1213

1314
pytestmark = pytest.mark.clickhouse
@@ -410,8 +411,7 @@ def test_validate_with_unknown_rule(clickhouse: ClickHouseContainer):
410411
)
411412
assert response.status_code == 422
412413
assert (
413-
response.text
414-
== "The rule `unknown_rule` is not in the rules, rules: ['column_is_valid']"
414+
response.text == f"The rule `unknown_rule` is not in the rules, rules: {rules}"
415415
)
416416

417417

ibis-server/tests/routers/v2/connector/test_mssql.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from testcontainers.mssql import SqlServerContainer
1010

1111
from app.main import app
12+
from app.model.validator import rules
1213
from tests.confest import file_path
1314

1415
pytestmark = pytest.mark.mssql
@@ -251,8 +252,7 @@ def test_validate_with_unknown_rule(mssql: SqlServerContainer):
251252
)
252253
assert response.status_code == 422
253254
assert (
254-
response.text
255-
== "The rule `unknown_rule` is not in the rules, rules: ['column_is_valid']"
255+
response.text == f"The rule `unknown_rule` is not in the rules, rules: {rules}"
256256
)
257257

258258

ibis-server/tests/routers/v2/connector/test_mysql.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from testcontainers.mysql import MySqlContainer
1010

1111
from app.main import app
12+
from app.model.validator import rules
1213
from tests.confest import file_path
1314

1415
pytestmark = pytest.mark.mysql
@@ -257,8 +258,7 @@ def test_validate_with_unknown_rule(mysql: MySqlContainer):
257258
)
258259
assert response.status_code == 422
259260
assert (
260-
response.text
261-
== "The rule `unknown_rule` is not in the rules, rules: ['column_is_valid']"
261+
response.text == f"The rule `unknown_rule` is not in the rules, rules: {rules}"
262262
)
263263

264264

ibis-server/tests/routers/v2/connector/test_postgres.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from testcontainers.postgres import PostgresContainer
1212

1313
from app.main import app
14+
from app.model.validator import rules
1415
from tests.confest import file_path
1516

1617
pytestmark = pytest.mark.postgres
@@ -288,8 +289,7 @@ def test_validate_with_unknown_rule(postgres: PostgresContainer):
288289
)
289290
assert response.status_code == 422
290291
assert (
291-
response.text
292-
== "The rule `unknown_rule` is not in the rules, rules: ['column_is_valid']"
292+
response.text == f"The rule `unknown_rule` is not in the rules, rules: {rules}"
293293
)
294294

295295

ibis-server/tests/routers/v2/connector/test_snowflake.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from fastapi.testclient import TestClient
77

88
from app.main import app
9+
from app.model.validator import rules
910

1011
pytestmark = pytest.mark.snowflake
1112

@@ -192,8 +193,7 @@ def test_validate_with_unknown_rule():
192193
)
193194
assert response.status_code == 422
194195
assert (
195-
response.text
196-
== "The rule `unknown_rule` is not in the rules, rules: ['column_is_valid']"
196+
response.text == f"The rule `unknown_rule` is not in the rules, rules: {rules}"
197197
)
198198

199199

ibis-server/tests/routers/v2/connector/test_trino.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from trino.dbapi import connect
99

1010
from app.main import app
11+
from app.model.validator import rules
1112

1213
pytestmark = pytest.mark.trino
1314

@@ -268,8 +269,7 @@ def test_validate_with_unknown_rule(trino: TrinoContainer):
268269
)
269270
assert response.status_code == 422
270271
assert (
271-
response.text
272-
== "The rule `unknown_rule` is not in the rules, rules: ['column_is_valid']"
272+
response.text == f"The rule `unknown_rule` is not in the rules, rules: {rules}"
273273
)
274274

275275

0 commit comments

Comments
 (0)