
Commit 861f978

feat(api): append comments of column and table for metadata api (#812)
* feat(clickhouse): append comments of column and table
* feat(clickhouse): append comments of column and table
* feat(mssql): append comments of column and table
* feat(mysql): append comments of column and table
* feat(postgres): append comments of column and table
* fix(trino): wait for the Trino container to be ready, to avoid 'nodes is empty' errors from Trino
* feat(trino): append comments of column and table
* chore(test): sort funcs and rename funcs
* test(snowflake): mark not-implemented feature
* refactor(metadata): tidy up
* chore(metadata): remove unimplemented data source
* fix(trino): fix connection
* chore(trino): merge two statements
* test(trino): update expected results; the expression was changed but the expected results were not updated in #782
1 parent 531cad8 commit 861f978

File tree: 14 files changed (399 additions, 299 deletions)

ibis-server/app/model/metadata/bigquery.py

2 additions, 4 deletions

@@ -1,5 +1,3 @@
-from json import loads
-
 from app.model import BigQueryConnectionInfo
 from app.model.data_source import DataSource
 from app.model.metadata.dto import (
@@ -49,7 +47,7 @@ def get_table_list(self) -> list[Table]:
         LEFT JOIN {dataset_id}.INFORMATION_SCHEMA.TABLE_OPTIONS table_options
             ON c.table_name = table_options.table_name
         """
-        response = loads(self.connection.sql(sql).to_pandas().to_json(orient="records"))
+        response = self.connection.sql(sql).to_pandas().to_dict(orient="records")

         def get_data_type(data_type) -> str:
             if "STRUCT" in data_type:
@@ -123,7 +121,7 @@ def get_constraints(self) -> list[Constraint]:
             ON ccu.constraint_name = tc.constraint_name
         WHERE tc.constraint_type = 'FOREIGN KEY'
         """
-        response = loads(self.connection.sql(sql).to_pandas().to_json(orient="records"))
+        response = self.connection.sql(sql).to_pandas().to_dict(orient="records")

         constraints = []
         for row in response:
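The substitution of pandas' to_dict(orient="records") for the json.loads(df.to_json(orient="records")) round trip recurs in every adapter touched by this commit. A minimal sketch of why the two are interchangeable here (a throwaway DataFrame rather than an actual warehouse query; for plain string columns the results match, though NaN and timestamp handling can differ between the two paths):

    import pandas as pd
    from json import loads

    df = pd.DataFrame({"table_name": ["orders"], "column_name": ["id"]})

    # old path: serialize the frame to a JSON string, then parse it back into dicts
    old_rows = loads(df.to_json(orient="records"))

    # new path: build the list of row dicts directly, skipping the JSON round trip
    new_rows = df.to_dict(orient="records")

    assert old_rows == new_rows  # [{'table_name': 'orders', 'column_name': 'id'}]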

ibis-server/app/model/metadata/clickhouse.py

16 additions, 22 deletions

@@ -1,5 +1,3 @@
-from json import loads
-
 from app.model import ClickHouseConnectionInfo
 from app.model.data_source import DataSource
 from app.model.metadata.dto import (
@@ -15,25 +13,27 @@
 class ClickHouseMetadata(Metadata):
     def __init__(self, connection_info: ClickHouseConnectionInfo):
         super().__init__(connection_info)
+        self.connection = DataSource.clickhouse.get_connection(connection_info)

     def get_table_list(self) -> list[Table]:
         sql = """
             SELECT
-                database AS table_schema,
-                table AS table_name,
-                name AS column_name,
-                type AS data_type
+                c.database AS table_schema,
+                c.table AS table_name,
+                t.comment AS table_comment,
+                c.name AS column_name,
+                c.type AS data_type,
+                c.comment AS column_comment
             FROM
-                system.columns
+                system.columns AS c
+            JOIN
+                system.tables AS t
+                ON c.database = t.database
+                AND c.table = t.name
             WHERE
-                database NOT IN ('system', 'INFORMATION_SCHEMA', 'information_schema', 'pg_catalog');
+                c.database NOT IN ('system', 'INFORMATION_SCHEMA', 'information_schema', 'pg_catalog');
             """
-        response = loads(
-            DataSource.clickhouse.get_connection(self.connection_info)
-            .sql(sql)
-            .to_pandas()
-            .to_json(orient="records")
-        )
+        response = self.connection.sql(sql).to_pandas().to_dict(orient="records")

         unique_tables = {}
         for row in response:
@@ -45,7 +45,7 @@ def get_table_list(self) -> list[Table]:
             if schema_table not in unique_tables:
                 unique_tables[schema_table] = Table(
                     name=schema_table,
-                    description="",
+                    description=row["table_comment"],
                     columns=[],
                     properties=TableProperties(
                         catalog=None,
@@ -61,7 +61,7 @@ def get_table_list(self) -> list[Table]:
                     name=row["column_name"],
                     type=self._transform_column_type(row["data_type"]),
                     notNull=False,
-                    description="",
+                    description=row["column_comment"],
                     properties=None,
                 )
             )
@@ -103,9 +103,3 @@ def _transform_column_type(self, data_type):
         }

         return switcher.get(data_type, WrenEngineColumnType.UNKNOWN)
-
-
-def to_json(df):
-    json_obj = loads(df.to_json(orient="split"))
-    del json_obj["index"]
-    return json_obj

15 additions, 38 deletions

@@ -1,49 +1,26 @@
-from json import loads
-
-from app.model import ConnectionInfo
 from app.model.data_source import DataSource
 from app.model.metadata.bigquery import BigQueryMetadata
 from app.model.metadata.clickhouse import ClickHouseMetadata
-from app.model.metadata.dto import (
-    Constraint,
-    Table,
-)
 from app.model.metadata.metadata import Metadata
 from app.model.metadata.mssql import MSSQLMetadata
 from app.model.metadata.mysql import MySQLMetadata
 from app.model.metadata.postgres import PostgresMetadata
 from app.model.metadata.trino import TrinoMetadata

+mapping = {
+    DataSource.bigquery: BigQueryMetadata,
+    DataSource.clickhouse: ClickHouseMetadata,
+    DataSource.mssql: MSSQLMetadata,
+    DataSource.mysql: MySQLMetadata,
+    DataSource.postgres: PostgresMetadata,
+    DataSource.trino: TrinoMetadata,
+}

-class MetadataFactory:
-    def __init__(self, data_source: DataSource, connection_info: ConnectionInfo):
-        self.metadata = self.get_metadata(data_source, connection_info)
-
-    def get_metadata(self, data_source: DataSource, connection_info) -> Metadata:
-        if data_source == DataSource.postgres:
-            return PostgresMetadata(connection_info)
-        if data_source == DataSource.bigquery:
-            return BigQueryMetadata(connection_info)
-        if data_source == DataSource.mysql:
-            return MySQLMetadata(connection_info)
-        if data_source == DataSource.mssql:
-            return MSSQLMetadata(connection_info)
-        if data_source == DataSource.clickhouse:
-            return ClickHouseMetadata(connection_info)
-        if data_source == DataSource.trino:
-            return TrinoMetadata(connection_info)
-
-        raise NotImplementedError(f"Unsupported data source: {self}")

-    def get_table_list(self) -> list[Table]:
-        return self.metadata.get_table_list()
-
-    def get_constraints(self) -> list[Constraint]:
-        return self.metadata.get_constraints()
-
-
-def to_json(df):
-    json_obj = loads(df.to_json(orient="split"))
-    del json_obj["index"]
-    json_obj["dtypes"] = df.dtypes.apply(lambda x: x.name).to_dict()
-    return json_obj
+class MetadataFactory:
+    @staticmethod
+    def get_metadata(data_source: DataSource, connection_info) -> Metadata:
+        try:
+            return mapping[data_source](connection_info)
+        except KeyError:
+            raise NotImplementedError(f"Unsupported data source: {data_source}")
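With the if/elif chain replaced by a module-level mapping, dispatch becomes a single dict lookup and the factory no longer wraps get_table_list/get_constraints itself. A hedged usage sketch (the factory's module path and the connection-info construction are assumptions, not taken from this diff):

    from app.model.data_source import DataSource
    from app.model.metadata.factory import MetadataFactory  # module path assumed

    # connection-info construction is illustrative; the real models live in app.model
    connection_info = ...  # e.g. a PostgresConnectionInfo with host/port/user/password

    metadata = MetadataFactory.get_metadata(DataSource.postgres, connection_info)
    tables = metadata.get_table_list()        # list[Table], now carrying table/column comments
    constraints = metadata.get_constraints()  # list[Constraint]

    # an unmapped data source hits the KeyError branch and raises NotImplementedError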

ibis-server/app/model/metadata/mssql.py

28 additions, 23 deletions

@@ -1,5 +1,3 @@
-from json import loads
-
 from app.model import MSSqlConnectionInfo
 from app.model.data_source import DataSource
 from app.model.metadata.dto import (
@@ -16,6 +14,7 @@
 class MSSQLMetadata(Metadata):
     def __init__(self, connection_info: MSSqlConnectionInfo):
         super().__init__(connection_info)
+        self.connection = DataSource.mssql.get_connection(connection_info)

     def get_table_list(self) -> list[Table]:
         sql = """
@@ -26,7 +25,9 @@ def get_table_list(self) -> list[Table]:
             col.COLUMN_NAME AS column_name,
             col.DATA_TYPE AS data_type,
             CASE WHEN pk.COLUMN_NAME IS NOT NULL THEN 'YES' ELSE 'NO' END AS is_pk,
-            col.IS_NULLABLE AS is_nullable
+            col.IS_NULLABLE AS is_nullable,
+            CAST(tprop.value AS NVARCHAR(MAX)) AS table_comment,
+            CAST(cprop.value AS NVARCHAR(MAX)) AS column_comment
         FROM
             INFORMATION_SCHEMA.COLUMNS col
         LEFT JOIN
@@ -40,13 +41,28 @@ def get_table_list(self) -> list[Table]:
             AND col.TABLE_NAME = pk.TABLE_NAME
             AND col.COLUMN_NAME = pk.COLUMN_NAME
             AND pk.CONSTRAINT_NAME = tab.CONSTRAINT_NAME
+        LEFT JOIN
+            sys.tables st
+            ON st.name = col.TABLE_NAME
+            AND SCHEMA_NAME(st.schema_id) = col.TABLE_SCHEMA
+        LEFT JOIN
+            sys.extended_properties tprop
+            ON tprop.major_id = st.object_id
+            AND tprop.minor_id = 0
+            AND tprop.name = 'MS_Description'
+        LEFT JOIN
+            sys.columns sc
+            ON sc.object_id = st.object_id
+            AND sc.name = col.COLUMN_NAME
+        LEFT JOIN
+            sys.extended_properties cprop
+            ON cprop.major_id = sc.object_id
+            AND cprop.minor_id = sc.column_id
+            AND cprop.name = 'MS_Description'
+        WHERE
+            col.TABLE_SCHEMA NOT IN ('sys', 'INFORMATION_SCHEMA');
         """
-        response = loads(
-            DataSource.mssql.get_connection(self.connection_info)
-            .sql(sql)
-            .to_pandas()
-            .to_json(orient="records")
-        )
+        response = self.connection.sql(sql).to_pandas().to_dict(orient="records")

         unique_tables = {}
         for row in response:
@@ -58,7 +74,7 @@ def get_table_list(self) -> list[Table]:
             if schema_table not in unique_tables:
                 unique_tables[schema_table] = Table(
                     name=schema_table,
-                    description="",
+                    description=row["table_comment"],
                     columns=[],
                     properties=TableProperties(
                         schema=row["table_schema"],
@@ -74,7 +90,7 @@ def get_table_list(self) -> list[Table]:
                     name=row["column_name"],
                     type=self._transform_column_type(row["data_type"]),
                     notNull=row["is_nullable"].lower() == "no",
-                    description="",
+                    description=row["column_comment"],
                     properties=None,
                 )
             )
@@ -121,12 +137,7 @@ def get_constraints(self) -> list[Constraint]:
             ON fkc.referenced_column_id = ref_c.column_id
             AND ref_c.object_id = ref_t.object_id
         """
-        res = loads(
-            DataSource.mssql.get_connection(self.connection_info)
-            .sql(sql)
-            .to_pandas()
-            .to_json(orient="records")
-        )
+        res = self.connection.sql(sql).to_pandas().to_dict(orient="records")
         constraints = []
         for row in res:
             constraints.append(
@@ -196,9 +207,3 @@ def _transform_column_type(self, data_type):
         }

         return switcher.get(data_type.lower(), WrenEngineColumnType.UNKNOWN)
-
-
-def to_json(df):
-    json_obj = loads(df.to_json(orient="split"))
-    del json_obj["index"]
-    return json_obj

ibis-server/app/model/metadata/mysql.py

19 additions, 30 deletions

@@ -1,5 +1,3 @@
-from json import loads
-
 from app.model import MySqlConnectionInfo
 from app.model.data_source import DataSource
 from app.model.metadata.dto import (
@@ -16,27 +14,29 @@
 class MySQLMetadata(Metadata):
     def __init__(self, connection_info: MySqlConnectionInfo):
         super().__init__(connection_info)
+        self.connection = DataSource.mysql.get_connection(connection_info)

     def get_table_list(self) -> list[Table]:
         sql = """
         SELECT
-            TABLE_SCHEMA as table_schema,
-            TABLE_NAME as table_name,
-            COLUMN_NAME as column_name,
-            DATA_TYPE as data_type,
-            IS_NULLABLE as is_nullable,
-            COLUMN_KEY as column_key
+            c.TABLE_SCHEMA AS table_schema,
+            c.TABLE_NAME AS table_name,
+            c.COLUMN_NAME AS column_name,
+            c.DATA_TYPE AS data_type,
+            c.IS_NULLABLE AS is_nullable,
+            c.COLUMN_KEY AS column_key,
+            c.COLUMN_COMMENT AS column_comment,
+            t.TABLE_COMMENT AS table_comment
         FROM
-            information_schema.COLUMNS
+            information_schema.COLUMNS c
+        JOIN
+            information_schema.TABLES t
+            ON c.TABLE_SCHEMA = t.TABLE_SCHEMA
+            AND c.TABLE_NAME = t.TABLE_NAME
         WHERE
-            TABLE_SCHEMA not IN ("mysql", "information_schema", "performance_schema", "sys")
+            c.TABLE_SCHEMA NOT IN ('mysql', 'information_schema', 'performance_schema', 'sys');
         """
-        response = loads(
-            DataSource.mysql.get_connection(self.connection_info)
-            .sql(sql)
-            .to_pandas()
-            .to_json(orient="records")
-        )
+        response = self.connection.sql(sql).to_pandas().to_dict(orient="records")

         unique_tables = {}
         for row in response:
@@ -48,7 +48,7 @@ def get_table_list(self) -> list[Table]:
             if schema_table not in unique_tables:
                 unique_tables[schema_table] = Table(
                     name=schema_table,
-                    description="",
+                    description=row["table_comment"],
                     columns=[],
                     properties=TableProperties(
                         schema=row["table_schema"],
@@ -65,7 +65,7 @@ def get_table_list(self) -> list[Table]:
                     name=row["column_name"],
                     type=self._transform_column_type(row["data_type"]),
                     notNull=row["is_nullable"].lower() == "no",
-                    description="",
+                    description=row["column_comment"],
                     properties=None,
                 )
             )
@@ -93,12 +93,7 @@ def get_constraints(self) -> list[Constraint]:
             ON rc.CONSTRAINT_NAME = kcu.CONSTRAINT_NAME
             AND rc.CONSTRAINT_SCHEMA = kcu.CONSTRAINT_SCHEMA
         """
-        res = loads(
-            DataSource.mysql.get_connection(self.connection_info)
-            .sql(sql)
-            .to_pandas()
-            .to_json(orient="records")
-        )
+        res = self.connection.sql(sql).to_pandas().to_dict(orient="records")
         constraints = []
         for row in res:
             constraints.append(
@@ -167,9 +162,3 @@ def _transform_column_type(self, data_type):
         }

         return switcher.get(data_type.lower(), WrenEngineColumnType.UNKNOWN)
-
-
-def to_json(df):
-    json_obj = loads(df.to_json(orient="split"))
-    del json_obj["index"]
-    return json_obj
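All of the relational adapters share the same aggregation step after the query: the flat one-row-per-column result set is folded into one entry per schema-qualified table, the table comment is applied once when the table is first seen, and each column comment travels with its column. A simplified standalone version of that loop using plain dicts (the real code builds the Table/Column dto models shown in the hunks above):

    rows = [
        {"table_schema": "public", "table_name": "orders", "table_comment": "sales orders",
         "column_name": "id", "data_type": "bigint", "column_comment": "primary key"},
        {"table_schema": "public", "table_name": "orders", "table_comment": "sales orders",
         "column_name": "note", "data_type": "text", "column_comment": ""},
    ]

    unique_tables = {}
    for row in rows:
        schema_table = f"{row['table_schema']}.{row['table_name']}"
        # the first row seen for a table creates the entry and carries the table-level comment
        table = unique_tables.setdefault(
            schema_table,
            {"name": schema_table, "description": row["table_comment"], "columns": []},
        )
        # every row contributes one column, carrying the column-level comment
        table["columns"].append(
            {"name": row["column_name"], "type": row["data_type"],
             "description": row["column_comment"]}
        )

    print(list(unique_tables.values()))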
