Skip to content

Commit dfbd336

Browse files
authored
fix(format): handle bytes to hex (#783)
* chore(test): remove unused
* fix(format): handle byte to hex
1 parent 437ce4c commit dfbd336

File tree

12 files changed

+64
-46
lines changed

12 files changed

+64
-46
lines changed

ibis-server/app/custom_sqlglot/__init__.py

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# ruff: noqa: F401
2+
3+
from app.custom_sqlglot.dialects.mysql import MySQL
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
from sqlglot import exp
2+
from sqlglot.dialects import MySQL as OriginalMySQL
3+
4+
5+
class MySQL(OriginalMySQL):
6+
class Generator(OriginalMySQL.Generator):
7+
TYPE_MAPPING = {
8+
**OriginalMySQL.Generator.TYPE_MAPPING,
9+
exp.DataType.Type.VARBINARY: "BINARY",
10+
}

ibis-server/app/mdl/rewriter.py

+3
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414
# To register custom dialects from ibis library for sqlglot
1515
importlib.import_module("ibis.backends.sql.dialects")
1616

17+
# Register custom dialects
18+
importlib.import_module("app.custom_sqlglot.dialects")
19+
1720

1821
class Rewriter:
1922
def __init__(

ibis-server/app/util.py

+2
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ def default(obj):
3030
return None
3131
if isinstance(obj, decimal.Decimal):
3232
return str(obj)
33+
if isinstance(obj, (bytes, bytearray)):
34+
return obj.hex()
3335
raise TypeError
3436

3537
json_obj = orjson.loads(

ibis-server/tests/routers/v2/connector/test_bigquery.py

+7-9
Original file line numberDiff line numberDiff line change
@@ -60,18 +60,14 @@
6060
"expression": "cast(NULL as timestamp)",
6161
"type": "timestamp",
6262
},
63+
{
64+
"name": "bytea_column",
65+
"expression": "cast('abc' as bytea)",
66+
"type": "bytea",
67+
},
6368
],
6469
"primaryKey": "orderkey",
6570
},
66-
{
67-
"name": "Customer",
68-
"refSql": "select * from tpch_tiny.customer",
69-
"columns": [
70-
{"name": "custkey", "expression": "c_custkey", "type": "integer"},
71-
{"name": "name", "expression": "c_name", "type": "varchar"},
72-
],
73-
"primaryKey": "custkey",
74-
},
7571
],
7672
}
7773

@@ -101,6 +97,7 @@ def test_query():
10197
"2024-01-01 23:59:59.000000",
10298
"2024-01-01 23:59:59.000000 UTC",
10399
None,
100+
"616263",
104101
]
105102
assert result["dtypes"] == {
106103
"orderkey": "int64",
@@ -112,6 +109,7 @@ def test_query():
112109
"timestamp": "object",
113110
"timestamptz": "object",
114111
"test_null_time": "datetime64[ns]",
112+
"bytea_column": "object",
115113
}
116114

117115

ibis-server/tests/routers/v2/connector/test_clickhouse.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,11 @@
5757
"expression": "toDateTime64(NULL, 9)",
5858
"type": "timestamp",
5959
},
60+
{
61+
"name": "bytea_column",
62+
"expression": "cast('abc' as bytea)",
63+
"type": "bytea",
64+
},
6065
{
6166
"name": "customer",
6267
"type": "Customer",
@@ -167,7 +172,7 @@ def test_query(clickhouse: ClickHouseContainer):
167172
)
168173
assert response.status_code == 200
169174
result = response.json()
170-
assert len(result["columns"]) == 9
175+
assert len(result["columns"]) == 10
171176
assert len(result["data"]) == 1
172177
assert result["data"][0] == [
173178
1,
@@ -179,6 +184,7 @@ def test_query(clickhouse: ClickHouseContainer):
179184
"2024-01-01 23:59:59.000000",
180185
"2024-01-01 23:59:59.000000 UTC",
181186
None,
187+
"abc", # Clickhouse does not support bytea, so it is returned as string
182188
]
183189
assert result["dtypes"] == {
184190
"orderkey": "int32",
@@ -190,6 +196,7 @@ def test_query(clickhouse: ClickHouseContainer):
190196
"timestamp": "object",
191197
"timestamptz": "object",
192198
"test_null_time": "object",
199+
"bytea_column": "object",
193200
}
194201

195202

@@ -205,7 +212,7 @@ def test_query_with_connection_url(clickhouse: ClickHouseContainer):
205212
)
206213
assert response.status_code == 200
207214
result = response.json()
208-
assert len(result["columns"]) == 9
215+
assert len(result["columns"]) == 10
209216
assert len(result["data"]) == 1
210217
assert result["data"][0][0] == 1
211218
assert result["dtypes"] is not None

ibis-server/tests/routers/v2/connector/test_mssql.py

+7-12
Original file line numberDiff line numberDiff line change
@@ -57,18 +57,14 @@
5757
"expression": "cast(NULL as timestamp)",
5858
"type": "timestamp",
5959
},
60+
{
61+
"name": "bytea_column",
62+
"expression": "cast('abc' as bytea)",
63+
"type": "bytea",
64+
},
6065
],
6166
"primaryKey": "orderkey",
6267
},
63-
{
64-
"name": "Customer",
65-
"refSql": "select * from dbo.customer",
66-
"columns": [
67-
{"name": "custkey", "expression": "c_custkey", "type": "integer"},
68-
{"name": "name", "expression": "c_name", "type": "varchar"},
69-
],
70-
"primaryKey": "custkey",
71-
},
7268
],
7369
}
7470

@@ -84,9 +80,6 @@ def mssql(request) -> SqlServerContainer:
8480
pd.read_parquet(file_path("resource/tpch/data/orders.parquet")).to_sql(
8581
"orders", engine, index=False
8682
)
87-
pd.read_parquet(file_path("resource/tpch/data/customer.parquet")).to_sql(
88-
"customer", engine, index=False
89-
)
9083
request.addfinalizer(mssql.stop)
9184
return mssql
9285

@@ -115,6 +108,7 @@ def test_query(mssql: SqlServerContainer):
115108
"2024-01-01 23:59:59.000000",
116109
"2024-01-01 23:59:59.000000 UTC",
117110
None,
111+
"616263",
118112
]
119113
assert result["dtypes"] == {
120114
"orderkey": "int32",
@@ -126,6 +120,7 @@ def test_query(mssql: SqlServerContainer):
126120
"timestamp": "object",
127121
"timestamptz": "object",
128122
"test_null_time": "datetime64[ns]",
123+
"bytea_column": "object",
129124
}
130125

131126

ibis-server/tests/routers/v2/connector/test_mysql.py

+7
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,11 @@
5858
"expression": "cast(NULL as timestamp)",
5959
"type": "timestamp",
6060
},
61+
{
62+
"name": "bytea_column",
63+
"expression": "cast('abc' as bytea)",
64+
"type": "bytea",
65+
},
6166
],
6267
"primaryKey": "orderkey",
6368
},
@@ -123,6 +128,7 @@ def test_query(mysql: MySqlContainer):
123128
"2024-01-01 23:59:59.000000",
124129
"2024-01-01 23:59:59.000000",
125130
None,
131+
"616263",
126132
]
127133
assert result["dtypes"] == {
128134
"orderkey": "int32",
@@ -134,6 +140,7 @@ def test_query(mysql: MySqlContainer):
134140
"timestamp": "object",
135141
"timestamptz": "object",
136142
"test_null_time": "datetime64[ns]",
143+
"bytea_column": "object",
137144
}
138145

139146

ibis-server/tests/routers/v2/connector/test_postgres.py

+9-14
Original file line numberDiff line numberDiff line change
@@ -59,18 +59,14 @@
5959
"expression": "cast(NULL as timestamp)",
6060
"type": "timestamp",
6161
},
62+
{
63+
"name": "bytea_column",
64+
"expression": "cast('abc' as bytea)",
65+
"type": "bytea",
66+
},
6267
],
6368
"primaryKey": "orderkey",
6469
},
65-
{
66-
"name": "Customer",
67-
"refSql": "select * from public.customer",
68-
"columns": [
69-
{"name": "custkey", "expression": "c_custkey", "type": "integer"},
70-
{"name": "name", "expression": "c_name", "type": "varchar"},
71-
],
72-
"primaryKey": "custkey",
73-
},
7470
],
7571
}
7672

@@ -84,9 +80,6 @@ def postgres(request) -> PostgresContainer:
8480
pd.read_parquet(file_path("resource/tpch/data/orders.parquet")).to_sql(
8581
"orders", engine, index=False
8682
)
87-
pd.read_parquet(file_path("resource/tpch/data/customer.parquet")).to_sql(
88-
"customer", engine, index=False
89-
)
9083
request.addfinalizer(pg.stop)
9184
return pg
9285

@@ -103,7 +96,7 @@ def test_query(postgres: PostgresContainer):
10396
)
10497
assert response.status_code == 200
10598
result = response.json()
106-
assert len(result["columns"]) == 9
99+
assert len(result["columns"]) == len(manifest["models"][0]["columns"])
107100
assert len(result["data"]) == 1
108101
assert result["data"][0] == [
109102
1,
@@ -115,6 +108,7 @@ def test_query(postgres: PostgresContainer):
115108
"2024-01-01 23:59:59.000000",
116109
"2024-01-01 23:59:59.000000 UTC",
117110
None,
111+
"616263",
118112
]
119113
assert result["dtypes"] == {
120114
"orderkey": "int32",
@@ -126,6 +120,7 @@ def test_query(postgres: PostgresContainer):
126120
"timestamp": "object",
127121
"timestamptz": "object",
128122
"test_null_time": "datetime64[ns]",
123+
"bytea_column": "object",
129124
}
130125

131126

@@ -141,7 +136,7 @@ def test_query_with_connection_url(postgres: PostgresContainer):
141136
)
142137
assert response.status_code == 200
143138
result = response.json()
144-
assert len(result["columns"]) == 9
139+
assert len(result["columns"]) == len(manifest["models"][0]["columns"])
145140
assert len(result["data"]) == 1
146141
assert result["data"][0][0] == 1
147142
assert result["dtypes"] is not None

ibis-server/tests/routers/v2/connector/test_snowflake.py

-9
Original file line numberDiff line numberDiff line change
@@ -66,15 +66,6 @@
6666
],
6767
"primaryKey": "orderkey",
6868
},
69-
{
70-
"name": "Customer",
71-
"refSql": "select * from TPCH_SF1.CUSTOMER",
72-
"columns": [
73-
{"name": "custkey", "expression": "C_CUSTKEY", "type": "integer"},
74-
{"name": "name", "expression": "C_NAME", "type": "varchar"},
75-
],
76-
"primaryKey": "custkey",
77-
},
7869
],
7970
}
8071

ibis-server/tests/routers/v2/connector/test_trino.py

+7
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,11 @@
5454
"expression": "cast(NULL as timestamp)",
5555
"type": "timestamp",
5656
},
57+
{
58+
"name": "bytea_column",
59+
"expression": "cast('abc' as bytea)",
60+
"type": "bytea",
61+
},
5762
],
5863
"primaryKey": "orderkey",
5964
},
@@ -94,6 +99,7 @@ def test_query(trino: TrinoContainer):
9499
"2024-01-01 23:59:59.000000",
95100
"2024-01-01 23:59:59.000000 UTC",
96101
None,
102+
"616263",
97103
]
98104
assert result["dtypes"] == {
99105
"orderkey": "int64",
@@ -105,6 +111,7 @@ def test_query(trino: TrinoContainer):
105111
"timestamp": "object",
106112
"timestamptz": "object",
107113
"test_null_time": "datetime64[ns]",
114+
"bytea_column": "object",
108115
}
109116

110117

0 commit comments

Comments
 (0)