Skip to content

Commit f968f8f

Browse files
cpcloud authored and kszucs committed
feat(postgres): add Map(string, string) support via the built-in HSTORE extension
1 parent 8b01f1b commit f968f8f

File tree

7 files changed

+101
-14
lines changed

7 files changed

+101
-14
lines changed

ci/schema/postgresql.sql

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
DROP SEQUENCE IF EXISTS test_sequence;
22
CREATE SEQUENCE IF NOT EXISTS test_sequence;
33

4+
CREATE EXTENSION IF NOT EXISTS hstore;
45
CREATE EXTENSION IF NOT EXISTS postgis;
56
CREATE EXTENSION IF NOT EXISTS plpython3u;
67

@@ -204,3 +205,9 @@ INSERT INTO win VALUES
204205
('a', 2, 0),
205206
('a', 3, 1),
206207
('a', 4, 1);
208+
209+
DROP TABLE IF EXISTS map CASCADE;
210+
CREATE TABLE map (kv HSTORE);
211+
INSERT INTO map VALUES
212+
('a=>1,b=>2,c=>3'),
213+
('d=>4,e=>5,c=>6');

ibis/backends/base/sql/alchemy/__init__.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -241,10 +241,9 @@ def _get_insert_method(self, expr):
241241
return methodcaller("from_select", list(expr.columns), compiled)
242242

243243
def _columns_from_schema(self, name: str, schema: sch.Schema) -> list[sa.Column]:
    """Build one SQLAlchemy column per field of an ibis schema.

    Each ibis dtype is translated with ``to_sqla_type`` using the dialect of
    this backend's connection, and the column inherits the dtype's
    nullability.  ``name`` is accepted but not used by this implementation.
    """
    # Look the dialect up once instead of once per column.
    dialect = self.con.dialect
    columns = []
    for colname, dtype in zip(schema.names, schema.types):
        sqla_type = to_sqla_type(dialect, dtype)
        columns.append(sa.Column(colname, sqla_type, nullable=dtype.nullable))
    return columns
250249

ibis/backends/base/sql/alchemy/datatypes.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,13 @@ def _pg_array(dialect, itype):
172172
return sa.ARRAY(to_sqla_type(dialect, itype))
173173

174174

175+
@to_sqla_type.register(PGDialect, dt.Map)
def _pg_map(dialect, itype):
    """Translate an ibis map type to the PostgreSQL HSTORE type.

    Only ``map<string, string>`` is accepted; any other key or value type
    raises ``TypeError``.
    """
    keys_are_strings = itype.key_type.is_string()
    if keys_are_strings and itype.value_type.is_string():
        return postgresql.HSTORE
    raise TypeError(f"PostgreSQL only supports map<string, string>, got: {itype}")
180+
181+
175182
@to_sqla_type.register(Dialect, dt.Struct)
176183
def _struct(dialect, itype):
177184
return StructType(
@@ -294,6 +301,11 @@ def sa_macaddr(_, satype, nullable=True):
294301
return dt.MACADDR(nullable=nullable)
295302

296303

304+
@dt.dtype.register(PGDialect, postgresql.HSTORE)
def sa_hstore(_, satype, nullable=True):
    """Convert a PostgreSQL HSTORE type to an ibis ``map<string, string>``.

    The dialect and the SQLAlchemy type instance are not inspected; only
    ``nullable`` is carried over to the resulting ibis type.
    """
    string_type = dt.string
    return dt.Map(string_type, string_type, nullable=nullable)
307+
308+
297309
@dt.dtype.register(PGDialect, postgresql.INET)
def sa_inet(_, satype, nullable=True):
    """Convert a PostgreSQL INET type to the ibis INET type.

    Only ``nullable`` is forwarded; the dialect and SQLAlchemy type instance
    are ignored.
    """
    inet_type = dt.INET(nullable=nullable)
    return inet_type

ibis/backends/duckdb/registry.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,14 @@ def _struct_column(t, op):
268268
ops.Translate,
269269
# ibis.expr.operations.temporal
270270
ops.TimestampDiff,
271+
# ibis.expr.operations.maps
272+
ops.MapGet,
273+
ops.MapContains,
274+
ops.MapKeys,
275+
ops.MapValues,
276+
ops.MapMerge,
277+
ops.MapLength,
278+
ops.Map,
271279
}
272280

273281
operation_registry = {

ibis/backends/postgres/registry.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -396,11 +396,13 @@ def _literal(t, op):
396396
elif dtype.is_geospatial():
397397
# inline_metadata ex: 'SRID=4326;POINT( ... )'
398398
return sa.literal_column(geo.translate_literal(op, inline_metadata=True))
399-
elif isinstance(value, tuple):
399+
elif dtype.is_array():
400400
return sa.literal_column(
401401
str(pg.array(value).compile(compile_kwargs=dict(literal_binds=True))),
402402
type_=t.get_sqla_type(dtype),
403403
)
404+
elif dtype.is_map():
405+
return pg.hstore(pg.array(list(value.keys())), pg.array(list(value.values())))
404406
else:
405407
return sa.literal(value)
406408

@@ -585,5 +587,17 @@ def variance_compiler(t, op):
585587
ops.TimestampNow: lambda t, op: sa.literal_column(
586588
"CURRENT_TIMESTAMP", type_=t.get_sqla_type(op.output_dtype)
587589
),
590+
ops.MapGet: fixed_arity(
591+
lambda arg, key, default: sa.case(
592+
(arg.has_key(key), arg[key]), else_=default
593+
),
594+
3,
595+
),
596+
ops.MapContains: fixed_arity(pg.HSTORE.Comparator.has_key, 2),
597+
ops.MapKeys: unary(pg.HSTORE.Comparator.keys),
598+
ops.MapValues: unary(pg.HSTORE.Comparator.vals),
599+
ops.MapMerge: fixed_arity(operator.add, 2),
600+
ops.MapLength: unary(lambda arg: sa.func.cardinality(arg.keys())),
601+
ops.Map: fixed_arity(pg.hstore, 2),
588602
}
589603
)

ibis/backends/tests/test_map.py

Lines changed: 56 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,32 @@
1+
import contextlib
2+
13
import numpy as np
24
import pytest
5+
from pytest import param
36

47
import ibis
58
import ibis.expr.datatypes as dt
9+
from ibis.util import guid
610

711
pytestmark = [
812
pytest.mark.never(
9-
["sqlite", "mysql", "mssql", "postgres"], reason="No map support"
13+
["sqlite", "mysql", "mssql"], reason="Unlikely to ever add map support"
1014
),
1115
pytest.mark.notyet(
12-
["bigquery", "impala"], reason="backend doesn't implement map types"
16+
["bigquery", "impala"], reason="Backend doesn't yet implement map types"
1317
),
1418
pytest.mark.notimpl(
15-
["duckdb", "datafusion", "pyspark", "polars"], reason="Not implemented yet"
19+
["duckdb", "datafusion", "pyspark", "polars"],
20+
reason="Not yet implemented in ibis",
1621
),
1722
]
1823

1924

2025
@pytest.mark.notimpl(["pandas", "dask"])
def test_map_table(con):
    """The ``map`` test table exposes ``kv`` as a map column and has rows."""
    map_table = con.table("map")

    kv_type = map_table.kv.type()
    assert kv_type.is_map()

    # A single row is enough to show the table is not empty.
    first_row = map_table.limit(1).execute()
    assert not first_row.empty
2430

2531

2632
def test_literal_map_keys(con):
@@ -42,7 +48,7 @@ def test_literal_map_values(con):
4248
assert np.array_equal(result, ['a', 'b'])
4349

4450

45-
@pytest.mark.notimpl(["trino"])
51+
@pytest.mark.notimpl(["trino", "postgres"])
4652
@pytest.mark.notyet(["snowflake"])
4753
def test_scalar_isin_literal_map_keys(con):
4854
mapping = ibis.literal({'a': 1, 'b': 2})
@@ -54,6 +60,7 @@ def test_scalar_isin_literal_map_keys(con):
5460
assert con.execute(false) == False # noqa: E712
5561

5662

63+
@pytest.mark.notyet(["postgres"], reason="only support maps of string -> string")
5764
def test_map_scalar_contains_key_scalar(con):
5865
mapping = ibis.literal({'a': 1, 'b': 2})
5966
a = ibis.literal('a')
@@ -74,6 +81,7 @@ def test_map_scalar_contains_key_column(backend, alltypes, df):
7481

7582

7683
@pytest.mark.notyet(["snowflake"])
84+
@pytest.mark.notyet(["postgres"], reason="only support maps of string -> string")
7785
def test_map_column_contains_key_scalar(backend, alltypes, df):
7886
expr = ibis.map(ibis.array([alltypes.string_col]), ibis.array([alltypes.int_col]))
7987
series = df.apply(lambda row: {row['string_col']: row['int_col']}, axis=1)
@@ -85,13 +93,15 @@ def test_map_column_contains_key_scalar(backend, alltypes, df):
8593

8694

8795
@pytest.mark.notyet(["snowflake"])
@pytest.mark.notyet(["postgres"], reason="only support maps of string -> string")
def test_map_column_contains_key_column(alltypes):
    """A per-row map built from ``string_col`` contains that row's key."""
    keys = ibis.array([alltypes.string_col])
    values = ibis.array([alltypes.int_col])
    mapping = ibis.map(keys, values)

    contains = mapping.contains(alltypes.string_col).name('tmp')
    result = contains.execute()
    assert result.all()
92101

93102

94103
@pytest.mark.notyet(["snowflake"])
104+
@pytest.mark.notyet(["postgres"], reason="only support maps of string -> string")
95105
def test_literal_map_merge(con):
96106
a = ibis.literal({'a': 0, 'b': 2})
97107
b = ibis.literal({'a': 1, 'c': 3})
@@ -124,15 +134,30 @@ def test_literal_map_get_broadcast(backend, alltypes, df):
124134
backend.assert_series_equal(result, expected)
125135

126136

127-
def test_map_construct_dict(con):
128-
expr = ibis.map(['a', 'b'], [1, 2])
137+
@pytest.mark.parametrize(
    ("keys", "values"),
    [
        # ids name the *value* type of the map under construction
        param(
            ["a", "b"],
            [1, 2],
            id="int",
            marks=pytest.mark.notyet(
                ["postgres"], reason="only support maps of string -> string"
            ),
        ),
        param(["a", "b"], ["1", "2"], id="string"),
    ],
)
def test_map_construct_dict(con, keys, values):
    """Building a map from parallel key/value lists round-trips as a dict.

    The integer-valued case is marked ``notyet`` on postgres, whose HSTORE
    backed maps only support string keys and string values.
    """
    expr = ibis.map(keys, values)
    result = con.execute(expr.name('tmp'))
    assert result == dict(zip(keys, values))
131155

132156

133157
@pytest.mark.notimpl(
134158
["snowflake"], reason="unclear how to implement two arrays -> object construction"
135159
)
160+
@pytest.mark.notyet(["postgres"], reason="only support maps of string -> string")
136161
def test_map_construct_array_column(con, alltypes, df):
137162
expr = ibis.map(ibis.array([alltypes.string_col]), ibis.array([alltypes.int_col]))
138163
result = con.execute(expr)
@@ -141,25 +166,29 @@ def test_map_construct_array_column(con, alltypes, df):
141166
assert result.to_list() == expected.to_list()
142167

143168

169+
@pytest.mark.notyet(["postgres"], reason="only support maps of string -> string")
def test_map_get_with_compatible_value_smaller(con):
    """``get`` on a missing key returns a default smaller than the map values."""
    mapping = ibis.literal({'A': 1000, 'B': 2000})
    lookup = mapping.get('C', 3)
    result = con.execute(lookup)
    assert result == 3
148174

149175

176+
@pytest.mark.notyet(["postgres"], reason="only support maps of string -> string")
def test_map_get_with_compatible_value_bigger(con):
    """``get`` on a missing key returns a default larger than the map values."""
    mapping = ibis.literal({'A': 1, 'B': 2})
    lookup = mapping.get('C', 3000)
    result = con.execute(lookup)
    assert result == 3000
154181

155182

183+
@pytest.mark.notyet(["postgres"], reason="only support maps of string -> string")
def test_map_get_with_incompatible_value_different_kind(con):
    """``get`` on a missing key returns a float default for an integer map."""
    mapping = ibis.literal({'A': 1000, 'B': 2000})
    lookup = mapping.get('C', 3.0)
    result = con.execute(lookup)
    assert result == 3.0
160188

161189

162190
@pytest.mark.parametrize('null_value', [None, ibis.NA])
191+
@pytest.mark.notyet(["postgres"], reason="only support maps of string -> string")
163192
def test_map_get_with_null_on_not_nullable(con, null_value):
164193
map_type = dt.Map(dt.string, dt.Int16(nullable=False))
165194
value = ibis.literal({'A': 1000, 'B': 2000}).cast(map_type)
@@ -174,7 +203,25 @@ def test_map_get_with_null_on_null_type_with_null(con, null_value):
174203
assert con.execute(expr) is None
175204

176205

206+
@pytest.mark.notyet(["postgres"], reason="only support maps of string -> string")
def test_map_get_with_null_on_null_type_with_non_null(con):
    """``get`` on a missing key returns a non-null default for an all-null map."""
    mapping = ibis.literal({'A': None, 'B': None})
    lookup = mapping.get('C', 1)
    result = con.execute(lookup)
    assert result == 1
211+
212+
213+
@pytest.fixture
def tmptable(con):
    """Yield a freshly generated table name and drop that table afterwards.

    The fixture only produces the name; the test is responsible for creating
    the table.
    """
    table_name = guid()
    yield table_name

    try:
        con.drop_table(table_name)
    except NotImplementedError:
        # some backends don't implement drop
        pass
221+
222+
223+
@pytest.mark.notimpl(["clickhouse"], reason=".create_table not yet implemented in ibis")
def test_map_create_table(con, tmptable):
    """A table created with a ``map<string, string>`` column reads back as a map."""
    schema = ibis.schema({"xyz": "map<string, string>"})
    con.create_table(tmptable, schema=schema)

    created = con.table(tmptable)
    column_type = created.schema()["xyz"]
    assert column_type.is_map()

ibis/backends/tests/test_param.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ def test_scalar_param_struct(con):
8888
["mysql", "sqlite", "mssql"],
8989
reason="mysql and sqlite will never implement map types",
9090
)
91-
@pytest.mark.notyet(["bigquery", "postgres"])
91+
@pytest.mark.notyet(["bigquery"])
9292
def test_scalar_param_map(con):
9393
value = {'a': 'ghi', 'b': 'def', 'c': 'abc'}
9494
param = ibis.param(dt.Map(dt.string, dt.string))

0 commit comments

Comments
 (0)