Commit a1164df (parent: dd55beb)

refactor: generate uuid-based names for temp tables

9 files changed, +36 -71 lines

ibis/backends/base/__init__.py (1 addition, 1 deletion)

@@ -462,7 +462,7 @@ def __init__(self, *args, **kwargs):
             populate=self._load_into_cache,
             lookup=lambda name: self.table(name).op(),
             finalize=self._clean_up_cached_table,
-            generate_name=functools.partial(util.generate_unique_table_name, "cache"),
+            generate_name=functools.partial(util.gen_name, "cache"),
             key=lambda expr: expr.op(),
         )
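
The caching helper configured here expects `generate_name` to be a zero-argument factory, while `gen_name` takes a namespace, so the call site binds the namespace with `functools.partial`. A minimal sketch of the resulting behavior (the uuid suffix varies per call):

    import functools
    from ibis.util import gen_name  # renamed in this commit

    # Bind the "cache" namespace so the cache can call the factory with no arguments.
    generate_name = functools.partial(gen_name, "cache")
    print(generate_name())  # e.g. '_ibis_cache_...' with a lowercase base-36 suffix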

ibis/backends/datafusion/__init__.py (3 additions, 9 deletions)

@@ -1,6 +1,5 @@
 from __future__ import annotations

-import itertools
 import re
 from functools import lru_cache
 from pathlib import Path
@@ -15,7 +14,7 @@
 import ibis.expr.types as ir
 from ibis.backends.base import BaseBackend
 from ibis.backends.datafusion.compiler import translate
-from ibis.util import normalize_filename
+from ibis.util import gen_name, normalize_filename

 try:
     from datafusion import ExecutionContext as SessionContext
@@ -24,11 +23,6 @@

 import datafusion

-# counters for in-memory, parquet, and csv reads
-# used if no table name is specified
-pa_n = itertools.count(0)
-csv_n = itertools.count(0)
-

 class Backend(BaseBackend):
     name = 'datafusion'
@@ -169,7 +163,7 @@ def read_csv(
             The just-registered table
         """
         path = normalize_filename(path)
-        table_name = table_name or f"ibis_read_csv_{next(csv_n)}"
+        table_name = table_name or gen_name("read_csv")
         # Our other backends support overwriting views / tables when reregistering
         self._context.deregister_table(table_name)
         self._context.register_csv(table_name, path, **kwargs)
@@ -196,7 +190,7 @@ def read_parquet(
             The just-registered table
         """
         path = normalize_filename(path)
-        table_name = table_name or f"ibis_read_parquet_{next(pa_n)}"
+        table_name = table_name or gen_name("read_parquet")
         # Our other backends support overwriting views / tables when reregistering
         self._context.deregister_table(table_name)
         self._context.register_parquet(table_name, path, **kwargs)
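
The removed module-level counters produced names like `ibis_read_csv_0`, `ibis_read_csv_1`, which are unique only within a single process; the uuid-based replacement avoids collisions across sessions. A short sketch contrasting the two schemes (the uuid suffix below is illustrative):

    import itertools
    from ibis.util import gen_name

    # Old scheme: a process-local counter that resets on restart.
    csv_n = itertools.count(0)
    old_name = f"ibis_read_csv_{next(csv_n)}"  # 'ibis_read_csv_0'

    # New scheme: uuid4-based, effectively collision-free across sessions.
    new_name = gen_name("read_csv")  # e.g. '_ibis_read_csv_<base36 uuid>'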

ibis/backends/duckdb/__init__.py (4 additions, 12 deletions)

@@ -3,7 +3,6 @@
 from __future__ import annotations

 import ast
-import itertools
 import os
 import warnings
 import weakref
@@ -30,13 +29,6 @@

 import ibis.expr.operations as ops

-# counters for in-memory, parquet, csv, and json reads
-# used if no table name is specified
-pd_n = itertools.count(0)
-pa_n = itertools.count(0)
-csv_n = itertools.count(0)
-json_n = itertools.count(0)
-

 def normalize_filenames(source_list):
     # Promote to list
@@ -276,7 +268,7 @@ def read_json(
                 f"`read_json` requires duckdb >= 0.7.0, duckdb {version} is installed"
             )
         if not table_name:
-            table_name = f"ibis_read_json_{next(json_n)}"
+            table_name = util.gen_name("read_json")

         source = sa.select(sa.literal_column("*")).select_from(
             sa.func.read_json_auto(
@@ -318,7 +310,7 @@ def read_csv(
         source_list = normalize_filenames(source_list)

         if not table_name:
-            table_name = f"ibis_read_csv_{next(csv_n)}"
+            table_name = util.gen_name("read_csv")

         # auto_detect and columns collide, so we set auto_detect=True
         # unless COLUMNS has been specified
@@ -362,7 +354,7 @@ def read_parquet(
         """
         source_list = normalize_filenames(source_list)

-        table_name = table_name or f"ibis_read_parquet_{next(pa_n)}"
+        table_name = table_name or util.gen_name("read_parquet")

         # Default to using the native duckdb parquet reader
         # If that fails because of auth issues, fall back to ingesting via
@@ -457,7 +449,7 @@ def _clean_up_string_columns(
             }
         )

-        table_name = table_name or f"ibis_read_in_memory_{next(pd_n)}"
+        table_name = table_name or util.gen_name("read_in_memory")
         with self.begin() as con:
             con.connection.register(table_name, _clean_up_string_columns(dataframe))
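
With this change, reading a file without passing `table_name` registers it under a uuid-based name. A hedged usage sketch (assumes a local `data.csv` exists):

    import ibis

    con = ibis.duckdb.connect()   # in-memory DuckDB database
    t = con.read_csv("data.csv")  # registered as '_ibis_read_csv_<uuid>'
    print(con.list_tables())      # the generated name appears here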

ibis/backends/duckdb/tests/conftest.py (4 additions, 8 deletions)

@@ -14,12 +14,8 @@


 class TestConf(BackendTest, RoundAwayFromZero):
-    def __init__(
-        self, data_directory: Path, extension_directory: Path | None = None
-    ) -> None:
-        self.connection = self.connect(
-            data_directory, extension_directory=str(extension_directory)
-        )
+    def __init__(self, data_directory: Path, **kwargs: Any) -> None:
+        self.connection = self.connect(data_directory, **kwargs)

         script_dir = data_directory.parent

@@ -52,11 +48,11 @@ def _load_data(data_dir, script_dir, **_: Any) -> None:
         return TestConf(data_directory=data_dir)

     @staticmethod
-    def connect(data_directory: Path, **kwargs) -> BaseBackend:
+    def connect(data_directory: Path, **kwargs: Any) -> BaseBackend:
         pytest.importorskip("duckdb")
         return ibis.duckdb.connect(**kwargs)  # type: ignore


 @pytest.fixture
 def con(data_directory, tmp_path: Path):
-    return TestConf(data_directory, extension_directory=tmp_path).connection
+    return TestConf(data_directory, extension_directory=str(tmp_path)).connection

ibis/backends/polars/__init__.py (4 additions, 11 deletions)

@@ -1,6 +1,5 @@
 from __future__ import annotations

-import itertools
 from functools import lru_cache
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Mapping, MutableMapping
@@ -15,17 +14,11 @@
 import ibis.expr.types as ir
 from ibis.backends.base import BaseBackend
 from ibis.backends.polars.compiler import translate
-from ibis.util import deprecated, normalize_filename
+from ibis.util import deprecated, gen_name, normalize_filename

 if TYPE_CHECKING:
     import pandas as pd

-# counters for in-memory, parquet, and csv reads
-# used if no table name is specified
-pd_n = itertools.count(0)
-pa_n = itertools.count(0)
-csv_n = itertools.count(0)
-

 class Backend(BaseBackend):
     name = "polars"
@@ -154,7 +147,7 @@ def read_csv(
             The just-registered table
         """
         path = normalize_filename(path)
-        table_name = table_name or f"ibis_read_csv_{next(csv_n)}"
+        table_name = table_name or gen_name("read_csv")
         try:
             self._tables[table_name] = pl.scan_csv(path, **kwargs)
         except pl.exceptions.ComputeError:
@@ -184,7 +177,7 @@ def read_pandas(
         ir.Table
             The just-registered table
         """
-        table_name = table_name or f"ibis_read_in_memory_{next(pd_n)}"
+        table_name = table_name or gen_name("read_in_memory")
         self._tables[table_name] = pl.from_pandas(source, **kwargs).lazy()
         return self.table(table_name)

@@ -211,7 +204,7 @@ def read_parquet(
             The just-registered table
         """
         path = normalize_filename(path)
-        table_name = table_name or f"ibis_read_parquet_{next(pa_n)}"
+        table_name = table_name or gen_name("read_parquet")
         self._tables[table_name] = pl.scan_parquet(path, **kwargs)
         return self.table(table_name)
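
The polars backend follows the same pattern for in-memory sources; a hedged sketch (assumes polars and pandas are installed):

    import ibis
    import pandas as pd

    con = ibis.polars.connect()
    t = con.read_pandas(pd.DataFrame({"a": [1, 2, 3]}))
    # with no table_name, the table is stored as '_ibis_read_in_memory_<uuid>'
    print(con.list_tables())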

ibis/backends/pyspark/__init__.py (2 additions, 6 deletions)

@@ -1,6 +1,5 @@
 from __future__ import annotations

-import itertools
 from pathlib import Path
 from typing import TYPE_CHECKING, Any

@@ -39,9 +38,6 @@
     'escape': '"',
 }

-pa_n = itertools.count(0)
-csv_n = itertools.count(0)
-

 def normalize_filenames(source_list):
     # Promote to list
@@ -613,7 +609,7 @@ def read_parquet(
         """
         source = util.normalize_filename(source)
         spark_df = self._session.read.parquet(source, **kwargs)
-        table_name = table_name or f"ibis_read_parquet_{next(pa_n)}"
+        table_name = table_name or util.gen_name("read_parquet")

         spark_df.createOrReplaceTempView(table_name)
         return self.table(table_name)
@@ -645,7 +641,7 @@ def read_csv(
         """
         source_list = normalize_filenames(source_list)
         spark_df = self._session.read.csv(source_list, **kwargs)
-        table_name = table_name or f"ibis_read_csv_{next(csv_n)}"
+        table_name = table_name or util.gen_name("read_csv")

         spark_df.createOrReplaceTempView(table_name)
         return self.table(table_name)

ibis/backends/tests/test_register.py (14 additions, 12 deletions)

@@ -96,7 +96,7 @@ def test_register_csv(con, data_directory, fname, in_table_name, out_table_name)
     with pushd(data_directory):
         table = con.register(fname, table_name=in_table_name)

-    assert any(t.startswith(out_table_name) for t in con.list_tables())
+    assert any(out_table_name in t for t in con.list_tables())
     if con.name != "datafusion":
         table.count().execute()

@@ -167,14 +167,16 @@ def read_table(path: Path) -> Iterator[tuple[str, pa.Table]]:
 @pytest.mark.parametrize(
     ("fname", "in_table_name", "out_table_name"),
     [
-        pytest.param(
-            "parquet://functional_alltypes.parquet",
-            None,
-            "ibis_read_parquet",
+        param(
+            "parquet://functional_alltypes.parquet", None, "ibis_read_parquet", id="url"
+        ),
+        param("functional_alltypes.parquet", "funk_all", "funk_all", id="basename"),
+        param(
+            "parquet://functional_alltypes.parq", "funk_all", "funk_all", id="url_parq"
+        ),
+        param(
+            "parquet://functional_alltypes", None, "ibis_read_parquet", id="url_no_ext"
         ),
-        ("functional_alltypes.parquet", "funk_all", "funk_all"),
-        pytest.param("parquet://functional_alltypes.parq", "funk_all", "funk_all"),
-        ("parquet://functional_alltypes", None, "ibis_read_parquet"),
     ],
 )
 @pytest.mark.notyet(
@@ -205,7 +207,7 @@ def test_register_parquet(
     with pushd(tmp_path):
         table = con.register(f"parquet://{fname.name}", table_name=in_table_name)

-    assert any(t.startswith(out_table_name) for t in con.list_tables())
+    assert any(out_table_name in t for t in con.list_tables())

     if con.name != "datafusion":
         table.count().execute()
@@ -246,7 +248,7 @@ def test_register_iterator_parquet(
         table_name=None,
     )

-    assert any(t.startswith("ibis_read_parquet") for t in con.list_tables())
+    assert any("ibis_read_parquet" in t for t in con.list_tables())

     assert table.count().execute()

@@ -432,7 +434,7 @@ def test_read_parquet(
     fname = str(Path(fname).absolute())
     table = con.read_parquet(fname, table_name=in_table_name)

-    assert any(t.startswith(out_table_name) for t in con.list_tables())
+    assert any(out_table_name in t for t in con.list_tables())

     if con.name != "datafusion":
         table.count().execute()
@@ -472,6 +474,6 @@ def test_read_csv(con, data_directory, fname, in_table_name, out_table_name):
     fname = str(Path(fname).absolute())
     table = con.read_csv(fname, table_name=in_table_name)

-    assert any(t.startswith(out_table_name) for t in con.list_tables())
+    assert any(out_table_name in t for t in con.list_tables())
     if con.name != "datafusion":
         table.count().execute()
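
The assertions move from a prefix check to a substring check because `gen_name` (see `ibis/util.py` below) prepends a leading underscore: generated names now look like `_ibis_read_parquet_<uuid>`, so `startswith("ibis_read_parquet")` would no longer match. A quick illustration:

    from ibis.util import gen_name

    name = gen_name("read_parquet")                  # '_ibis_read_parquet_<uuid>'
    assert not name.startswith("ibis_read_parquet")  # fails the old prefix check
    assert "ibis_read_parquet" in name               # substring check still passes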

ibis/expr/api.py (2 additions, 10 deletions)

@@ -419,11 +419,7 @@ def _memtable_from_pyarrow_table(
         assert schema is None, "if `columns` is not `None` then `schema` must be `None`"
         schema = sch.Schema(dict(zip(columns, sch.infer(data).values())))
     return ops.InMemoryTable(
-        name=(
-            name
-            if name is not None
-            else util.generate_unique_table_name("pyarrow_memtable")
-        ),
+        name=name if name is not None else util.gen_name("pyarrow_memtable"),
         schema=sch.infer(data) if schema is None else schema,
         data=PyArrowTableProxy(data),
     ).to_expr()
@@ -451,11 +447,7 @@ def _memtable_from_dataframe(
         )
         df = df.rename(columns=dict(zip(cols, newcols)))
     op = ops.InMemoryTable(
-        name=(
-            name
-            if name is not None
-            else util.generate_unique_table_name("pandas_memtable")
-        ),
+        name=name if name is not None else util.gen_name("pandas_memtable"),
         schema=sch.infer(df) if schema is None else schema,
         data=DataFrameProxy(df),
     )
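
These helpers back `ibis.memtable`, so unnamed in-memory tables also pick up uuid-based names. A hedged sketch (assumes pandas is installed):

    import ibis
    import pandas as pd

    t = ibis.memtable(pd.DataFrame({"a": [1, 2, 3]}))
    print(t.op().name)  # e.g. '_ibis_pandas_memtable_<base36 uuid>'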

ibis/util.py (2 additions, 2 deletions)

@@ -538,6 +538,6 @@ def _absolufy_paths(name):
     return source


-def generate_unique_table_name(namespace: str) -> str:
-    """Creates case-insensitive uuid4 unique table name."""
+def gen_name(namespace: str) -> str:
+    """Create a case-insensitive uuid4 unique table name."""
    return f"_ibis_{namespace}_{np.base_repr(uuid.uuid4().int, 36)}".lower()
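
The name encodes a uuid4 integer in base 36 and lowercases it; `np.base_repr` emits uppercase digits for bases above 10, and lowercasing keeps the name stable on backends that case-fold identifiers, which is presumably what "case-insensitive" in the docstring refers to. A standalone illustration:

    import uuid
    import numpy as np

    # A 128-bit uuid4 needs about 25 base-36 digits.
    suffix = np.base_repr(uuid.uuid4().int, 36).lower()
    print(f"_ibis_cache_{suffix}")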
