Skip to content

Commit b14721f

Browse files
authored
Merge pull request #312 from MC-kit/devel
Stop using xarray
2 parents 0d0e422 + cb11aac commit b14721f

27 files changed

+408
-1318
lines changed

README.rst

+52-7
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
==============================================================================
2-
*xpypact*: FISPACT output to datasets converter
2+
*xpypact*: FISPACT output to Polars or DuckDB converter
33
==============================================================================
44

55

@@ -16,22 +16,26 @@
1616
Description
1717
-----------
1818

19-
The module loads FISPACT JSON output as xarray dataset.
19+
The module loads FISPACT JSON output files and converts to Polars dataframes
20+
with minor data normalization.
2021
This allows efficient data extraction and aggregation.
22+
Multiple JSON files can be combined using simple additional identification for different
23+
FISPACT runs. So far we use just two-dimensional identification by material
24+
and case. The case usually identifies certain neutron flux.
25+
2126

2227
Implemented functionality
2328
-------------------------
2429

2530
- export to DuckDB
2631
- export to parquet files
2732

28-
.. configures and runs FISPACT, converts FISPACT output to xarray datasets.
29-
3033
.. note::
3134

3235
Currently available FISPACT v.5 API uses rather old python version (3.6).
33-
That prevents direct use of their API in our package (>=3.8).
36+
That prevents direct use of their API in our package (>=3.10).
3437
Check if own python integration with FISPACT is reasonable and feasible.
38+
Or provide own FISPACT python binding.
3539

3640

3741
Installation
@@ -61,9 +65,50 @@ From source
6165
Examples
6266
--------
6367

64-
.. note::
68+
.. code-block::
69+
70+
from xpypact import FullDataCollector, Inventory
71+
72+
def get_material_id(p: Path) -> int:
73+
...
74+
75+
def get_case_id(p: Path) -> int:
76+
...
77+
78+
jsons = [path1, path2, ...]
79+
material_ids = {p: get_material_id(p) for p in jsons}
80+
case_ids = {p: get_case_id(p) for p in jsons}
81+
82+
collector = FullDataCollector()
83+
84+
for json in jsons:
85+
inventory = Inventory.from_json(json)
86+
collector.append(inventory, material_id=material_ids[json], case_id=case_ids[json])
87+
88+
collected = collector.get_result()
89+
90+
# save to parquet files
91+
92+
collected.save_to_parquets(Path.cwd() / "parquets")
93+
94+
# or use DuckDB database
95+
96+
from xpypact.dao import save
97+
import duckdb as db
98+
99+
con = db.connect()
100+
save(con, collected)
101+
102+
gamma_from_db = con.sql(
103+
"""
104+
select
105+
g, rate
106+
from timestep_gamma
107+
where material_id = 1 and case_id = 54 and time_step_number = 7
108+
order by g
109+
""",
110+
).fetchall()
65111
66-
Add examples
67112
68113
Contributing
69114
------------

adhoc/demo_duckdb_multithreading.py

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
33
From: https://duckdb.org/docs/guides/python/multiple_threads.html
44
"""
5+
56
from __future__ import annotations
67

78
import random

adhoc/demo_duckdb_parquet_access.py

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""TODO..."""
2+
23
from __future__ import annotations
34

45
from pathlib import Path

benchmarks/test_inventory.py

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
33
See https://pytest-benchmark.readthedocs.io/en/latest/index.html
44
"""
5+
56
from __future__ import annotations
67

78
from typing import TYPE_CHECKING

noxfile.py

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Nox sessions."""
2+
23
from __future__ import annotations
34

45
from typing import TYPE_CHECKING, Final

poetry.lock

+256-357
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

+3-8
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "xpypact"
3-
version = "0.10.0"
3+
version = "0.11.0"
44
description = "\"Python workflow framework for FISPACT.\""
55
authors = ["dvp <[email protected]>"]
66
license = "MIT"
@@ -48,21 +48,16 @@ Changelog = "https://github.com/MC-kit/xpypact/releases"
4848

4949

5050
[tool.poetry.dependencies]
51-
# msgspec-0.18.5 doesn't work on 3.9 - uses | without importing annotations
51+
# msgspec-0.18.5 doesn't work on 3.9 - uses `|` without importing annotations from __future__
5252
# duckdb-0.9.2, has no wheels for 3.12 and fails to build from source
53-
python = ">=3.10,<3.13"
53+
python = ">=3.9,<3.13"
5454
duckdb = ">=0.8.0"
55-
h5netcdf = ">=0.13.1"
5655
# mckit-nuclides = {version = ">=0.2.5", allow-prereleases = true}
5756
numpy = ">=1.26.0"
58-
openpyxl = ">=3.0.9"
59-
pandas = ">=2.0.0"
60-
xarray = ">=2022.3.0"
6157
multipledispatch = ">=0.6.0"
6258
msgspec = ">=0.18.5"
6359
rich = ">=13.7.0"
6460
polars = {version = "^0.20.3", extras = ["all"]}
65-
pyarrow = "^14.0.2"
6661
mckit-nuclides = "^0.3.0"
6762

6863
[tool.poetry.group.dev.dependencies]

src/xpypact/__init__.py

+29-8
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,23 @@
22
33
Wraps FISPACT workflow. Transforms FISPACT output to xarray datasets.
44
"""
5+
56
from __future__ import annotations
67

78
from importlib import metadata as _meta
89
from importlib.metadata import PackageNotFoundError, version
910

10-
from .data_arrays import from_json, scale_by_flux, scale_by_mass
11+
from .collector import (
12+
FullDataCollector,
13+
GammaSchema,
14+
NuclideSchema,
15+
RunDataSchema,
16+
TimeStepNuclideSchema,
17+
TimeStepSchema,
18+
)
19+
from .inventory import Inventory, RunDataCorrected
20+
from .nuclide import Nuclide, NuclideInfo
21+
from .time_step import DoseRate, GammaSpectrum, TimeStep
1122

1223
try:
1324
__version__ = version(__name__)
@@ -23,15 +34,25 @@
2334
__copyright__ = f"Copyright 2021 {__author__}"
2435

2536
__all__ = [
26-
"__version__",
27-
"__distribution__",
28-
"__meta_data__",
37+
"DoseRate",
38+
"FullDataCollector",
39+
"GammaSchema",
40+
"GammaSpectrum",
41+
"Inventory",
42+
"Nuclide",
43+
"NuclideInfo",
44+
"NuclideSchema",
45+
"RunDataCorrected",
46+
"RunDataSchema",
47+
"TimeStep",
48+
"TimeStepNuclideSchema",
49+
"TimeStepSchema",
2950
"__author__",
3051
"__author_email__",
52+
"__copyright__",
53+
"__distribution__",
3154
"__license__",
55+
"__meta_data__",
3256
"__summary__",
33-
"__copyright__",
34-
"from_json",
35-
"scale_by_flux",
36-
"scale_by_mass",
57+
"__version__",
3758
]

src/xpypact/collector.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
"""Collect data from multiple inventories to Polars tables."""
2+
23
from __future__ import annotations
34

45
from typing import TYPE_CHECKING
56

6-
import datetime
7+
import datetime as dt
78
import sys
89
import threading
910

@@ -25,9 +26,9 @@
2526

2627

2728
if sys.version_info >= (3, 11): # pragma: no cover
28-
UTC = datetime.UTC
29+
UTC = dt.UTC
2930
else:
30-
UTC = datetime.timezone.utc # pragma: no cover
31+
UTC = dt.timezone.utc # pragma: no cover
3132

3233
RunDataSchema = OrderedDict(
3334
material_id=pl.UInt32,
@@ -146,14 +147,14 @@ def append(self, inventory: Inventory, material_id: int, case_id: int) -> FullDa
146147
def _append_rundata(self, inventory, material_id, case_id):
147148
rundata = inventory.meta_info
148149
st = strptime(rundata.timestamp, "%H:%M:%S %d %B %Y")
149-
ts = datetime.datetime(
150+
ts = dt.datetime( # noqa: DTZ001 - no tzinfo is available from the FISPACT output
150151
year=st.tm_year,
151152
month=st.tm_mon,
152153
day=st.tm_mday,
153154
hour=st.tm_hour,
154155
minute=st.tm_min,
155156
second=st.tm_sec,
156-
tzinfo=UTC,
157+
tzinfo=None,
157158
)
158159
rundata_df = pl.DataFrame(
159160
[

src/xpypact/dao/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Interface and implementations of data access objects (DAO)."""
2+
23
from __future__ import annotations
34

45
from .api import DataAccessInterface

src/xpypact/dao/api.py

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Interface to data access facilities."""
2+
23
from __future__ import annotations
34

45
from typing import TYPE_CHECKING

src/xpypact/dao/duckdb/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""DAO implementation for DuckDB."""
2+
23
from __future__ import annotations
34

45
from .implementation import DuckDBDAO, create_indices, save

src/xpypact/dao/duckdb/implementation.py

+5-7
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Code to implement DuckDB DAO."""
2+
23
from __future__ import annotations
34

45
from typing import TYPE_CHECKING
@@ -9,7 +10,6 @@
910

1011
if TYPE_CHECKING:
1112
import duckdb as db
12-
import pandas as pd
1313

1414
from xpypact.collector import FullDataCollector
1515

@@ -29,9 +29,9 @@ class DuckDBDAO(ms.Struct):
2929

3030
con: db.DuckDBPyConnection
3131

32-
def get_tables_info(self) -> pd.DataFrame:
32+
def get_tables_info(self) -> db.DuckDBPyRelation:
3333
"""Get information on tables in schema."""
34-
return self.con.execute("select * from information_schema.tables").df()
34+
return self.con.sql("select * from information_schema.tables")
3535

3636
def tables(self) -> tuple[str, str, str, str, str]:
3737
"""List tables being used by xpypact dao.
@@ -43,13 +43,11 @@ def tables(self) -> tuple[str, str, str, str, str]:
4343

4444
def has_schema(self) -> bool:
4545
"""Check if the schema is available in a database."""
46-
db_tables = self.get_tables_info()
46+
table_names = self.get_tables_info().select("table_name").fetchnumpy()["table_name"]
4747

48-
if len(db_tables) < len(self.tables()):
48+
if len(table_names) < len(self.tables()):
4949
return False
5050

51-
table_names = db_tables["table_name"].to_numpy()
52-
5351
return all(name in table_names for name in self.tables())
5452

5553
def create_schema(self) -> None:

0 commit comments

Comments
 (0)