Commit 33c0ae2

Merge pull request #100 from kif/99_export_HDF5_to_ZIP
Export hdf5 to zip
2 parents e257206 + 8c2c509 commit 33c0ae2

File tree: 4 files changed, +270 -45 lines changed

src/freesas/app/extract_ascii.py

Lines changed: 84 additions & 42 deletions
@@ -26,8 +26,8 @@
 
 __author__ = "Jérôme Kieffer"
 __license__ = "MIT"
-__copyright__ = "2020, ESRF"
-__date__ = "15/01/2021"
+__copyright__ = "2020-2024, ESRF"
+__date__ = "16/04/2025"
 
 import io
 import os
@@ -38,6 +38,7 @@
 import posixpath
 from collections import namedtuple, OrderedDict
 import json
+import zipfile
 import copy
 import pyFAI
 from pyFAI.io import Nexus
@@ -78,6 +79,13 @@ def parse():
         help="extract every individual frame",
         default=False,
     )
+    parser.add_argument(
+        "-z",
+        "--zip",
+        action="store_true",
+        help="extract every individual frame into a zip-file",
+        default=False,
+    )
     return parser.parse_args()
 
 
@@ -87,11 +95,24 @@ def extract_averaged(filename):
     results["filename"] = filename
     # Missing: comment normalization
     with Nexus(filename, "r") as nxsr:
-        entry_grp = nxsr.get_entries()[0]
+        default = nxsr.h5.attrs.get("default")
+        if default and default in nxsr.h5:
+            entry_grp = nxsr.h5[default]
+        else:
+            entry_grp = nxsr.get_entries()[0]
         results["h5path"] = entry_grp.name
-        nxdata_grp = nxsr.h5[entry_grp.attrs["default"]]
+        # program = entry_grp.get("program_name")
+        # if program == ""
+        default = entry_grp.attrs["default"]
+        if posixpath.split(default)[-1] == "hplc":
+            default = posixpath.join(posixpath.split(default)[0], "results")
+        # print(default)
+        nxdata_grp = nxsr.h5[default]
         signal = nxdata_grp.attrs["signal"]
         axis = nxdata_grp.attrs["axes"]
+        if not isinstance(axis, (str, bytes)):
+            logger.error(f"There are several curves in the dataset {default} from file {filename}, please use the option --all to extract them all")
+            sys.exit(1)
         results["I"] = nxdata_grp[signal][()]
         results["q"] = nxdata_grp[axis][()]
         results["std"] = nxdata_grp["errors"][()]
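For readers less used to the NeXus conventions involved here: the hunk above resolves which NXdata group to read by following "default" attributes (file root -> NXentry -> NXdata), with a special case for HPLC entries whose default points at the "hplc" group. A minimal sketch with plain h5py that mirrors this logic; the file name "scan.h5" and the attribute values in the comments are hypothetical, not taken from the commit:

import h5py
import posixpath

with h5py.File("scan.h5", "r") as h5:
    default_entry = h5.attrs.get("default")            # e.g. "entry_0000"
    if default_entry and default_entry in h5:
        entry = h5[default_entry]
    else:
        entry = h5[sorted(h5.keys())[0]]                # crude stand-in for Nexus.get_entries()[0]
    target = entry.attrs["default"]                     # e.g. "entry_0000/3_azimuthal_integration/results"
    if posixpath.split(target)[-1] == "hplc":           # HPLC files: switch to the averaged results
        target = posixpath.join(posixpath.split(target)[0], "results")
    nxdata = h5[target]
    print(nxdata.attrs["signal"], nxdata.attrs["axes"])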
@@ -102,15 +123,12 @@
         results["geometry"] = json.loads(
             integration_grp["configuration/data"][()]
         )
-        results["polarization"] = integration_grp[
-            "configuration/polarization_factor"
-        ][()]
+        results["polarization"] = integration_grp["configuration/polarization_factor"][()]
 
         instrument_grps = nxsr.get_class(entry_grp, class_type="NXinstrument")
         if instrument_grps:
-            detector_grp = nxsr.get_class(
-                instrument_grps[0], class_type="NXdetector"
-            )[0]
+            detector_grp = nxsr.get_class(instrument_grps[0],
+                                          class_type="NXdetector")[0]
             results["mask"] = detector_grp["pixel_mask"].attrs["filename"]
         sample_grp = nxsr.get_class(entry_grp, class_type="NXsample")[0]
         results["sample"] = posixpath.split(sample_grp.name)[-1]
@@ -119,52 +137,68 @@
             results["exposure temperature"] = sample_grp["temperature"][()]
         results["concentration"] = sample_grp["concentration"][()]
         if "2_correlation_mapping" in entry_grp:
-            results["to_merge"] = entry_grp[
-                "2_correlation_mapping/results/to_merge"
-            ][()]
+            results["to_merge"] = entry_grp["2_correlation_mapping/results/to_merge"][()]
     return results
 
 
 def extract_all(filename):
-    "return some infomations extracted from a HDF5 file for all individual frames"
+    """return some information extracted from a HDF5 file for all individual frames.
+    Supports HPLC and SC and freshly integrated blocks of frames"""
     res = []
     results = OrderedDict()
     results["filename"] = filename
     with Nexus(filename, "r") as nxsr:
-        entry_grp = nxsr.get_entries()[0]
+        default = nxsr.h5.attrs.get("default")
+        if default and default in nxsr.h5:
+            entry_grp = nxsr.h5[default]
+        else:
+            entry_grp = nxsr.get_entries()[0]
+        program = entry_grp.get("program_name")[()].decode() if "program_name" in entry_grp else None
+
+        if program == "bm29.hplc":
+            target = "1_chromatogram"
+        elif program == "bm29.integratemultiframe":
+            target = "1_integration"
+        elif program == "bm29.subtract":
+            target = "3_azimuthal_integration"
+        else:
+            raise RuntimeError(f"Unable to read file written by {program}")
+        target = posixpath.join(entry_grp.name, target, "results")
         results["h5path"] = entry_grp.name
-        nxdata_grp = nxsr.h5[entry_grp.name + "/1_integration/results"]
+        nxdata_grp = nxsr.h5[target]
        signal = nxdata_grp.attrs["signal"]
         axis = nxdata_grp.attrs["axes"][1]
         I = nxdata_grp[signal][()]
         results["q"] = nxdata_grp[axis][()]
         std = nxdata_grp["errors"][()]
-        results["unit"] = pyFAI.units.to_unit(
-            axis + "_" + nxdata_grp[axis].attrs["units"]
-        )
+        try:
+            results["unit"] = pyFAI.units.to_unit(axis + "_" + nxdata_grp[axis].attrs["units"])
+        except KeyError:
+            logger.warning("Unable to parse radial units")
         integration_grp = nxdata_grp.parent
-        results["geometry"] = json.loads(
-            integration_grp["configuration/data"][()]
-        )
-        results["polarization"] = integration_grp[
-            "configuration/polarization_factor"
-        ][()]
-        instrument_grp = nxsr.get_class(entry_grp, class_type="NXinstrument")[
-            0
-        ]
-        detector_grp = nxsr.get_class(instrument_grp, class_type="NXdetector")[
-            0
-        ]
-        results["mask"] = detector_grp["pixel_mask"].attrs["filename"]
-        sample_grp = nxsr.get_class(entry_grp, class_type="NXsample")[0]
-        results["sample"] = posixpath.split(sample_grp.name)[-1]
-        results["buffer"] = sample_grp["buffer"][()]
-        if "temperature_env" in sample_grp:
-            results["storage temperature"] = sample_grp["temperature_env"][()]
-        if "temperature" in sample_grp:
-            results["exposure temperature"] = sample_grp["temperature"][()]
-        if "concentration" in sample_grp:
-            results["concentration"] = sample_grp["concentration"][()]
+        if "configuration/data" in integration_grp:
+            results["geometry"] = json.loads(integration_grp["configuration/data"][()])
+        else:
+            logger.warning("Unable to parse AzimuthalIntegrator configuration")
+        if "configuration/polarization_factor" in integration_grp:
+            results["polarization"] = integration_grp["configuration/polarization_factor"][()]
+        instrument_grp = nxsr.get_class(entry_grp, class_type="NXinstrument")
+        if instrument_grp:
+            detector_grp = nxsr.get_class(instrument_grp[0], class_type="NXdetector")
+            if detector_grp:
+                results["mask"] = detector_grp[0]["pixel_mask"].attrs["filename"]
+        sample_grp = nxsr.get_class(entry_grp, class_type="NXsample")
+        if sample_grp:
+            sample_grp = sample_grp[0]
+            results["sample"] = posixpath.split(sample_grp.name)[-1]
+            if "buffer" in sample_grp:
+                results["buffer"] = sample_grp["buffer"][()]
+            if "temperature_env" in sample_grp:
+                results["storage temperature"] = sample_grp["temperature_env"][()]
+            if "temperature" in sample_grp:
+                results["exposure temperature"] = sample_grp["temperature"][()]
+            if "concentration" in sample_grp:
+                results["concentration"] = sample_grp["concentration"][()]
         # if "2_correlation_mapping" in entry_grp:
         # results["to_merge"] = entry_grp["2_correlation_mapping/results/to_merge"][()]
     for i, s in zip(I, std):
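The program_name dispatch added above decides which NXprocess group of the entry holds the per-frame curves. For illustration only, the same mapping written as a lookup table rather than an if/elif chain (a sketch; the strings are the ones used in the diff, the helper name is hypothetical):

import posixpath

PROGRAM_TO_TARGET = {
    "bm29.hplc": "1_chromatogram",
    "bm29.integratemultiframe": "1_integration",
    "bm29.subtract": "3_azimuthal_integration",
}

def results_path(entry_name, program):
    # Returns "<entry>/<process group>/results"; unknown producers raise,
    # exactly like the else branch of the diff.
    try:
        target = PROGRAM_TO_TARGET[program]
    except KeyError:
        raise RuntimeError(f"Unable to read file written by {program}")
    return posixpath.join(entry_name, target, "results")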
@@ -326,15 +360,23 @@ def main():
     input_len = len(files)
     logger.debug("%s input files", input_len)
     for src in files:
+        print(f"{src} \t --> ", end="")
         if args.all:
             dest = os.path.splitext(src)[0] + "%04i.dat"
             for idx, frame in enumerate(extract_all(src)):
                 print(src, " --> ", dest % idx)
                 write_ascii(frame, dest % idx)
+            print(dest)
+        elif args.zip:
+            dest = os.path.splitext(src)[0] + ".zip"
+            with zipfile.ZipFile(dest, "w") as z:
+                for idx, frame in enumerate(extract_all(src)):
+                    z.writestr(f'frame_{idx:04d}.dat', write_ascii(frame))
+            print(dest)
         else:
             dest = os.path.splitext(src)[0] + ".dat"
             write_ascii(extract_averaged(src), dest)
-            print(src, " --> ", dest)
+            print(dest)
 
 
 if __name__ == "__main__":
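Taken together, the new --zip branch of main() packs every frame of a multi-frame file into one archive instead of writing many .dat files. A rough stand-alone equivalent, assuming freesas with this commit is installed, that "scan.h5" is a hypothetical multi-frame BM29 file, and that write_ascii() returns the formatted text when called without a destination, as the diff itself relies on:

import os
import zipfile
from freesas.app.extract_ascii import extract_all, write_ascii

src = "scan.h5"
dest = os.path.splitext(src)[0] + ".zip"        # scan.zip, next to the input
with zipfile.ZipFile(dest, "w") as z:
    for idx, frame in enumerate(extract_all(src)):
        # one ASCII curve per frame inside the archive
        z.writestr(f"frame_{idx:04d}.dat", write_ascii(frame))
print(dest)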

src/freesas/meson.build

Lines changed: 1 addition & 0 deletions
@@ -21,6 +21,7 @@ py.install_sources([
     'sasio.py',
     'transformations.py',
     'dnn.py',
+    'nexus_parser.py',
     ],
     pure: false, # Will be installed next to binaries
     subdir: 'freesas' # Folder relative to site-packages to install to

src/freesas/nexus_parser.py

Lines changed: 182 additions & 0 deletions
@@ -0,0 +1,182 @@
+__author__ = "Jérôme Kieffer"
+__license__ = "MIT"
+__copyright__ = "2017, ESRF"
+__date__ = "20/01/2025"
+
+import sys, os
+import zipfile
+import posixpath
+import logging
+from typing import Union
+from silx.io.nxdata import NXdata
+from dataclasses import dataclass
+import numpy
+logger = logging.getLogger(__name__)
+
+try:
+    import h5py
+except ImportError:
+    logger.error("H5py is mandatory to parse HDF5 files")
+    h5py = None
+
+
+@dataclass
+class IntegratedPattern:
+    """Store one pyFAI integrated pattern"""
+
+    point: Union[float, int, None]
+    radial: numpy.ndarray
+    intensity: numpy.ndarray
+    intensity_errors: Union[numpy.ndarray, None] = None
+    radial_name: str = ""
+    radial_units: str = ""
+    intensity_name: str = ""
+    intensity_units: str = ""
+
+    def __repr__(self):
+        line = f"# {self.radial_name}({self.radial_units}) \t {self.intensity_name}({self.intensity_units})"
+        if self.intensity_errors is not None:
+            line += " \t uncertainties"
+        res = [line]
+        if self.intensity_errors is None:
+            for q, i in zip(self.radial, self.intensity):
+                res.append(f"{q} \t {i}")
+        else:
+            for q, i, s in zip(self.radial, self.intensity, self.intensity_errors):
+                res.append(f"{q} \t {i} \t {s}")
+        return os.linesep.join(res)
+
+
+def read_nexus_integrated_patterns(group):
+    """Read integrated patterns from a HDF5 NXdata group.
+
+    It reads both single (1D signal) and multi (2D signal) NXdata.
+    :param group: h5py.Group
+    :return: list of IntegratedPattern instances.
+    """
+    nxdata = NXdata(group)
+    if not nxdata.is_valid:
+        raise RuntimeError(
+            f"Cannot parse NXdata group: {group.file.filename}::{group.name}"
+        )
+    if not (nxdata.signal_is_1d or nxdata.signal_is_2d):
+        raise RuntimeError(
+            f"Signal is not a 1D or 2D dataset: {group.file.filename}::{group.name}"
+        )
+
+    if nxdata.signal_is_1d:
+        points = [None]
+    else:  # 2d
+        if nxdata.axes[0] is None:
+            points = [None] * nxdata.signal.shape[0]
+        else:
+            points = nxdata.axes[0][()]
+
+    if nxdata.axes[-1] is None:
+        radial = numpy.arange(nxdata.signal.shape[1])
+        radial_units = ""
+        radial_name = ""
+    else:
+        axis_dataset = nxdata.axes[-1]
+        radial = axis_dataset[()]
+        radial_name = axis_dataset.name.split("/")[-1]
+        radial_units = axis_dataset.attrs.get("units", "")
+
+    intensities = numpy.atleast_2d(nxdata.signal)
+    intensity_name = nxdata.signal.name.split("/")[-1]
+    intensity_units = nxdata.signal.attrs.get("units", "")
+
+
+    if nxdata.errors is None:
+        errors = [None] * intensities.shape[0]
+    else:
+        errors = numpy.atleast_2d(nxdata.errors)
+
+    if (len(points), len(radial)) != intensities.shape:
+        raise RuntimeError("Shape mismatch between axes and signal")
+
+    return [IntegratedPattern(
+        point, radial, intensity, intensity_errors, radial_name, radial_units, intensity_name, intensity_units) for point, intensity, intensity_errors in zip(points, intensities, errors)]
+
+
+class Tree:
+    def __init__(self, root=None):
+        self.root = root or {}
+        self.skip = set()
+    def visit_item(self, name, obj):
+        if name in self.skip:
+            return
+        node = self.root
+        path = [i.replace(" ", "_") for i in name.split("/")]
+        for i in path[:-1]:
+            if i not in node:
+                node[i] = {}
+            node = node[i]
+        if isinstance(obj, h5py.Group):
+            if obj.attrs.get("NX_class") == "NXdata" and "errors" in obj:
+                try:
+                    node[path[-1]] = read_nexus_integrated_patterns(obj)
+                except (KeyError, OSError) as err:
+                    print(f"{type(err).__name__}: {err} while reading {path}")
+                for key in obj:
+                    self.skip.add(posixpath.join(name, key))
+                    if isinstance(obj[key], h5py.Group):
+                        for sub in obj[key]:
+                            self.skip.add(posixpath.join(name, key, sub))
+            else:
+                node[path[-1]] = {}
+        if isinstance(obj, h5py.Dataset):
+            if len(obj.shape) <= 1:
+                node[path[-1]] = obj[()]
+
+    def save(self, filename):
+        with zipfile.ZipFile(filename, "w") as z:
+            def write(path, name, obj):
+                new_path = posixpath.join(path, name)
+                if isinstance(obj, dict):
+                    if sys.version_info >= (3, 11): z.mkdir(new_path)
+                    for key, value in obj.items():
+                        write(new_path, key, value)
+                elif isinstance(obj, numpy.ndarray):
+                    if obj.ndim == 1:
+                        z.writestr(new_path, os.linesep.join(str(i) for i in obj))
+                    else:
+                        z.writestr(new_path, str(obj))
+                elif isinstance(obj, list):
+                    if sys.version_info >= (3, 11): z.mkdir(new_path)
+                    if len(obj) == 1:
+                        fname = new_path + "/biosaxs.dat"
+                        z.writestr(fname, str(obj[0]))
+                    else:
+                        for i, j in enumerate(obj):
+                            fname = new_path + f"/biosaxs_{i:04d}.dat"
+                            z.writestr(fname, str(j))
+                elif isinstance(obj, (int, float, numpy.number, bool, numpy.bool_)):
+                    z.writestr(new_path, str(obj))
+                elif isinstance(obj, (str, bytes)):
+                    z.writestr(new_path, obj)
+                else:
+                    print(f"skip {new_path} for {obj} of type {obj.__class__.__mro__}")
+
+            root = self.root
+            for key, value in root.items():
+                write("", key, value)
+    def get(self, path):
+        node = self.root
+        for i in path.split("/"):
+            node = node[i]
+        return node
+
+def convert_nexus2zip(nexusfile, outfile=None):
+    """Convert a nexus-file, as produced by the BM29 beamline, into a zip file
+
+    :param nexusfile: string with the path of the input file
+    :param outfile: name of the output file; by default, the input extension is replaced with ".zip"
+    :return: nothing, maybe an error code?
+    """
+    tree = Tree()
+    with h5py.File(nexusfile, "r") as h:
+        h.visititems(tree.visit_item)
+    outfile = outfile or (os.path.splitext(nexusfile)[0] + ".zip")
+    tree.save(outfile)
+
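The public entry point of the new module is convert_nexus2zip(): it walks the HDF5 tree, turns every NXdata group that carries errors into IntegratedPattern text blocks, and stores them, along with scalar metadata datasets, in a zip archive. A short usage sketch ("sample.h5" is a hypothetical BM29 file):

from freesas.nexus_parser import convert_nexus2zip

# Writes sample.zip next to the input file
convert_nexus2zip("sample.h5")

# Or choose the output name explicitly
convert_nexus2zip("sample.h5", outfile="export.zip")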
