You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
I have a file with tons of histograms, and I need to read almost all of them.
The first step is to select only the histograms. uproot is considerably faster here, probably because I don't need recursion. Maybe you can help me find a better way to implement it.
import time
from collections.abc import Iterable
import uproot
#import logging
#logging.basicConfig(level=0)
def list_object(root_file, selection, strip_cycle=True) -> list[str]:
    """Return the keys of the objects in *root_file* accepted by *selection*.

    Args:
        root_file: Either a path (``str``), which is opened with
            ``uproot.open`` and closed again before returning, or an
            already opened object exposing ``classnames()``. An object
            passed in by the caller is left open.
        selection: Callable invoked as ``selection(key, classname)`` for
            every entry of ``root_file.classnames()``; entries for which it
            returns a true value are kept.
        strip_cycle: When true, everything from the first ``";"`` of each
            key onwards is removed from the returned names.

    Returns:
        The selected keys, in the order ``classnames()`` reports them.
    """
    opened_here = isinstance(root_file, str)
    if opened_here:
        root_file = uproot.open(root_file)
    try:
        keys = [
            key
            for key, classname in root_file.classnames().items()
            if selection(key, classname)
        ]
    finally:
        # Only close what this function opened; a handle supplied by the
        # caller may still be needed afterwards.
        if opened_here:
            root_file.close()
    if strip_cycle:
        keys = [key.split(";")[0] for key in keys]
    return keys
def select_histo(key: str, classname: str) -> bool:
    """Tell whether *classname* names a histogram-like class.

    A class qualifies when its name begins with ``TH1``, ``TH2`` or
    ``TProfile``. *key* is part of the selection-callback signature but is
    not inspected.
    """
    # Alternative that needs the object itself rather than its class name:
    # issubclass(type(item), uproot.behaviors.TH1.TH1)
    histogram_prefixes = ("TH1", "TH2", "TProfile")
    return classname.startswith(histogram_prefixes)
def list_histograms(root_file, strip_cycle=True) -> list[str]:
    """List the histogram keys found in *root_file*.

    A falsy *root_file* (for example ``None`` or an empty string) yields an
    empty list without touching any file. Otherwise the work is delegated
    to ``list_object`` with ``select_histo`` as the selection callback.
    """
    if root_file:
        return list_object(root_file, select_histo, strip_cycle=strip_cycle)
    return []
def iter_common_histograms(root_file, common_histograms: Iterable[str]):
    """Yield ``(name, histogram)`` for each requested histogram in *root_file*.

    Args:
        root_file: Whatever ``uproot.open`` accepts, e.g. a file path.
        common_histograms: Names of the histograms to read, in the order in
            which they should be yielded.

    Yields:
        Tuples ``(histogram_name, histogram)``.

    Raises:
        ValueError: If ``get`` returns ``None`` for a requested name.
    """
    # The context manager closes the file once the generator is exhausted,
    # closed, or aborted by an exception.
    with uproot.open(root_file) as f:
        for histogram_name in common_histograms:
            h = f.get(histogram_name)
            if h is None:
                raise ValueError(f"Histogram {histogram_name} not found in {f}.")
            yield histogram_name, h
# Benchmark (uproot): time the listing of the histogram names, then time
# reading every listed histogram once.
fn = "NTUP_PHYSVAL.43258939._000001.pool.root.1"

listing_started = time.perf_counter()
hnames = list_histograms(fn)
print(len(hnames))
listing_elapsed = time.perf_counter() - listing_started
print(listing_elapsed)

reading_started = time.perf_counter()
for _name_and_histogram in iter_common_histograms(fn, hnames):
    # The loop body is intentionally empty: only the read itself is timed.
    pass
reading_elapsed = time.perf_counter() - reading_started
print(reading_elapsed)
ROOT code
import time
import ROOT
def list_object(root_file: str | ROOT.TFile, selection) -> list[str]:
    """Recursively collect the paths of the objects accepted by *selection*.

    Args:
        root_file: Either a path (``str``), which is opened as a
            ``ROOT.TFile`` and closed again before returning, or an already
            opened ``ROOT.TFile``, which is left open for the caller.
        selection: Callable invoked as ``selection(key)`` with each
            non-directory key; keys for which it returns a true value are
            kept.

    Returns:
        Slash-separated paths (relative to the file root) of the selected
        objects, or an empty list if the file is falsy or a zombie.
    """
    opened_here = isinstance(root_file, str)
    if opened_here:
        root_file = ROOT.TFile(root_file)
    if not root_file or root_file.IsZombie():
        return []

    # Loop-invariant: the class every directory-like key is compared against.
    directory_class = ROOT.TDirectory.Class()
    obj_paths = []

    def find_objects(directory, path=""):
        # Depth-first walk; sub-directories are descended into, never selected.
        for key in directory.GetListOfKeys():
            obj_name = key.GetName()
            obj_path = f"{path}/{obj_name}" if path else obj_name
            if ROOT.TClass(key.GetClassName()).InheritsFrom(directory_class):
                find_objects(key.ReadObj(), obj_path)
            elif selection(key):
                obj_paths.append(obj_path)

    try:
        find_objects(root_file)
    finally:
        # Only close what this function opened; only path strings are
        # returned, so nothing handed back depends on the file staying open.
        if opened_here:
            root_file.Close()
    return obj_paths
def select_histo(key: ROOT.TKey) -> bool:
    """Tell whether the class recorded for *key* inherits from ``TH1``."""
    key_class = ROOT.TClass(key.GetClassName())
    th1_class = ROOT.TH1.Class()
    return key_class.InheritsFrom(th1_class)
def list_histograms(root_file: str | ROOT.TFile) -> list[str]:
    """List the paths of the histograms in *root_file*.

    Delegates to ``list_object`` with ``select_histo`` as the selection
    callback.
    """
    histogram_paths = list_object(root_file, select_histo)
    return histogram_paths
def iter_common_histograms(root_file, common_histograms: list[str]):
    """Yield ``(name, histogram)`` for each requested histogram in *root_file*.

    Each histogram has ``Delete()`` called on it when the consumer asks for
    the next item, so a yielded histogram must not be used after advancing
    the generator.

    Args:
        root_file: File name passed to ``ROOT.TFile.Open``.
        common_histograms: Names of the histograms to read, in the order in
            which they should be yielded.

    Yields:
        Tuples ``(histogram_name, histogram)``.

    Raises:
        OSError: If the file could not be opened.
        ValueError: If a requested histogram is not found in the file.
    """
    f = ROOT.TFile.Open(root_file)
    if not f or f.IsZombie():
        # Fail with a clear message instead of an error on the first f.Get().
        raise OSError(f"Could not open ROOT file {root_file}.")
    try:
        for histogram_name in common_histograms:
            h = f.Get(histogram_name)
            if not h:
                raise ValueError(f"Histogram {histogram_name} not found in {f.GetName()}.")
            # Detach the histogram from the file's directory before handing
            # it out.
            h.SetDirectory(0)
            yield histogram_name, h
            h.Delete()
    finally:
        # Runs when the generator is exhausted, closed, or aborted by an
        # exception, so the file handle is never leaked.
        f.Close()
# Benchmark (ROOT): time the listing of the histogram names, then time
# reading every listed histogram once.
fn = "NTUP_PHYSVAL.43258939._000001.pool.root.1"

listing_started = time.perf_counter()
hnames = list_histograms(fn)
print(len(hnames))
listing_elapsed = time.perf_counter() - listing_started
print(listing_elapsed)

reading_started = time.perf_counter()
for _name_and_histogram in iter_common_histograms(fn, hnames):
    # The loop body is intentionally empty: only the read itself is timed.
    pass
reading_elapsed = time.perf_counter() - reading_started
print(reading_elapsed)
Results uproot
16816
0.43960302799951023
16.232253045000107
Results ROOT
16816
0.6675683060002484
2.048757989000478
iter_common_histograms may seem rather pointless here, but in my real case it is more complicated.
If I uncomment the logging lines I see tons of
I have a file with tons of histograms, and I need to read almost all of them.
The first step is to select only the histograms. uproot is considerably faster here, probably because I don't need recursion. Maybe you can help me find a better way to implement it.
By the way, my actual problem is that, when looping over all the histograms, uproot is much slower.
The input ROOT file is here: https://cernbox.cern.ch/s/vXi8Dx9cVWx8Net
uproot code:
ROOT code
Results uproot
Results ROOT
iter_common_histograms
seems rather pointless, but in my real case it is more complicated. If I uncomment the logging lines I see tons of
The text was updated successfully, but these errors were encountered: