Skip to content

Adding possibility to add a list of files to TRestDataSet #514

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
8 changes: 4 additions & 4 deletions source/framework/core/inc/TRestDataSet.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class TRestDataSet : public TRestMetadata {
std::string fFilterEndTime = "3000/12/31"; //<

/// A list of glob file patterns; the files matching each pattern are collected into the file selection
std::string fFilePattern = ""; //<
std::vector<std::string> fFilePatternList; //<

/// It contains a list of the observables that will be added to the final tree or exported file
std::vector<std::string> fObservablesList; //<
Expand Down Expand Up @@ -167,7 +167,7 @@ class TRestDataSet : public TRestMetadata {
inline auto GetFilterEndTime() const { return fFilterEndTime; }
inline auto GetStartTime() const { return fStartTime; }
inline auto GetEndTime() const { return fEndTime; }
inline auto GetFilePattern() const { return fFilePattern; }
inline auto GetFilePattern() const { return fFilePatternList; }
inline auto GetObservablesList() const { return fObservablesList; }
inline auto GetFileSelection() const { return fFileSelection; }
inline auto GetProcessObservablesList() const { return fProcessObservablesList; }
Expand All @@ -182,7 +182,7 @@ class TRestDataSet : public TRestMetadata {
inline auto IsMergedDataSet() const { return fMergedDataset; }

inline void SetObservablesList(const std::vector<std::string>& obsList) { fObservablesList = obsList; }
inline void SetFilePattern(const std::string& pattern) { fFilePattern = pattern; }
inline void SetFilePattern(const std::string& pattern) { fFilePatternList.push_back(pattern); }
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In principle, if this method appends an entry to the list instead of replacing the stored pattern, it should be named something like "AddFilePattern" rather than "SetFilePattern".

inline void SetQuantity(const std::map<std::string, RelevantQuantity>& quantity) { fQuantity = quantity; }

void SetTotalTimeInSeconds(Double_t seconds) { fTotalDuration = seconds; }
Expand All @@ -209,6 +209,6 @@ class TRestDataSet : public TRestMetadata {
TRestDataSet(const char* cfgFileName, const std::string& name = "");
~TRestDataSet();

ClassDefOverride(TRestDataSet, 7);
ClassDefOverride(TRestDataSet, 8);
};
#endif
27 changes: 19 additions & 8 deletions source/framework/core/src/TRestDataSet.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -389,16 +389,20 @@ std::vector<std::string> TRestDataSet::FileSelection() {
return fFileSelection;
}

std::vector<std::string> fileNames = TRestTools::GetFilesMatchingPattern(fFilePattern);
std::vector<std::string> fileList;
for (const auto& pattern : fFilePatternList) {
auto list = TRestTools::GetFilesMatchingPattern(pattern);
fileList.insert(end(fileList), begin(list), end(list));
}

RESTInfo << "TRestDataSet::FileSelection. Starting file selection." << RESTendl;
RESTInfo << "Total files : " << fileNames.size() << RESTendl;
RESTInfo << "Total files : " << fileList.size() << RESTendl;
RESTInfo << "This process may take long computation time in case there are many files." << RESTendl;

fTotalDuration = 0;
std::cout << "Processing file selection.";
int cnt = 1;
for (const auto& file : fileNames) {
for (const auto& file : fileList) {
if (cnt % 100 == 0) {
std::cout << std::endl;
std::cout << "Files processed: " << cnt << " ." << std::flush;
Expand Down Expand Up @@ -558,8 +562,10 @@ void TRestDataSet::PrintMetadata() {

RESTMetadata << " - StartTime : " << REST_StringHelper::ToDateTimeString(fStartTime) << RESTendl;
RESTMetadata << " - EndTime : " << REST_StringHelper::ToDateTimeString(fEndTime) << RESTendl;
RESTMetadata << " - Path : " << TRestTools::SeparatePathAndName(fFilePattern).first << RESTendl;
RESTMetadata << " - File pattern : " << TRestTools::SeparatePathAndName(fFilePattern).second << RESTendl;
for (const auto& pattern : fFilePatternList) {
RESTMetadata << " - Path : " << TRestTools::SeparatePathAndName(pattern).first << RESTendl;
RESTMetadata << " - File pattern : " << TRestTools::SeparatePathAndName(pattern).second << RESTendl;
}
RESTMetadata << " " << RESTendl;
RESTMetadata << " - Accumulated run time (seconds) : " << fTotalDuration << RESTendl;
RESTMetadata << " - Accumulated run time (hours) : " << fTotalDuration / 3600. << RESTendl;
Expand Down Expand Up @@ -655,6 +661,9 @@ void TRestDataSet::PrintMetadata() {
void TRestDataSet::InitFromConfigFile() {
TRestMetadata::InitFromConfigFile();

std::string filePattern = GetParameter("filePattern", "");
if (!filePattern.empty()) fFilePatternList.push_back(filePattern);

/// Reading filters
TiXmlElement* filterDefinition = GetElement("filter");
while (filterDefinition != nullptr) {
Expand Down Expand Up @@ -843,8 +852,10 @@ void TRestDataSet::Export(const std::string& filename, std::vector<std::string>
fprintf(f, "### Accumulated run time (hours) : %lf\n", fTotalDuration / 3600.);
fprintf(f, "### Accumulated run time (days) : %lf\n", fTotalDuration / 3600. / 24.);
fprintf(f, "###\n");
fprintf(f, "### Data path : %s\n", TRestTools::SeparatePathAndName(fFilePattern).first.c_str());
fprintf(f, "### File pattern : %s\n", TRestTools::SeparatePathAndName(fFilePattern).second.c_str());
for (const auto& pattern : fFilePatternList) {
fprintf(f, "### Data path : %s\n", TRestTools::SeparatePathAndName(pattern).first.c_str());
fprintf(f, "### File pattern : %s\n", TRestTools::SeparatePathAndName(pattern).second.c_str());
}
fprintf(f, "###\n");
if (!fFilterMetadata.empty()) {
fprintf(f, "### Metadata filters : \n");
Expand Down Expand Up @@ -926,7 +937,7 @@ TRestDataSet& TRestDataSet::operator=(TRestDataSet& dS) {
fFilterEndTime = dS.GetFilterEndTime();
fStartTime = dS.GetStartTime();
fEndTime = dS.GetEndTime();
fFilePattern = dS.GetFilePattern();
fFilePatternList = dS.GetFilePattern();
fObservablesList = dS.GetObservablesList();
fFileSelection = dS.GetFileSelection();
fProcessObservablesList = dS.GetProcessObservablesList();
Expand Down