Skip to content

Commit a5a51d3

Browse files
Merge pull request #503 from JulienDoerner/Interrupt
Handling of interruptions
2 parents 61a6257 + ea66e0e commit a5a51d3

File tree

11 files changed

+873
-10
lines changed

11 files changed

+873
-10
lines changed

.github/workflows/test_examples.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ jobs:
6767
[ "$file" = "MHD_modelsipynb.py" ] ||
6868
[ "$file" = "density_grid_samplingipynb.py" ] ||
6969
[ "$file" = "lensing_crv4ipynb.py" ] ||
70+
[ "$file" = "interrupt_candidateVectoripynb.py" ] ||
71+
[ "$file" = "interrupt_sourceipynb.py" ] ||
7072
[ "$file" = "lensing_mapsv4ipynb.py" ]; then
7173
echo "skip file $file"
7274
else

doc/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ Contents
3131
pages/acceleration.rst
3232
pages/extending_crpropa.rst
3333
pages/example_notebooks/propagation_comparison/Propagation_Comparison_CK_BP.ipynb
34+
pages/interrupting-simulations.rst
3435
pages/AdditionalResources.rst
3536

3637

doc/pages/example_notebooks/interrupting_simulations/interrupt_candidateVector.ipynb

Lines changed: 396 additions & 0 deletions
Large diffs are not rendered by default.

doc/pages/example_notebooks/interrupting_simulations/interrupt_source.ipynb

Lines changed: 355 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
Interrupting simulations at runtime
2+
------------------------------------------------
3+
4+
CRPropa simulations can be interrupted at runtime with the `SIGTERM` or `SIGINT` signals.
5+
If the user defines an output for the interruption (called `InterruptAction`), all candidates which are currently in the simulation will be passed to this output.
6+
In the error stream the user will see a message denoting the number of candidates which have not been started yet.
7+
If the simulation was run with a `candidateVector` as source, the indices of the candidates which have not been started yet will be printed or written to the file.
8+
For a simulation with a source interface, a restart with the missing number of candidates will be sufficient to continue the simulation.
9+
10+
.. toctree::
11+
:caption: Using a candidateVector as source
12+
:maxdepth: 1
13+
14+
example_notebooks/interrupting_simulations/interrupt_candidateVector.ipynb
15+
16+
.. toctree::
17+
:caption: Using a source interface
18+
:maxdepth: 1
19+
20+
example_notebooks/interrupting_simulations/interrupt_source.ipynb
21+
22+

include/crpropa/ModuleList.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include "crpropa/Candidate.h"
55
#include "crpropa/Module.h"
66
#include "crpropa/Source.h"
7+
#include "crpropa/module/Output.h"
78

89
#include <list>
910
#include <sstream>
@@ -47,9 +48,15 @@ class ModuleList: public Module {
4748
iterator end();
4849
const_iterator end() const;
4950

51+
void setInterruptAction(Output* action);
52+
void dumpCandidate(Candidate* cand) const;
53+
5054
private:
5155
module_list_t modules;
5256
bool showProgress;
57+
Output* interruptAction;
58+
bool haveInterruptAction = false;
59+
std::vector<int> notFinished; // list with not finished numbers of candidates
5360
};
5461

5562
/**

include/crpropa/module/Output.h

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,16 +52,18 @@ namespace crpropa {
5252
They can be easily customised by enabling/disabling specific columns.
5353
*/
5454
class Output: public Module {
55-
protected:
56-
double lengthScale, energyScale;
57-
std::bitset<64> fields;
58-
55+
public:
5956
struct Property
6057
{
6158
std::string name;
6259
std::string comment;
6360
Variant defaultValue;
6461
};
62+
63+
protected:
64+
double lengthScale, energyScale;
65+
std::bitset<64> fields;
66+
6567
std::vector<Property> properties;
6668

6769
bool oneDimensional;
@@ -163,6 +165,18 @@ class Output: public Module {
163165
size_t size() const;
164166

165167
void process(Candidate *) const;
168+
169+
/**
170+
* write the indices of not started candidates into the output file.
171+
* Used for interrupting the simulation
172+
* @param indices list of not started indices
173+
*/
174+
virtual void dumpIndexList(std::vector<int> indices) {
175+
std::cout << "indices:\t";
176+
for (int i = 0; i < indices.size(); i++)
177+
std::cout << indices[i] << ", ";
178+
std::cout << "\n";
179+
};
166180
};
167181

168182
/** @}*/

include/crpropa/module/TextOutput.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@ class TextOutput: public Output {
7171
*/
7272
static void load(const std::string &filename, ParticleCollector *collector);
7373
std::string getDescription() const;
74+
75+
void dumpIndexList(std::vector<int> indicies);
7476
};
7577
/** @}*/
7678

python/2_headers.i

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,11 @@
279279
%feature("director") crpropa::AbstractCondition;
280280
%include "crpropa/Module.h"
281281

282+
%template(OutputRefPtr) crpropa::ref_ptr<Output>;
283+
%feature("director") crpropa::Output;
284+
%ignore crpropa::Output::dumpIndexList(std::vector<int>);
285+
%include "crpropa/module/Output.h"
286+
282287
%implicitconv crpropa::ref_ptr<crpropa::MagneticField>;
283288
%template(MagneticFieldRefPtr) crpropa::ref_ptr<crpropa::MagneticField>;
284289
%feature("director") crpropa::MagneticField;
@@ -394,8 +399,6 @@
394399
}
395400
}
396401

397-
398-
%include "crpropa/module/Output.h"
399402
%include "crpropa/module/DiffusionSDE.h"
400403
%include "crpropa/module/TextOutput.h"
401404
%include "crpropa/module/HDF5Output.h"

src/ModuleList.cpp.in

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#include <algorithm>
1010
#include <csignal>
11+
#include <bits/stdc++.h>
1112
#ifndef sighandler_t
1213
typedef void (*sighandler_t)(int);
1314
#endif
@@ -87,6 +88,10 @@ void ModuleList::run(Candidate* candidate, bool recursive, bool secondariesFirst
8788
run(candidate->secondaries[i], recursive, secondariesFirst);
8889
}
8990
}
91+
92+
// dump candidate and secondaries if interrupted.
93+
if (candidate->isActive() && (g_cancel_signal_flag != 0))
94+
dumpCandidate(candidate);
9095
}
9196

9297
void ModuleList::run(ref_ptr<Candidate> candidate, bool recursive, bool secondariesFirst) {
@@ -114,8 +119,11 @@ void ModuleList::run(const candidate_vector_t *candidates, bool recursive, bool
114119

115120
#pragma omp parallel for schedule(OMP_SCHEDULE)
116121
for (size_t i = 0; i < count; i++) {
117-
if (g_cancel_signal_flag != 0)
122+
if (g_cancel_signal_flag != 0) {
123+
#pragma omp critical(interrupt_write)
124+
notFinished.push_back(i);
118125
continue;
126+
}
119127

120128
try {
121129
run(candidates->operator[](i), recursive);
@@ -132,8 +140,18 @@ void ModuleList::run(const candidate_vector_t *candidates, bool recursive, bool
132140
::signal(SIGINT, old_sigint_handler);
133141
::signal(SIGTERM, old_sigterm_handler);
134142
// Propagate signal to old handler.
135-
if (g_cancel_signal_flag > 0)
143+
if (g_cancel_signal_flag > 0) {
136144
raise(g_cancel_signal_flag);
145+
std::cerr << "############################################################################\n";
146+
std::cerr << "# Interrupted CRPropa simulation \n";
147+
std::cerr << "# A total of " << notFinished.size() << " candidates have not been started.\n";
148+
std::cerr << "# the indices of the vector haven been written to to output file. \n";
149+
std::cerr << "############################################################################\n";
150+
151+
// dump list to output file
152+
std::sort(notFinished.begin(), notFinished.end());
153+
interruptAction->dumpIndexList(notFinished);
154+
}
137155
}
138156

139157
void ModuleList::run(SourceInterface *source, size_t count, bool recursive, bool secondariesFirst) {
@@ -156,8 +174,11 @@ void ModuleList::run(SourceInterface *source, size_t count, bool recursive, bool
156174

157175
#pragma omp parallel for schedule(OMP_SCHEDULE)
158176
for (size_t i = 0; i < count; i++) {
159-
if (g_cancel_signal_flag !=0)
177+
if (g_cancel_signal_flag !=0) {
178+
#pragma omp critical(interrupt_write)
179+
notFinished.push_back(i);
160180
continue;
181+
}
161182

162183
ref_ptr<Candidate> candidate;
163184

@@ -189,8 +210,13 @@ void ModuleList::run(SourceInterface *source, size_t count, bool recursive, bool
189210
::signal(SIGINT, old_signal_handler);
190211
::signal(SIGTERM, old_sigterm_handler);
191212
// Propagate signal to old handler.
192-
if (g_cancel_signal_flag > 0)
213+
if (g_cancel_signal_flag > 0) {
193214
raise(g_cancel_signal_flag);
215+
std::cerr << "############################################################################\n";
216+
std::cerr << "# Interrupted CRPropa simulation \n";
217+
std::cerr << "# Number of not started candidates from source: " << notFinished.size() << "\n";
218+
std::cerr << "############################################################################\n";
219+
}
194220
}
195221

196222
ModuleList::iterator ModuleList::begin() {
@@ -222,6 +248,27 @@ void ModuleList::showModules() const {
222248
std::cout << getDescription();
223249
}
224250

251+
/**
 * Register the output that is used when the simulation is interrupted.
 * Passing a null pointer disables the interrupt action instead of arming it.
 * @param action	output module that receives the interrupted candidates
 */
void ModuleList::setInterruptAction(Output* action) {
	interruptAction = action;
	// only arm the action for a usable pointer: dumpCandidate dereferences
	// interruptAction whenever this flag is set
	haveInterruptAction = (action != nullptr);
}
255+
256+
/**
 * Pass a candidate and, recursively, all of its secondaries to the
 * interrupt action. Does nothing when no interrupt action has been set or
 * when cand is null. A candidate that is not active is not passed on; a
 * warning is logged for it instead.
 * @param cand	candidate whose state should be dumped
 */
void ModuleList::dumpCandidate(Candidate *cand) const {
	// nothing to do without an interrupt action; a null candidate cannot be inspected
	if (!haveInterruptAction || !cand)
		return;

	if (cand->isActive())
		interruptAction->process(cand);
	else
		KISS_LOG_WARNING << "ModuleList::dumpCandidate is called with a non active candidate. This should not happen for the interrupt action. Please check candidate with serial number "
			<< cand->getSerialNumber() << std::endl;

	// size_t counter matches the unsigned type returned by size()
	for (size_t i = 0; i < cand->secondaries.size(); i++) {
		if (cand->secondaries[i])
			dumpCandidate(cand->secondaries[i]);
	}
}
271+
225272
ModuleListRunner::ModuleListRunner(ModuleList *mlist) : mlist(mlist) {
226273
}
227274

src/module/TextOutput.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
#include "kiss/string.h"
99

10+
#include <sstream>
1011
#include <cstdio>
1112
#include <stdexcept>
1213
#include <iostream>
@@ -378,4 +379,17 @@ void TextOutput::gzip() {
378379
#endif
379380
}
380381

382+
void TextOutput::dumpIndexList(std::vector<int> indices) {
383+
#pragma omp critical(FileOutput)
384+
{
385+
std::stringstream ss;
386+
ss << "#" << "\t";
387+
for (int i = 0; i < indices.size(); i++)
388+
ss << indices[i] << "\t";
389+
390+
const std::string cstr = ss.str();
391+
out-> write(cstr.c_str(), cstr.length());
392+
}
393+
}
394+
381395
} // namespace crpropa

0 commit comments

Comments
 (0)