|
11 | 11 | namespace {
|
12 | 12 | int verbosity = 0;
|
13 | 13 | int enable_timing = 0;
|
| 14 | + const int padding = 2; |
| 15 | + const int precision = 5; |
14 | 16 | std::unordered_map<std::string, int> timing_index;
|
15 | 17 |
|
16 | 18 | const double PREBARRIER_TOL = .000001;
|
17 | 19 | struct TimeInfo {
|
18 | 20 | TimeInfo(std::string s, int index) : str(s), time(0), hasPrebarrier(false),
|
| 21 | + timeSq(0), max(0), min(std::numeric_limits<double>::max()), |
19 | 22 | prebarrier(0), count(0), orig_index(index) {}
|
20 | 23 | std::string str;
|
21 | 24 | double time;
|
| 25 | + double timeSq; |
| 26 | + double max; |
| 27 | + double min; |
22 | 28 | int count;
|
23 | 29 | bool hasPrebarrier;
|
24 | 30 | double prebarrier;
|
@@ -70,7 +76,12 @@ namespace pumipic {
|
70 | 76 | }
|
71 | 77 | int index = itr->second;
|
72 | 78 | time_per_op[index].time += seconds;
|
| 79 | + time_per_op[index].timeSq += seconds*seconds; |
73 | 80 | ++(time_per_op[index].count);
|
| 81 | + if (seconds > time_per_op[index].max) |
| 82 | + time_per_op[index].max = seconds; |
| 83 | + if (seconds < time_per_op[index].min) |
| 84 | + time_per_op[index].min = seconds; |
74 | 85 | if (prebarrierTime >= PREBARRIER_TOL) {
|
75 | 86 | time_per_op[index].hasPrebarrier = true;
|
76 | 87 | time_per_op[index].prebarrier += prebarrierTime;
|
@@ -126,62 +137,75 @@ namespace pumipic {
|
126 | 137 | }
|
127 | 138 | }
|
128 | 139 |
|
129 |
| - int length(int x) { |
130 |
| - if (x== 0) |
131 |
| - return 1; |
132 |
| - else |
133 |
| - return Kokkos::trunc(Kokkos::log10(x)) + 1; |
| 140 | + template <typename T> |
| 141 | + int length(T num) { |
| 142 | + std::stringstream ss; |
| 143 | + ss << std::setprecision(precision) << num; |
| 144 | + return ss.str().size(); |
134 | 145 | }
|
135 |
| - void determineLengths(int& name_length, int& tt_length, int& cc_length, |
136 |
| - int& at_length) { |
| 146 | + |
| 147 | + void determineLengths(int& name_length, int& tt_length, int& min_length, int& max_length, int& rmsd_length, |
| 148 | + int& cc_length, int& at_length) { |
137 | 149 | for (std::size_t index = 0; index < time_per_op.size(); ++index) {
|
138 |
| - if (time_per_op[index].str.size() > name_length) |
139 |
| - name_length = time_per_op[index].str.size(); |
140 |
| - int len = Kokkos::log10(time_per_op[index].time) + 8; |
141 |
| - if (len > tt_length) |
142 |
| - tt_length = len; |
143 |
| - len = Kokkos::log10(time_per_op[index].count) + 1; |
144 |
| - if (len > cc_length) |
145 |
| - cc_length = len; |
146 |
| - len = Kokkos::log10(time_per_op[index].time / time_per_op[index].count) + 8; |
147 |
| - if (len > at_length) |
148 |
| - len = at_length; |
| 150 | + double average = time_per_op[index].time / time_per_op[index].count; |
| 151 | + double averageSq = time_per_op[index].timeSq / time_per_op[index].count; |
| 152 | + |
| 153 | + name_length = std::max(name_length, static_cast<int>(time_per_op[index].str.size())); |
| 154 | + tt_length = std::max(tt_length, length(time_per_op[index].time)); |
| 155 | + min_length = std::max(min_length, length(time_per_op[index].min)); |
| 156 | + max_length = std::max(max_length, length(time_per_op[index].max)); |
| 157 | + cc_length = std::max(cc_length, length(time_per_op[index].count)); |
| 158 | + at_length = std::max(at_length, length(average)); |
| 159 | + rmsd_length = std::max(rmsd_length, length(averageSq)); |
149 | 160 | }
|
150 | 161 | }
|
| 162 | + |
| 163 | + template <typename T> |
| 164 | + void PrintTable(std::stringstream& buffer, T print, int width){ |
| 165 | + buffer << std::left << std::setw(width) << std::setfill(' ') << print; |
| 166 | + } |
| 167 | + |
151 | 168 | void SummarizeTime(TimingSortOption sort) {
|
152 | 169 | int comm_rank;
|
153 | 170 | MPI_Comm_rank(MPI_COMM_WORLD, &comm_rank);
|
154 | 171 | if (isTiming()) {
|
155 | 172 | if (verbosity >= 0) {
|
156 |
| - int name_length = 9; |
157 |
| - int tt_length = 10; |
158 |
| - int cc_length = 10; |
159 |
| - int at_length = 12; |
160 |
| - determineLengths(name_length, tt_length, cc_length, at_length); |
| 173 | + int name_length = strlen("Operation"); |
| 174 | + int tt_length = strlen("Total Time"); |
| 175 | + int min_length = strlen("Min Time"); |
| 176 | + int max_length = strlen("Max Time"); |
| 177 | + int sqrAvg_length = strlen("Sqr Average"); |
| 178 | + int cc_length = strlen("Call Count"); |
| 179 | + int at_length = strlen("Average Time"); |
| 180 | + determineLengths(name_length, tt_length, min_length, max_length, sqrAvg_length, cc_length, at_length); |
161 | 181 | sortTimeInfo(sort);
|
162 | 182 | std::stringstream buffer;
|
| 183 | + buffer.precision(precision); |
163 | 184 | //Header
|
164 | 185 | buffer << "Timing Summary " << comm_rank << "\n";
|
165 | 186 | //Column heads
|
166 |
| - buffer << "Operation" << std::string(name_length - 6, ' ') |
167 |
| - << "Total Time" << std::string(tt_length - 7, ' ') |
168 |
| - << "Call Count" << std::string(cc_length - 7, ' ') |
169 |
| - << "Average Time\n"; |
| 187 | + PrintTable(buffer, "Operation", name_length + padding); |
| 188 | + PrintTable(buffer, "Total Time", tt_length + padding); |
| 189 | + PrintTable(buffer, "Min Time", min_length + padding); |
| 190 | + PrintTable(buffer, "Max Time", max_length + padding); |
| 191 | + PrintTable(buffer, "Sqr Average", sqrAvg_length + padding); |
| 192 | + PrintTable(buffer, "Call Count", cc_length + padding); |
| 193 | + PrintTable(buffer, "Average Time", at_length + padding); |
| 194 | + buffer << '\n'; |
170 | 195 | for (int index = 0; index < time_per_op.size(); ++index) {
|
171 |
| - //Operation name |
172 |
| - buffer << time_per_op[index].str.c_str() |
173 |
| - //Fill space after operation name |
174 |
| - << std::string(name_length - time_per_op[index].str.size()+3, ' ') |
175 |
| - //Total time spent on operation |
176 |
| - << std::setw(tt_length+3) << time_per_op[index].time |
177 |
| - //Number of calls of operation |
178 |
| - << std::setw(cc_length+3) << time_per_op[index].count |
179 |
| - //Average time per call |
180 |
| - << std::setw(at_length+3) |
181 |
| - << time_per_op[index].time / time_per_op[index].count; |
| 196 | + double average = time_per_op[index].time / time_per_op[index].count; |
| 197 | + double averageSq = time_per_op[index].timeSq / time_per_op[index].count; |
| 198 | + |
| 199 | + PrintTable(buffer, time_per_op[index].str.c_str(), name_length + padding); |
| 200 | + PrintTable(buffer, time_per_op[index].time, tt_length + padding); |
| 201 | + PrintTable(buffer, time_per_op[index].min, min_length + padding); |
| 202 | + PrintTable(buffer, time_per_op[index].max, max_length + padding); |
| 203 | + PrintTable(buffer, averageSq, sqrAvg_length + padding); |
| 204 | + PrintTable(buffer, time_per_op[index].count, cc_length + padding); |
| 205 | + PrintTable(buffer, average, at_length + padding); |
182 | 206 | if (time_per_op[index].hasPrebarrier)
|
183 | 207 | buffer <<" Total Prebarrier=" << time_per_op[index].prebarrier;
|
184 |
| - buffer <<'\n'; |
| 208 | + buffer << '\n'; |
185 | 209 | }
|
186 | 210 | printInfo( "%s\n", buffer.str().c_str());
|
187 | 211 | }
|
|
0 commit comments