Skip to content

Commit 8bd9160

Browse files
committed
add method to query HW size
Signed-off-by: Jeff Hammond <[email protected]>
1 parent 1c73340 commit 8bd9160

File tree

4 files changed

+176
-0
lines changed

4 files changed

+176
-0
lines changed

frame/include/blis.h

+1
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ extern "C" {
8484

8585
#include "bli_thread.h"
8686
#include "bli_pthread.h"
87+
#include "bli_affinity.h"
8788

8889

8990
// -- Constant definitions --

frame/thread/bli_affinity.c

+105
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
/*
2+
3+
BLIS
4+
An object-based framework for developing high-performance BLAS-like
5+
libraries.
6+
7+
Copyright (C) 2022 NVIDIA
8+
9+
Redistribution and use in source and binary forms, with or without
10+
modification, are permitted provided that the following conditions are
11+
met:
12+
- Redistributions of source code must retain the above copyright
13+
notice, this list of conditions and the following disclaimer.
14+
- Redistributions in binary form must reproduce the above copyright
15+
notice, this list of conditions and the following disclaimer in the
16+
documentation and/or other materials provided with the distribution.
17+
- Neither the name(s) of the copyright holder(s) nor the names of its
18+
contributors may be used to endorse or promote products derived
19+
from this software without specific prior written permission.
20+
21+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25+
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32+
33+
*/
34+
35+
// we need a way to detect oversubscription of the kind where
36+
// hierarchical parallelism is used and the affinity mask within
37+
// which BLIS runs does not have enough hardware threads to support
38+
// the requested software threads.
39+
//
40+
// this is motivated by, or related to:
41+
// https://github.com/flame/blis/issues/588
42+
// https://github.com/flame/blis/pull/607
43+
// https://github.com/flame/blis/issues/604
44+
// https://github.com/flame/blis/issues/603
45+
46+
#include "bli_affinity.h"
47+
48+
#ifndef BLIS_ENABLE_AFFINITY
49+
50+
// Define the symbol for platforms such as Windows and macOS that do not support the Linux affinity API.
51+
52+
// Fallback used when BLIS_ENABLE_AFFINITY is not defined (e.g. platforms
// without the Linux affinity API). No affinity mask can be queried here, so
// the scope argument is ignored and a fixed value is reported.
dim_t bli_affinity_get_hw_size(bli_affinity_scope_t scope)
{
	// Largest value this function can return; it signals that the affinity
	// mask does not constrain the current scope.
	const dim_t unconstrained_hw_size = (dim_t)1024;

	// The scope is irrelevant when no mask is available.
	( void )scope;

	return unconstrained_hw_size;
}
58+
59+
#else
60+
61+
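// this macro has to come before any other headers.
// NOTE(review): bli_affinity.h is already included above this point; if it pulls in system headers, this definition comes too late -- confirm.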
62+
#define _GNU_SOURCE
63+
64+
#include <sched.h>
65+
#include <unistd.h>
66+
67+
// scope is either the calling process or the calling thread:
68+
// 0 = calling process
69+
// 1 = calling thread
70+
71+
// Return the number of hardware threads (logical CPUs) that are set in the
// affinity mask of the requested scope.
//
// scope: 0 queries the mask using the pid returned by getpid() (the calling
//        process); any other value queries the calling thread.
//
// Prints a message and calls bli_abort() if sched_getaffinity() fails.
dim_t bli_affinity_get_hw_size(bli_affinity_scope_t scope)
{
	// A pid of 0 tells sched_getaffinity() to use the calling thread.
	const pid_t pid = ( scope == 0 ) ? getpid() : 0;

	cpu_set_t mask;
	CPU_ZERO(&mask);

	// if the CPU mask is larger than 1024 bits (CPU_SETSIZE), this needs to
	// change to a dynamically sized set.
	// see https://man7.org/linux/man-pages/man2/sched_getaffinity.2.html for details.
	if ( sched_getaffinity(pid, sizeof(cpu_set_t), &mask) != 0 ) {
		bli_print_msg( "sched_getaffinity failed",
		               __FILE__, __LINE__ );
		bli_abort();
	}

	// Count every CPU in the mask. The mask must be walked by CPU (bit)
	// index, not by sizeof(cpu_set_t), which is a byte count and would only
	// cover the first 128 CPUs of the 1024-bit set.
	return (dim_t)CPU_COUNT(&mask);
}
104+
105+
#endif // BLIS_ENABLE_AFFINITY

frame/thread/bli_affinity.h

+44
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/*
2+
3+
BLIS
4+
An object-based framework for developing high-performance BLAS-like
5+
libraries.
6+
7+
Copyright (C) 2022 NVIDIA
8+
9+
Redistribution and use in source and binary forms, with or without
10+
modification, are permitted provided that the following conditions are
11+
met:
12+
- Redistributions of source code must retain the above copyright
13+
notice, this list of conditions and the following disclaimer.
14+
- Redistributions in binary form must reproduce the above copyright
15+
notice, this list of conditions and the following disclaimer in the
16+
documentation and/or other materials provided with the distribution.
17+
- Neither the name(s) of the copyright holder(s) nor the names of its
18+
contributors may be used to endorse or promote products derived
19+
from this software without specific prior written permission.
20+
21+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25+
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32+
33+
*/
34+
35+
#ifndef BLIS_AFFINITY_H
36+
#define BLIS_AFFINITY_H
37+
38+
#include "blis.h"
39+
40+
// Selects whose affinity mask bli_affinity_get_hw_size() inspects.
typedef enum
{
	process = 0, // the calling process
	thread  = 1  // the calling thread
} bli_affinity_scope_t;
41+
42+
// Query how many hardware threads the affinity mask of the given scope
// (process or thread) makes available. When BLIS_ENABLE_AFFINITY is not
// defined, the implementation returns 1024, meaning the mask places no
// constraint on the scope.
dim_t bli_affinity_get_hw_size(bli_affinity_scope_t scope);
43+
44+
#endif // BLIS_AFFINITY_H

frame/thread/bli_l3_decor_openmp.c

+26
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,12 @@ void bli_l3_thread_decorator_thread_check
199199
)
200200
{
201201
dim_t n_threads_real = omp_get_num_threads();
202+
dim_t n_threads_hwmask;
203+
if ( omp_in_parallel() ) {
204+
n_threads_hwmask = bli_affinity_get_hw_size(thread);
205+
} else {
206+
n_threads_hwmask = bli_affinity_get_hw_size(process);
207+
}
202208

203209
// Check if the number of OpenMP threads created within this parallel
204210
// region is different from the number of threads that were requested
@@ -241,6 +247,26 @@ void bli_l3_thread_decorator_thread_check
241247

242248
// Synchronize all threads and continue.
243249
_Pragma( "omp barrier" )
250+
251+
return;
252+
}
253+
254+
// Check if the number of threads requested, or the number of OpenMP
255+
// threads created within this parallel region, exceeds the number of
256+
// hardware threads available to BLIS in the calling context.
257+
if ( n_threads_hwmask < n_threads || n_threads_hwmask < n_threads_real)
258+
{
259+
bli_print_msg( "The affinity mask on this process does not have "
260+
"enough HW threads for your requested SW threads.",
261+
__FILE__, __LINE__ );
262+
263+
bli_thrcomm_init( n_threads_hwmask, gl_comm );
264+
bli_rntm_set_num_threads_only( n_threads_hwmask, rntm );
265+
#warning HELP ME HERE
266+
bli_rntm_set_ways_only( 1, 1, 1, 1, 1, rntm );
267+
268+
// Synchronize all threads and continue.
269+
_Pragma( "omp barrier" )
244270
}
245271
}
246272

0 commit comments

Comments
 (0)