Skip to content

Commit dfc1267

Browse files
Fix race condition in packing allocation.
All threads need to make a copy of the master thread's local mem_t struct, but the master thread may exit before this is complete. Fixed by putting the thread barrier in the right place. Also, fixed a problem with the max panel length and work partitioning for triangular matrices.
1 parent 57f116c commit dfc1267

File tree

2 files changed

+39
-78
lines changed

2 files changed

+39
-78
lines changed

frame/1m/packm/bli_packm_alloc.c

+25-68
Original file line numberDiff line numberDiff line change
@@ -49,22 +49,29 @@ void* bli_packm_alloc
4949
// Query the address of the mem_t entry within the control tree node.
5050
mem_t* cntl_mem_p = bli_cntl_pack_mem( cntl );
5151

52-
// Check the mem_t field in the control tree. If it is unallocated, then
53-
// we need to acquire a block from the memory broker and broadcast it to
54-
// all threads in the chief's thread group.
55-
if ( bli_mem_is_unalloc( cntl_mem_p ) )
56-
{
57-
mem_t* local_mem_p;
58-
mem_t local_mem_s;
52+
mem_t* local_mem_p;
53+
mem_t local_mem_s;
54+
55+
siz_t cntl_mem_size = 0;
5956

57+
if ( bli_mem_is_alloc( cntl_mem_p ) )
58+
cntl_mem_size = bli_mem_size( cntl_mem_p );
59+
60+
if ( cntl_mem_size < size_needed )
61+
{
6062
if ( bli_thread_am_ochief( thread ) )
6163
{
62-
#ifdef BLIS_ENABLE_MEM_TRACING
63-
printf( "bli_l3_packm(): acquiring mem pool block\n" );
64-
#endif
65-
66-
// The chief thread acquires a block from the memory broker
67-
// and saves the associated mem_t entry to local_mem_s.
64+
// The chief thread releases the existing block associated with
65+
// the mem_t entry in the control tree, and then re-acquires a
66+
// new block, saving the associated mem_t entry to local_mem_s.
67+
if ( bli_mem_is_alloc( cntl_mem_p ) )
68+
{
69+
bli_pba_release
70+
(
71+
rntm,
72+
cntl_mem_p
73+
);
74+
}
6875
bli_pba_acquire_m
6976
(
7077
rntm,
@@ -78,63 +85,13 @@ void* bli_packm_alloc
7885
// all threads.
7986
local_mem_p = bli_thread_broadcast( thread, &local_mem_s );
8087

81-
// Save the contents of the chief thread's local mem_t entry to the
82-
// mem_t field in this thread's control tree node.
88+
// Save the chief thread's local mem_t entry to the mem_t field in
89+
// this thread's control tree node.
8390
*cntl_mem_p = *local_mem_p;
84-
}
85-
else // ( bli_mem_is_alloc( cntl_mem_p ) )
86-
{
87-
mem_t* local_mem_p;
88-
mem_t local_mem_s;
8991

90-
// If the mem_t entry in the control tree does NOT contain a NULL
91-
// buffer, then a block has already been acquired from the memory
92-
// broker and cached in the control tree.
93-
94-
// As a sanity check, we should make sure that the mem_t object isn't
95-
// associated with a block that is too small compared to the size of
96-
// the packed matrix buffer that is needed, according to the return
97-
// value from packm_init().
98-
siz_t cntl_mem_size = bli_mem_size( cntl_mem_p );
99-
100-
if ( cntl_mem_size < size_needed )
101-
{
102-
if ( bli_thread_am_ochief( thread ) )
103-
{
104-
// The chief thread releases the existing block associated with
105-
// the mem_t entry in the control tree, and then re-acquires a
106-
// new block, saving the associated mem_t entry to local_mem_s.
107-
bli_pba_release
108-
(
109-
rntm,
110-
cntl_mem_p
111-
);
112-
bli_pba_acquire_m
113-
(
114-
rntm,
115-
size_needed,
116-
pack_buf_type,
117-
&local_mem_s
118-
);
119-
}
120-
121-
// Broadcast the address of the chief thread's local mem_t entry to
122-
// all threads.
123-
local_mem_p = bli_thread_broadcast( thread, &local_mem_s );
124-
125-
// Save the chief thread's local mem_t entry to the mem_t field in
126-
// this thread's control tree node.
127-
*cntl_mem_p = *local_mem_p;
128-
}
129-
else
130-
{
131-
// If the mem_t entry is already allocated and sufficiently large,
132-
// then we use it as-is. No action is needed, because all threads
133-
// will already have the cached values in their local control
134-
// trees' mem_t entries, currently pointed to by cntl_mem_p.
135-
136-
bli_thread_barrier( thread );
137-
}
92+
// Barrier so that the master thread doesn't return from the function
93+
// before we are done reading.
94+
bli_thread_barrier( thread );
13895
}
13996

14097
return bli_mem_buffer( cntl_mem_p );

frame/1m/packm/bli_packm_blk_var1.c

+14-10
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ void bli_packm_blk_var1
151151
// this overrides the kernel determined above.
152152
packm_blk_var1_params_t* params = bli_obj_pack_params( c );
153153

154-
if ( params )
154+
if ( params && params->ukr_fn[ dt_c ][ dt_p ] )
155155
{
156156
packm_ker_cast = params->ukr_fn[ dt_c ][ dt_p ];
157157
}
@@ -220,6 +220,16 @@ void bli_packm_blk_var1
220220

221221
inc_t p_inc = ps_p;
222222

223+
/* NOTE: We MUST use round-robin partitioning when packing
224+
micropanels of a triangular matrix. Hermitian/symmetric
225+
and general packing may use slab or round-robin, depending
226+
on which was selected at configure-time. */
227+
/* The definition of bli_packm_my_iter() will depend on whether slab
228+
or round-robin partitioning was requested at configure-time. */
229+
bool my_iter = bli_is_triangular( strucc )
230+
? bli_packm_my_iter_rr( it, it_start, it_end, tid, nt )
231+
: bli_packm_my_iter ( it, it_start, it_end, tid, nt );
232+
223233
if ( bli_is_triangular( strucc ) &&
224234
bli_is_unstored_subpart_n( diagoffc_i, uploc, panel_dim_i, panel_len_full ) )
225235
{
@@ -278,11 +288,7 @@ void bli_packm_blk_var1
278288
/* We nudge the imaginary stride up by one if it is odd. */
279289
is_p_use += ( bli_is_odd( is_p_use ) ? 1 : 0 );
280290

281-
/* NOTE: We MUST use round-robin partitioning when packing
282-
micropanels of a triangular matrix. Hermitian/symmetric
283-
and general packing may use slab or round-robin, depending
284-
on which was selected at configure-time. */
285-
if ( bli_packm_my_iter_rr( it, it_start, it_end, tid, nt ) )
291+
if ( my_iter )
286292
{
287293
packm_ker_cast( strucc,
288294
diagc,
@@ -293,7 +299,7 @@ void bli_packm_blk_var1
293299
panel_dim_i,
294300
panel_len_i,
295301
panel_dim_max,
296-
panel_len_max,
302+
panel_len_max_i,
297303
panel_dim_off_i,
298304
panel_len_off_i,
299305
kappa_cast,
@@ -315,9 +321,7 @@ void bli_packm_blk_var1
315321
to a Hermitian or symmetric matrix, which includes stored,
316322
unstored, and diagonal-intersecting panels. */
317323

318-
/* The definition of bli_packm_my_iter() will depend on whether slab
319-
or round-robin partitioning was requested at configure-time. */
320-
if ( bli_packm_my_iter( it, it_start, it_end, tid, nt ) )
324+
if ( my_iter )
321325
{
322326
packm_ker_cast( strucc,
323327
diagc,

0 commit comments

Comments
 (0)