@@ -49,22 +49,29 @@ void* bli_packm_alloc
49
49
// Query the address of the mem_t entry within the control tree node.
50
50
mem_t * cntl_mem_p = bli_cntl_pack_mem ( cntl );
51
51
52
- // Check the mem_t field in the control tree. If it is unallocated, then
53
- // we need to acquire a block from the memory broker and broadcast it to
54
- // all threads in the chief's thread group.
55
- if ( bli_mem_is_unalloc ( cntl_mem_p ) )
56
- {
57
- mem_t * local_mem_p ;
58
- mem_t local_mem_s ;
52
+ mem_t * local_mem_p ;
53
+ mem_t local_mem_s ;
54
+
55
+ siz_t cntl_mem_size = 0 ;
59
56
57
+ if ( bli_mem_is_alloc ( cntl_mem_p ) )
58
+ cntl_mem_size = bli_mem_size ( cntl_mem_p );
59
+
60
+ if ( cntl_mem_size < size_needed )
61
+ {
60
62
if ( bli_thread_am_ochief ( thread ) )
61
63
{
62
- #ifdef BLIS_ENABLE_MEM_TRACING
63
- printf ( "bli_l3_packm(): acquiring mem pool block\n" );
64
- #endif
65
-
66
- // The chief thread acquires a block from the memory broker
67
- // and saves the associated mem_t entry to local_mem_s.
64
+ // The chief thread releases the existing block associated with
65
+ // the mem_t entry in the control tree, and then re-acquires a
66
+ // new block, saving the associated mem_t entry to local_mem_s.
67
+ if ( bli_mem_is_alloc ( cntl_mem_p ) )
68
+ {
69
+ bli_pba_release
70
+ (
71
+ rntm ,
72
+ cntl_mem_p
73
+ );
74
+ }
68
75
bli_pba_acquire_m
69
76
(
70
77
rntm ,
@@ -78,63 +85,13 @@ void* bli_packm_alloc
78
85
// all threads.
79
86
local_mem_p = bli_thread_broadcast ( thread , & local_mem_s );
80
87
81
- // Save the contents of the chief thread's local mem_t entry to the
82
- // mem_t field in this thread's control tree node.
88
+ // Save the chief thread's local mem_t entry to the mem_t field in
89
+ // this thread's control tree node.
83
90
* cntl_mem_p = * local_mem_p ;
84
- }
85
- else // ( bli_mem_is_alloc( cntl_mem_p ) )
86
- {
87
- mem_t * local_mem_p ;
88
- mem_t local_mem_s ;
89
91
90
- // If the mem_t entry in the control tree does NOT contain a NULL
91
- // buffer, then a block has already been acquired from the memory
92
- // broker and cached in the control tree.
93
-
94
- // As a sanity check, we should make sure that the mem_t object isn't
95
- // associated with a block that is too small compared to the size of
96
- // the packed matrix buffer that is needed, according to the return
97
- // value from packm_init().
98
- siz_t cntl_mem_size = bli_mem_size ( cntl_mem_p );
99
-
100
- if ( cntl_mem_size < size_needed )
101
- {
102
- if ( bli_thread_am_ochief ( thread ) )
103
- {
104
- // The chief thread releases the existing block associated with
105
- // the mem_t entry in the control tree, and then re-acquires a
106
- // new block, saving the associated mem_t entry to local_mem_s.
107
- bli_pba_release
108
- (
109
- rntm ,
110
- cntl_mem_p
111
- );
112
- bli_pba_acquire_m
113
- (
114
- rntm ,
115
- size_needed ,
116
- pack_buf_type ,
117
- & local_mem_s
118
- );
119
- }
120
-
121
- // Broadcast the address of the chief thread's local mem_t entry to
122
- // all threads.
123
- local_mem_p = bli_thread_broadcast ( thread , & local_mem_s );
124
-
125
- // Save the chief thread's local mem_t entry to the mem_t field in
126
- // this thread's control tree node.
127
- * cntl_mem_p = * local_mem_p ;
128
- }
129
- else
130
- {
131
- // If the mem_t entry is already allocated and sufficiently large,
132
- // then we use it as-is. No action is needed, because all threads
133
- // will already have the cached values in their local control
134
- // trees' mem_t entries, currently pointed to by cntl_mem_p.
135
-
136
- bli_thread_barrier ( thread );
137
- }
92
+ // Barrier so that the chief thread doesn't return from the function
93
+ // (ending the lifetime of local_mem_s) before all threads have read it.
94
+ bli_thread_barrier ( thread );
138
95
}
139
96
140
97
return bli_mem_buffer ( cntl_mem_p );
0 commit comments