@@ -146,10 +146,37 @@ get_cached_stack (size_t *sizep, void **memp)
   return result;
 }
 
+/* Assume support for MADV_GUARD_INSTALL; setup_stack_prot will disable it
+   and fall back to ALLOCATE_GUARD_PROT_NONE if the madvise call fails.  */
+static int allocate_stack_mode = ALLOCATE_GUARD_MADV_GUARD;
+
+static inline int stack_prot (void)
+{
+  return (PROT_READ | PROT_WRITE
+          | ((GL (dl_stack_flags) & PF_X) ? PROT_EXEC : 0));
+}
+
+static void *
+allocate_thread_stack (size_t size, size_t guardsize)
+{
+  /* MADV_GUARD_INSTALL does not require an additional PROT_NONE mapping.  */
+  int prot = stack_prot ();
+
+  if (atomic_load_relaxed (&allocate_stack_mode) == ALLOCATE_GUARD_PROT_NONE)
+    /* If a guard page is required, avoid committing memory by first
+       allocating with PROT_NONE and then reserving with the required
+       permission, excluding the guard page.  */
+    prot = guardsize == 0 ? prot : PROT_NONE;
+
+  return __mmap (NULL, size, prot, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1,
+                 0);
+}
+
+
 /* Return the guard page position on allocated stack.  */
 static inline char *
 __attribute ((always_inline))
-guard_position (void *mem, size_t size, size_t guardsize, struct pthread *pd,
+guard_position (void *mem, size_t size, size_t guardsize, const struct pthread *pd,
                 size_t pagesize_m1)
 {
 #if _STACK_GROWS_DOWN
@@ -159,27 +186,131 @@ guard_position (void *mem, size_t size, size_t guardsize, struct pthread *pd,
 #endif
 }
 
-/* Based on stack allocated with PROT_NONE, setup the required portions with
-   'prot' flags based on the guard page position.  */
-static inline int
-setup_stack_prot (char *mem, size_t size, char *guard, size_t guardsize,
-                  const int prot)
+/* Set up the MEM thread stack of SIZE bytes with the required protection
+   flags, along with a guard area of GUARDSIZE bytes.  It first tries
+   MADV_GUARD_INSTALL and then falls back to setting up the guard area with
+   an extra PROT_NONE mapping.  Update PD with the type of guard area setup.  */
+static inline bool
+setup_stack_prot (char *mem, size_t size, struct pthread *pd,
+                  size_t guardsize, size_t pagesize_m1)
 {
-  char *guardend = guard + guardsize;
+  if (__glibc_unlikely (guardsize == 0))
+    return true;
+
+  char *guard = guard_position (mem, size, guardsize, pd, pagesize_m1);
+  if (atomic_load_relaxed (&allocate_stack_mode) == ALLOCATE_GUARD_MADV_GUARD)
+    {
+      if (__madvise (guard, guardsize, MADV_GUARD_INSTALL) == 0)
+        {
+          pd->stack_mode = ALLOCATE_GUARD_MADV_GUARD;
+          return true;
+        }
+
+      /* If madvise fails it means the kernel does not support the guard
+         advice (we assume the syscall is available, guard is page-aligned,
+         and length is non-negative).  The stack already has the expected
+         protection flags, so it just needs to PROT_NONE the guard area.  */
+      atomic_store_relaxed (&allocate_stack_mode, ALLOCATE_GUARD_PROT_NONE);
+      if (__mprotect (guard, guardsize, PROT_NONE) != 0)
+        return false;
+    }
+  else
+    {
+      const int prot = stack_prot ();
+      char *guardend = guard + guardsize;
 #if _STACK_GROWS_DOWN
-  /* As defined at guard_position, for architectures with downward stack
-     the guard page is always at start of the allocated area.  */
-  if (__mprotect (guardend, size - guardsize, prot) != 0)
-    return errno;
+      /* As defined at guard_position, for architectures with downward stack
+         the guard page is always at the start of the allocated area.  */
+      if (__mprotect (guardend, size - guardsize, prot) != 0)
+        return false;
 #else
-  size_t mprots1 = (uintptr_t) guard - (uintptr_t) mem;
-  if (__mprotect (mem, mprots1, prot) != 0)
-    return errno;
-  size_t mprots2 = ((uintptr_t) mem + size) - (uintptr_t) guardend;
-  if (__mprotect (guardend, mprots2, prot) != 0)
-    return errno;
+      size_t mprots1 = (uintptr_t) guard - (uintptr_t) mem;
+      if (__mprotect (mem, mprots1, prot) != 0)
+        return false;
+      size_t mprots2 = ((uintptr_t) mem + size) - (uintptr_t) guardend;
+      if (__mprotect (guardend, mprots2, prot) != 0)
+        return false;
 #endif
-  return 0;
+    }
+
+  pd->stack_mode = ALLOCATE_GUARD_PROT_NONE;
+  return true;
+}
+
+/* Update the guard area of the thread stack MEM of size SIZE with the new
+   GUARDSIZE.  It uses the method defined by PD's stack_mode.  */
+static inline bool
+adjust_stack_prot (char *mem, size_t size, const struct pthread *pd,
+                   size_t guardsize, size_t pagesize_m1)
+{
+  /* The required guard area is larger than the current one.  For
+     _STACK_GROWS_DOWN it means the guard should increase as:
+
+       |guard|---------------------------------stack|
+       |new guard--|---------------------------stack|
+
+     while for _STACK_GROWS_UP:
+
+       |stack---------------------------|guard|-----|
+       |stack--------------------|new guard---|-----|
+
+     Both madvise and mprotect allow overlapping the required region,
+     so use the new guard placement with the new size.  */
+  if (guardsize > pd->guardsize)
+    {
+      char *guard = guard_position (mem, size, guardsize, pd, pagesize_m1);
+      if (pd->stack_mode == ALLOCATE_GUARD_MADV_GUARD)
+        return __madvise (guard, guardsize, MADV_GUARD_INSTALL) == 0;
+      else if (pd->stack_mode == ALLOCATE_GUARD_PROT_NONE)
+        return __mprotect (guard, guardsize, PROT_NONE) == 0;
+    }
+  /* The current guard area is larger than the required one.  For
+     _STACK_GROWS_DOWN it means changing the guard as:
+
+       |guard-------|-------------------------stack|
+       |new guard|----------------------------stack|
+
+     And for _STACK_GROWS_UP:
+
+       |stack---------------------|guard-------|---|
+       |stack------------------------|new guard|---|
+
+     For ALLOCATE_GUARD_MADV_GUARD it means removing the slack area
+     (the disjoint region between the old and new guard), while for
+     ALLOCATE_GUARD_PROT_NONE it requires mprotecting it with the stack
+     protection flags.  */
+  else if (pd->guardsize > guardsize)
+    {
+      size_t slacksize = pd->guardsize - guardsize;
+      if (pd->stack_mode == ALLOCATE_GUARD_MADV_GUARD)
+        {
+          void *slack =
+#if _STACK_GROWS_DOWN
+            mem + guardsize;
+#else
+            guard_position (mem, size, pd->guardsize, pd, pagesize_m1);
+#endif
+          return __madvise (slack, slacksize, MADV_GUARD_REMOVE) == 0;
+        }
+      else if (pd->stack_mode == ALLOCATE_GUARD_PROT_NONE)
+        {
+          const int prot = stack_prot ();
+#if _STACK_GROWS_DOWN
+          return __mprotect (mem + guardsize, slacksize, prot) == 0;
+#else
+          char *new_guard = (char *)(((uintptr_t) pd - guardsize)
+                                     & ~pagesize_m1);
+          char *old_guard = (char *)(((uintptr_t) pd - pd->guardsize)
+                                     & ~pagesize_m1);
+          /* The guard size difference might be > 0, but once rounded
+             to the nearest page the size difference might be zero.  */
+          if (new_guard > old_guard
+              && __mprotect (old_guard, new_guard - old_guard, prot) != 0)
+            return false;
+#endif
+        }
+    }
+  return true;
 }
 
 /* Mark the memory of the stack as usable to the kernel.  It frees everything
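Taken together, setup_stack_prot and adjust_stack_prot implement a probe-and-fallback scheme: try MADV_GUARD_INSTALL once, and if the kernel rejects the advice, permanently downgrade allocate_stack_mode to the classic PROT_NONE guard mapping. The standalone sketch below illustrates that pattern outside glibc; it is only an illustration, assuming the MADV_GUARD_INSTALL fallback value (102, from Linux's asm-generic/mman-common.h) matches the running kernel, and it uses the public mmap/madvise/mprotect wrappers rather than glibc's internal __mmap/__madvise/__mprotect.

/* Minimal sketch (not glibc code): reserve a stack-sized region and install
   a guard area at its start, preferring MADV_GUARD_INSTALL and falling back
   to an mprotect (PROT_NONE) guard when the kernel lacks the advice.  */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

#ifndef MADV_GUARD_INSTALL
# define MADV_GUARD_INSTALL 102   /* Fallback value from asm-generic/mman-common.h.  */
#endif

int
main (void)
{
  size_t pagesize = (size_t) sysconf (_SC_PAGESIZE);
  size_t guardsize = pagesize;
  size_t size = 8 * pagesize;

  /* With the madvise-based guard the whole mapping can be created with its
     final protection flags; no extra PROT_NONE pass is needed.  */
  char *mem = mmap (NULL, size, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (mem == MAP_FAILED)
    return 1;

  if (madvise (mem, guardsize, MADV_GUARD_INSTALL) == 0)
    puts ("guard installed with MADV_GUARD_INSTALL");
  /* Kernels without the feature reject the advice (EINVAL); fall back to
     the classic PROT_NONE guard page.  */
  else if (mprotect (mem, guardsize, PROT_NONE) == 0)
    puts ("guard installed with mprotect (PROT_NONE)");
  else
    return 1;

  /* The usable part of the region starts after the guard; touching the
     guard area itself would fault.  */
  memset (mem + guardsize, 0, size - guardsize);

  munmap (mem, size);
  return 0;
}

On a kernel that predates the guard advice the madvise call fails and the mprotect path is taken, mirroring the one-way allocate_stack_mode downgrade in the hunk above.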
@@ -291,7 +422,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
 
       /* This is a user-provided stack.  It will not be queued in the
          stack cache nor will the memory (except the TLS memory) be freed.  */
-      pd->user_stack = true;
+      pd->stack_mode = ALLOCATE_GUARD_USER;
 
       /* This is at least the second thread.  */
      pd->header.multiple_threads = 1;
@@ -325,10 +456,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
       /* Allocate some anonymous memory.  If possible use the cache.  */
       size_t guardsize;
       size_t reported_guardsize;
-      size_t reqsize;
       void *mem;
-      const int prot = (PROT_READ | PROT_WRITE
-                        | ((GL (dl_stack_flags) & PF_X) ? PROT_EXEC : 0));
 
       /* Adjust the stack size for alignment.  */
       size &= ~tls_static_align_m1;
@@ -358,16 +486,10 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
         return EINVAL;
 
       /* Try to get a stack from the cache.  */
-      reqsize = size;
       pd = get_cached_stack (&size, &mem);
       if (pd == NULL)
         {
-          /* If a guard page is required, avoid committing memory by first
-             allocate with PROT_NONE and then reserve with required permission
-             excluding the guard page.  */
-          mem = __mmap (NULL, size, (guardsize == 0) ? prot : PROT_NONE,
-                        MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
-
+          mem = allocate_thread_stack (size, guardsize);
           if (__glibc_unlikely (mem == MAP_FAILED))
             return errno;
 
@@ -394,15 +516,10 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
 #endif
 
           /* Now mprotect the required region excluding the guard area.  */
-          if (__glibc_likely (guardsize > 0))
+          if (!setup_stack_prot (mem, size, pd, guardsize, pagesize_m1))
             {
-              char *guard = guard_position (mem, size, guardsize, pd,
-                                            pagesize_m1);
-              if (setup_stack_prot (mem, size, guard, guardsize, prot) != 0)
-                {
-                  __munmap (mem, size);
-                  return errno;
-                }
+              __munmap (mem, size);
+              return errno;
             }
 
           /* Remember the stack-related values.  */
@@ -456,59 +573,31 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
              which will be read next.  */
         }
 
-      /* Create or resize the guard area if necessary.  */
-      if (__glibc_unlikely (guardsize > pd->guardsize))
+      /* Create or resize the guard area if necessary on an already
+         allocated stack.  */
+      if (!adjust_stack_prot (mem, size, pd, guardsize, pagesize_m1))
         {
-          char *guard = guard_position (mem, size, guardsize, pd,
-                                        pagesize_m1);
-          if (__mprotect (guard, guardsize, PROT_NONE) != 0)
-            {
-            mprot_error:
-              lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE);
-
-              /* Remove the thread from the list.  */
-              __nptl_stack_list_del (&pd->list);
+          lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE);
 
-              lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
+          /* Remove the thread from the list.  */
+          __nptl_stack_list_del (&pd->list);
 
-              /* Get rid of the TLS block we allocated.  */
-              _dl_deallocate_tls (TLS_TPADJ (pd), false);
+          lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
 
-              /* Free the stack memory regardless of whether the size
-                 of the cache is over the limit or not.  If this piece
-                 of memory caused problems we better do not use it
-                 anymore.  Uh, and we ignore possible errors.  There
-                 is nothing we could do.  */
-              (void) __munmap (mem, size);
+          /* Get rid of the TLS block we allocated.  */
+          _dl_deallocate_tls (TLS_TPADJ (pd), false);
 
-              return errno;
-            }
+          /* Free the stack memory regardless of whether the size
+             of the cache is over the limit or not.  If this piece
+             of memory caused problems we better do not use it
+             anymore.  Uh, and we ignore possible errors.  There
+             is nothing we could do.  */
+          (void) __munmap (mem, size);
 
-          pd->guardsize = guardsize;
+          return errno;
         }
-      else if (__builtin_expect (pd->guardsize - guardsize > size - reqsize,
-                                 0))
-        {
-          /* The old guard area is too large.  */
-
-#if _STACK_GROWS_DOWN
-          if (__mprotect ((char *) mem + guardsize, pd->guardsize - guardsize,
-                          prot) != 0)
-            goto mprot_error;
-#elif _STACK_GROWS_UP
-          char *new_guard = (char *)(((uintptr_t) pd - guardsize)
-                                     & ~pagesize_m1);
-          char *old_guard = (char *)(((uintptr_t) pd - pd->guardsize)
-                                     & ~pagesize_m1);
-          /* The guard size difference might be > 0, but once rounded
-             to the nearest page the size difference might be zero.  */
-          if (new_guard > old_guard
-              && __mprotect (old_guard, new_guard - old_guard, prot) != 0)
-            goto mprot_error;
-#endif
 
-          pd->guardsize = guardsize;
-        }
+      pd->guardsize = guardsize;
       /* The pthread_getattr_np() calls need to get passed the size
          requested in the attribute, regardless of how large the
         actually used guardsize is.  */
@@ -568,19 +657,21 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
 static void
 name_stack_maps (struct pthread *pd, bool set)
 {
+  size_t adjust = pd->stack_mode == ALLOCATE_GUARD_PROT_NONE ?
+                  pd->guardsize : 0;
 #if _STACK_GROWS_DOWN
-  void *stack = pd->stackblock + pd->guardsize;
+  void *stack = pd->stackblock + adjust;
 #else
   void *stack = pd->stackblock;
 #endif
-  size_t stacksize = pd->stackblock_size - pd->guardsize;
+  size_t stacksize = pd->stackblock_size - adjust;
 
   if (!set)
-    __set_vma_name (stack, stacksize, NULL);
+    __set_vma_name (stack, stacksize, " glibc: unused stack");
   else
     {
       unsigned int tid = pd->tid;
-      if (pd->user_stack)
+      if (pd->stack_mode == ALLOCATE_GUARD_USER)
         SET_STACK_NAME (" glibc: pthread user stack: ", stack, stacksize, tid);
       else
         SET_STACK_NAME (" glibc: pthread stack: ", stack, stacksize, tid);
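For context on the name_stack_maps hunk: the names passed to __set_vma_name end up as anonymous-VMA names in /proc/<pid>/maps. The sketch below is a hypothetical, minimal illustration of that mechanism using the PR_SET_VMA_ANON_NAME prctl directly, which glibc's internal __set_vma_name is assumed here to wrap; it needs a kernel built with CONFIG_ANON_VMA_NAME and simply reports failure otherwise.

/* Minimal sketch (not glibc code): label an anonymous mapping the way the
   thread stacks are labeled, then show the result from /proc/self/maps.  */
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/prctl.h>
#include <unistd.h>

#ifndef PR_SET_VMA
# define PR_SET_VMA 0x53564d41            /* Value from linux/prctl.h.  */
# define PR_SET_VMA_ANON_NAME 0
#endif

int
main (void)
{
  size_t size = 64 * 1024;
  void *mem = mmap (NULL, size, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (mem == MAP_FAILED)
    return 1;

  /* Name the mapping; the kernel keeps the string and shows it in maps.  */
  if (prctl (PR_SET_VMA, PR_SET_VMA_ANON_NAME, (unsigned long) mem, size,
             (unsigned long) " glibc: unused stack") != 0)
    perror ("prctl (PR_SET_VMA_ANON_NAME)");

  /* Print the matching /proc/self/maps line, e.g.
     "... [anon: glibc: unused stack]".  */
  char cmd[64];
  snprintf (cmd, sizeof cmd, "grep 'unused stack' /proc/%d/maps",
            (int) getpid ());
  if (system (cmd) == -1)
    return 1;

  munmap (mem, size);
  return 0;
}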