@@ -322,30 +322,6 @@ static int clone_parent(jmp_buf *env, int jmpval)
322
322
return clone (child_func , ca .stack_ptr , CLONE_PARENT | SIGCHLD , & ca );
323
323
}
324
324
325
- /* Returns the clone(2) flag for a namespace, given the name of a namespace. */
326
- static int nsflag (char * name )
327
- {
328
- if (!strcmp (name , "cgroup" ))
329
- return CLONE_NEWCGROUP ;
330
- else if (!strcmp (name , "ipc" ))
331
- return CLONE_NEWIPC ;
332
- else if (!strcmp (name , "mnt" ))
333
- return CLONE_NEWNS ;
334
- else if (!strcmp (name , "net" ))
335
- return CLONE_NEWNET ;
336
- else if (!strcmp (name , "pid" ))
337
- return CLONE_NEWPID ;
338
- else if (!strcmp (name , "user" ))
339
- return CLONE_NEWUSER ;
340
- else if (!strcmp (name , "uts" ))
341
- return CLONE_NEWUTS ;
342
- else if (!strcmp (name , "time" ))
343
- return CLONE_NEWTIME ;
344
-
345
- /* If we don't recognise a name, fallback to 0. */
346
- return 0 ;
347
- }
348
-
349
325
static uint32_t readint32 (char * buf )
350
326
{
351
327
return * (uint32_t * ) buf ;
@@ -444,35 +420,67 @@ void nl_free(struct nlconfig_t *config)
444
420
free (config -> data );
445
421
}
446
422
447
- void join_namespaces (char * nslist )
448
- {
449
- int num = 0 , i ;
450
- char * saveptr = NULL ;
451
- char * namespace = strtok_r (nslist , "," , & saveptr );
452
- struct namespace_t {
453
- int fd ;
454
- char type [PATH_MAX ];
455
- char path [PATH_MAX ];
456
- } * namespaces = NULL ;
423
/*
 * One namespace to join, parsed from a single 'ns:path' element of the
 * comma-separated namespace spec.
 */
struct namespace_t {
	int fd;			/* fd passed to setns(2); reset to -1 once closed */
	char type[PATH_MAX];	/* namespace name (the part before ':'), resolved via nstype() */
	char path[PATH_MAX];	/* path part of the element; also shown in log messages */
};
457
428
458
- if (!namespace || !strlen (namespace ) || !strlen (nslist ))
459
- bail ("ns paths are empty" );
429
/* A set of namespaces, stored as a bitmask of CLONE_NEW* flags. */
typedef int nsset_t;
430
+
431
+ static struct nstype_t {
432
+ int type ;
433
+ char * name ;
434
+ } all_ns_types [] = {
435
+ { CLONE_NEWCGROUP , "cgroup" },
436
+ { CLONE_NEWIPC , "ipc" },
437
+ { CLONE_NEWNS , "mnt" },
438
+ { CLONE_NEWNET , "net" },
439
+ { CLONE_NEWPID , "pid" },
440
+ { CLONE_NEWTIME , "time" },
441
+ { CLONE_NEWUSER , "user" },
442
+ { CLONE_NEWUTS , "uts" },
443
+ { }, /* null terminator */
444
+ };
460
445
446
+ /* Returns the clone(2) flag for a namespace, given the name of a namespace. */
447
+ static int nstype (char * name )
448
+ {
449
+ for (struct nstype_t * ns = all_ns_types ; ns -> name != NULL ; ns ++ )
450
+ if (!strcmp (name , ns -> name ))
451
+ return ns -> type ;
461
452
/*
462
- * We have to open the file descriptors first, since after
463
- * we join the mnt namespace we might no longer be able to
464
- * access the paths.
453
+ * setns(2) lets us join namespaces without knowing the type, but
454
+ * namespaces usually require special handling of some kind (so joining
455
+ * a namespace without knowing its type or joining a new namespace type
456
+ * without corresponding handling could result in broken behaviour) and
457
+ * the rest of runc doesn't allow unknown namespace types anyway.
465
458
*/
459
+ bail ("unknown namespace type %s" , name );
460
+ }
461
+
462
+ static nsset_t __open_namespaces (char * nsspec , struct namespace_t * * ns_list , size_t * ns_len )
463
+ {
464
+ int len = 0 ;
465
+ nsset_t ns_to_join = 0 ;
466
+ char * namespace , * saveptr = NULL ;
467
+ struct namespace_t * namespaces = NULL ;
468
+
469
+ namespace = strtok_r (nsspec , "," , & saveptr );
470
+
471
+ if (!namespace || !strlen (namespace ) || !strlen (nsspec ))
472
+ bail ("ns paths are empty" );
473
+
466
474
do {
467
475
int fd ;
468
476
char * path ;
469
477
struct namespace_t * ns ;
470
478
471
479
/* Resize the namespace array. */
472
- namespaces = realloc (namespaces , ++ num * sizeof (struct namespace_t ));
480
+ namespaces = realloc (namespaces , ++ len * sizeof (struct namespace_t ));
473
481
if (!namespaces )
474
482
bail ("failed to reallocate namespace array" );
475
- ns = & namespaces [num - 1 ];
483
+ ns = & namespaces [len - 1 ];
476
484
477
485
/* Split 'ns:path'. */
478
486
path = strstr (namespace , ":" );
@@ -488,38 +496,145 @@ void join_namespaces(char *nslist)
488
496
strncpy (ns -> type , namespace , PATH_MAX - 1 );
489
497
strncpy (ns -> path , path , PATH_MAX - 1 );
490
498
ns -> path [PATH_MAX - 1 ] = '\0' ;
491
- } while ((namespace = strtok_r (NULL , "," , & saveptr )) != NULL );
492
499
493
- /*
494
- * The ordering in which we join namespaces is important. We should
495
- * always join the user namespace *first*. This is all guaranteed
496
- * from the container_linux.go side of this, so we're just going to
497
- * follow the order given to us.
498
- */
500
+ ns_to_join |= nstype (ns -> type );
501
+ } while ((namespace = strtok_r (NULL , "," , & saveptr )) != NULL );
499
502
500
- for (i = 0 ; i < num ; i ++ ) {
501
- struct namespace_t * ns = & namespaces [i ];
502
- int flag = nsflag (ns -> type );
503
+ * ns_list = namespaces ;
504
+ * ns_len = len ;
505
+ return ns_to_join ;
506
+ }
503
507
504
- write_log (DEBUG , "setns(%#x) into %s namespace (with path %s)" , flag , ns -> type , ns -> path );
505
- if (setns (ns -> fd , flag ) < 0 )
508
+ /*
509
+ * Try to join all namespaces that are in the "allow" nsset, and return the
510
+ * set we were able to successfully join. If a permission error is returned
511
+ * from nsset(2), the namespace is skipped (non-permission errors are fatal).
512
+ */
513
+ static nsset_t __join_namespaces (nsset_t allow , struct namespace_t * ns_list , size_t ns_len )
514
+ {
515
+ nsset_t joined = 0 ;
516
+
517
+ for (size_t i = 0 ; i < ns_len ; i ++ ) {
518
+ struct namespace_t * ns = & ns_list [i ];
519
+ int type = nstype (ns -> type );
520
+ int err , saved_errno ;
521
+
522
+ if (!(type & allow ))
523
+ continue ;
524
+
525
+ err = setns (ns -> fd , type );
526
+ saved_errno = errno ;
527
+ write_log (DEBUG , "setns(%#x) into %s namespace (with path %s): %s" ,
528
+ type , ns -> type , ns -> path , strerror (errno ));
529
+ if (err < 0 ) {
530
+ /* Skip permission errors. */
531
+ if (saved_errno == EPERM )
532
+ continue ;
506
533
bail ("failed to setns into %s namespace" , ns -> type );
534
+ }
535
+ joined |= type ;
507
536
508
537
/*
509
538
* If we change user namespaces, make sure we switch to root in the
510
539
* namespace (this matches the logic for unshare(CLONE_NEWUSER)), lots
511
540
* of things can break if we aren't the right user. See
512
541
* <https://github.com/opencontainers/runc/issues/4466> for one example.
513
542
*/
514
- if (flag == CLONE_NEWUSER ) {
543
+ if (type == CLONE_NEWUSER ) {
515
544
if (setresuid (0 , 0 , 0 ) < 0 )
516
545
bail ("failed to become root in user namespace" );
517
546
}
518
547
519
548
close (ns -> fd );
549
+ ns -> fd = -1 ;
550
+ }
551
+ return joined ;
552
+ }
553
+
554
/*
 * Append src to the heap-allocated string dst and return the result.
 *
 * dst may be NULL, in which case a fresh heap copy of src is returned.
 * Otherwise dst must have come from malloc/realloc; it is consumed by this
 * call and the caller must use the returned pointer instead.
 *
 * Returns NULL if memory cannot be allocated. In that case dst has been
 * freed, so the usual "s = strappend(s, ...)" pattern does not leak.
 */
static char *strappend(char *dst, const char *src)
{
	size_t dst_len = dst ? strlen(dst) : 0;
	size_t src_len = strlen(src);
	char *grown;

	/* realloc(NULL, n) behaves like malloc(n), covering the !dst case. */
	grown = realloc(dst, dst_len + src_len + 1);
	if (!grown) {
		/* On failure realloc leaves dst allocated; release it ourselves. */
		free(dst);
		return NULL;
	}

	/* Copy src including its terminating NUL right after the old contents. */
	memcpy(grown + dst_len, src, src_len + 1);
	return grown;
}
564
+
565
+ static char * nsset_to_str (nsset_t nsset )
566
+ {
567
+ char * str = NULL ;
568
+ for (struct nstype_t * ns = all_ns_types ; ns -> name != NULL ; ns ++ ) {
569
+ if (ns -> type & nsset ) {
570
+ if (str )
571
+ str = strappend (str , ", " );
572
+ str = strappend (str , ns -> name );
573
+ }
574
+ }
575
+ return str ? : strdup ("" );
576
+ }
577
+
578
+ static void __close_namespaces (nsset_t to_join , nsset_t joined , struct namespace_t * ns_list , size_t ns_len )
579
+ {
580
+ /* We expect to have joined every namespace. */
581
+ nsset_t failed_to_join = to_join & ~joined ;
582
+
583
+ /* Double-check that we used up (and thus joined) all of the nsfds. */
584
+ for (size_t i = 0 ; i < ns_len ; i ++ ) {
585
+ struct namespace_t * ns = & ns_list [i ];
586
+ int type = nstype (ns -> type );
587
+
588
+ if (ns -> fd < 0 )
589
+ continue ;
590
+
591
+ failed_to_join |= type ;
592
+ write_log (FATAL , "failed to setns(%#x) into %s namespace (with path %s): %s" ,
593
+ type , ns -> type , ns -> path , strerror (EPERM ));
594
+ close (ns -> fd );
595
+ ns -> fd = -1 ;
520
596
}
521
597
522
- free (namespaces );
598
+ /* Make sure we joined the namespaces we planned to. */
599
+ if (failed_to_join )
600
+ bail ("failed to join {%s} namespaces: %s" , nsset_to_str (failed_to_join ), strerror (EPERM ));
601
+
602
+ free (ns_list );
603
+ }
604
+
605
+ void join_namespaces (char * nsspec )
606
+ {
607
+ nsset_t to_join = 0 , joined = 0 ;
608
+ struct namespace_t * ns_list ;
609
+ size_t ns_len ;
610
+
611
+ /*
612
+ * We have to open the file descriptors first, since after we join the
613
+ * mnt or user namespaces we might no longer be able to access the
614
+ * paths.
615
+ */
616
+ to_join = __open_namespaces (nsspec , & ns_list , & ns_len );
617
+
618
+ /*
619
+ * We first try to join all non-userns namespaces to join any namespaces
620
+ * that we might not be able to join once we switch credentials to the
621
+ * container's userns. We then join the user namespace, and then try to
622
+ * join any remaining namespaces (this last step is needed for rootless
623
+ * containers -- we don't get setns(2) permissions until we join the userns
624
+ * and get CAP_SYS_ADMIN).
625
+ *
626
+ * Splitting the joins this way is necessary for containers that are
627
+ * configured to join some externally-created namespace but are also
628
+ * configured to join an unrelated user namespace.
629
+ *
630
+ * This is similar to what nsenter(1) seems to do in practice.
631
+ */
632
+ joined |= __join_namespaces (to_join & ~(joined | CLONE_NEWUSER ), ns_list , ns_len );
633
+ joined |= __join_namespaces (CLONE_NEWUSER , ns_list , ns_len );
634
+ joined |= __join_namespaces (to_join & ~(joined | CLONE_NEWUSER ), ns_list , ns_len );
635
+
636
+ /* Verify that we joined all of the namespaces. */
637
+ __close_namespaces (to_join , joined , ns_list , ns_len );
523
638
}
524
639
525
640
static inline int sane_kill (pid_t pid , int signum )
0 commit comments