busybox/util-linux/unshare.c

380 lines
11 KiB
C
Raw Normal View History

/* vi: set sw=4 ts=4: */
/*
* Mini unshare implementation for busybox.
*
* Copyright (C) 2016 by Bartosz Golaszewski <bartekgola@gmail.com>
*
* Licensed under GPLv2 or later, see file LICENSE in this source tree.
*/
//config:config UNSHARE
//config: bool "unshare (9.2 kb)"
//config: default y
//config: depends on !NOMMU
//config: select PLATFORM_LINUX
//config: select LONG_OPTS
//config: help
//config: Run program with some namespaces unshared from parent.
// needs LONG_OPTS: it is awkward to exclude code which handles --propagation
// and --setgroups based on LONG_OPTS, so instead applet requires LONG_OPTS.
// depends on !NOMMU: we need fork()
//applet:IF_UNSHARE(APPLET(unshare, BB_DIR_USR_BIN, BB_SUID_DROP))
//kbuild:lib-$(CONFIG_UNSHARE) += unshare.o
//usage:#define unshare_trivial_usage
//usage: "[OPTIONS] [PROG [ARGS]]"
//usage:#define unshare_full_usage "\n"
//usage: "\n -m,--mount[=FILE] Unshare mount namespace"
//usage: "\n -u,--uts[=FILE] Unshare UTS namespace (hostname etc.)"
//usage: "\n -i,--ipc[=FILE] Unshare System V IPC namespace"
//usage: "\n -n,--net[=FILE] Unshare network namespace"
//usage: "\n -p,--pid[=FILE] Unshare PID namespace"
//usage: "\n -U,--user[=FILE] Unshare user namespace"
//usage: "\n -f,--fork Fork before execing PROG"
//usage: "\n -r,--map-root-user Map current user to root (implies -U)"
//usage: "\n --mount-proc[=DIR] Mount /proc filesystem first (implies -m)"
//usage: "\n --propagation slave|shared|private|unchanged"
//usage: "\n Modify mount propagation in mount namespace"
//usage: "\n --setgroups allow|deny Control the setgroups syscall in user namespaces"
#include <sched.h>
#ifndef CLONE_NEWUTS
# define CLONE_NEWUTS 0x04000000
#endif
#ifndef CLONE_NEWIPC
# define CLONE_NEWIPC 0x08000000
#endif
#ifndef CLONE_NEWUSER
# define CLONE_NEWUSER 0x10000000
#endif
#ifndef CLONE_NEWPID
# define CLONE_NEWPID 0x20000000
#endif
#ifndef CLONE_NEWNET
# define CLONE_NEWNET 0x40000000
#endif
#include <sys/mount.h>
#ifndef MS_REC
# define MS_REC (1 << 14)
#endif
#ifndef MS_PRIVATE
# define MS_PRIVATE (1 << 18)
#endif
#ifndef MS_SLAVE
# define MS_SLAVE (1 << 19)
#endif
#ifndef MS_SHARED
# define MS_SHARED (1 << 20)
#endif
#include "libbb.h"
/*
 * Thin wrapper around mount(2): performs the mount with a NULL data
 * argument and, if mount() reports failure, hands a descriptive message
 * to bb_perror_msg_and_die().
 */
static void mount_or_die(const char *source, const char *target,
		const char *fstype, unsigned long mountflags)
{
	int rc = mount(source, target, fstype, mountflags, NULL);

	if (rc == 0)
		return;

	/*
	 * fstype is intentionally left out of the message: it is always
	 * either NULL or "proc", and "proc" is only used to mount /proc,
	 * so it is easily deduced from the rest of the message.
	 */
	bb_perror_msg_and_die("can't mount %s on %s (flags:0x%lx)",
			source, target, mountflags);
}
/*
 * Files under /proc/self that unshare_main() writes when handling
 * --map-root-user and --setgroups.
 */
#define PATH_PROC_SETGROUPS "/proc/self/setgroups"
#define PATH_PROC_UIDMAP "/proc/self/uid_map"
#define PATH_PROC_GIDMAP "/proc/self/gid_map"
/* Static description of one namespace kind; see ns_list[]. */
struct namespace_descr {
/* CLONE_NEW* bit that is OR-ed into the flags passed to unshare() */
int flag;
/*
 * File name under /proc/PID/ns/. The array is exactly 4 bytes, so a
 * 4-character name such as "user" is stored without a terminating NUL
 * and has to be printed with "%.4s".
 */
const char nsfile4[4];
};
/* Per-invocation state for one namespace kind. */
struct namespace_ctx {
/*
 * Bind-mount target (the optional FILE argument of the corresponding
 * long option), or NULL when no FILE was given.
 */
char *path;
};
/*
 * Option bits tested in the value returned by getopt32long().
 * They are listed in the same order as the option characters in opt_str.
 * unshare_main() tests the first NS_COUNT bits as (1 << i) for ns_list[i],
 * so the namespace options must stay first and in ns_list[] order.
 */
enum {
OPT_mount = 1 << 0,
OPT_uts = 1 << 1,
OPT_ipc = 1 << 2,
OPT_net = 1 << 3,
OPT_pid = 1 << 4,
OPT_user = 1 << 5, /* OPT_user, NS_USR_POS, and ns_list[] index must match! */
OPT_fork = 1 << 6,
OPT_map_root = 1 << 7,
OPT_mount_proc = 1 << 8,
OPT_propagation = 1 << 9,
OPT_setgroups = 1 << 10,
};
/*
 * Indexes into ns_list[] and into the ns_ctx_list[] array of
 * unshare_main(); NS_COUNT is the number of namespace kinds.
 */
enum {
NS_MNT_POS = 0,
NS_UTS_POS,
NS_IPC_POS,
NS_NET_POS,
NS_PID_POS,
NS_USR_POS, /* OPT_user, NS_USR_POS, and ns_list[] index must match! */
NS_COUNT,
};
/*
 * One entry per namespace kind, indexed by NS_*_POS: the unshare() flag
 * and the name of the matching file under /proc/PID/ns/.
 */
static const struct namespace_descr ns_list[] = {
{ CLONE_NEWNS, "mnt" },
{ CLONE_NEWUTS, "uts" },
{ CLONE_NEWIPC, "ipc" },
{ CLONE_NEWNET, "net" },
{ CLONE_NEWPID, "pid" },
{ CLONE_NEWUSER, "user" }, /* OPT_user, NS_USR_POS, and ns_list[] index must match! */
};
/*
 * Upstream unshare doesn't support short options for --mount-proc,
 * --propagation, --setgroups.
 * Optional arguments (namespace mountpoints) exist only for long opts,
 * we are forced to use "fake" letters for them.
 * '+': stop at first non-option.
 */
static const char opt_str[] ALIGN1 = "+muinpU""fr""\xfd::""\xfe:""\xff:";
/*
 * Long option table. "\xf0".."\xf5" are the fake characters of the
 * namespace long options that accept an optional FILE; opt_complementary
 * in unshare_main() makes each of them imply the matching short option.
 * "\xfd", "\xfe" and "\xff" are the fake characters declared in opt_str.
 */
static const char unshare_longopts[] ALIGN1 =
"mount\0" Optional_argument "\xf0"
"uts\0" Optional_argument "\xf1"
"ipc\0" Optional_argument "\xf2"
"net\0" Optional_argument "\xf3"
"pid\0" Optional_argument "\xf4"
"user\0" Optional_argument "\xf5"
"fork\0" No_argument "f"
"map-root-user\0" No_argument "r"
"mount-proc\0" Optional_argument "\xfd"
"propagation\0" Required_argument "\xfe"
"setgroups\0" Required_argument "\xff"
;
/*
 * Ugly-looking string reuse trick: the same literal is used both as the
 * default --propagation value (read as a C string it is just "private",
 * see unshare_main()) and as the NUL-separated list of accepted values
 * that parse_propagation() passes to index_in_strings().
 */
#define PRIVATE_STR "private\0""unchanged\0""shared\0""slave\0"
#define PRIVATE_UNCHANGED_SHARED_SLAVE PRIVATE_STR
/*
 * Translate a --propagation argument into mount(2) flags.
 *
 * prop_str is looked up in the "private", "unchanged", "shared", "slave"
 * list. An unknown value is reported through bb_error_msg_and_die().
 * "unchanged" maps to 0; the other values map to MS_REC combined with the
 * matching MS_PRIVATE / MS_SHARED / MS_SLAVE flag.
 */
static unsigned long parse_propagation(const char *prop_str)
{
	int idx = index_in_strings(PRIVATE_UNCHANGED_SHARED_SLAVE, prop_str);

	if (idx < 0)
		bb_error_msg_and_die("unrecognized: --%s=%s", "propagation", prop_str);

	switch (idx) {
	case 0: /* private */
		return MS_REC | MS_PRIVATE;
	case 1: /* unchanged */
		return 0;
	case 2: /* shared */
		return MS_REC | MS_SHARED;
	default: /* slave */
		return MS_REC | MS_SLAVE;
	}
}
/*
 * Bind-mount the namespace files of process `pid` onto the requested
 * targets.
 *
 * For every namespace kind whose ns_ctx_list[] entry has a non-NULL path,
 * /proc/<pid>/ns/<name> is bind-mounted (MS_BIND) on that path via
 * mount_or_die(). Entries with a NULL path are skipped.
 */
static void mount_namespaces(pid_t pid, struct namespace_ctx *ns_ctx_list)
{
	int idx;

	for (idx = 0; idx < NS_COUNT; idx++) {
		/* room for the fixed text, a 4-char name and the decimal pid */
		char ns_file[sizeof("/proc/%u/ns/AAAA") + sizeof(int)*3];
		const char *bind_target = ns_ctx_list[idx].path;

		if (bind_target) {
			/* "%.4s": nsfile4 is not necessarily NUL-terminated */
			sprintf(ns_file, "/proc/%u/ns/%.4s",
					(unsigned)pid, ns_list[idx].nsfile4);
			mount_or_die(ns_file, bind_target, NULL, MS_BIND);
		}
	}
}
int unshare_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
int unshare_main(int argc UNUSED_PARAM, char **argv)
{
int i;
unsigned int opts;
int unsflags;
uintptr_t need_mount;
const char *proc_mnt_target;
const char *prop_str;
const char *setgrp_str;
unsigned long prop_flags;
uid_t reuid = geteuid();
gid_t regid = getegid();
struct fd_pair fdp;
pid_t child = child; /* for compiler */
struct namespace_ctx ns_ctx_list[NS_COUNT];
memset(ns_ctx_list, 0, sizeof(ns_ctx_list));
proc_mnt_target = "/proc";
prop_str = PRIVATE_STR;
setgrp_str = NULL;
opt_complementary =
"\xf0""m" /* long opts (via their "fake chars") imply short opts */
":\xf1""u"
":\xf2""i"
":\xf3""n"
":\xf4""p"
":\xf5""U"
":ru" /* --map-root-user or -r implies -u */
":\xfd""m" /* --mount-proc implies -m */
;
getopt32: remove applet_long_options FEATURE_GETOPT_LONG made dependent on LONG_OPTS. The folloving options are removed, now LONG_OPTS enables long options for affected applets: FEATURE_ENV_LONG_OPTIONS FEATURE_EXPAND_LONG_OPTIONS FEATURE_UNEXPAND_LONG_OPTIONS FEATURE_MKDIR_LONG_OPTIONS FEATURE_MV_LONG_OPTIONS FEATURE_RMDIR_LONG_OPTIONS FEATURE_ADDGROUP_LONG_OPTIONS FEATURE_ADDUSER_LONG_OPTIONS FEATURE_HWCLOCK_LONG_OPTIONS FEATURE_NSENTER_LONG_OPTS FEATURE_CHCON_LONG_OPTIONS FEATURE_RUNCON_LONG_OPTIONS They either had a small number of long options, or their long options are essential. Example: upstream addgroup and adduser have ONLY longopts, we should probably go further and get rid of non-standard short options. To this end, make addgroup and adduser "select LONG_OPTS". We had this breakage caused by us even in our own package! #if ENABLE_LONG_OPTS || !ENABLE_ADDGROUP /* We try to use --gid, not -g, because "standard" addgroup * has no short option -g, it has only long --gid. */ argv[1] = (char*)"--gid"; #else /* Breaks if system in fact does NOT use busybox addgroup */ argv[1] = (char*)"-g"; #endif xargs: its lone longopt no longer depends on DESKTOP, only on LONG_OPTS. hwclock TODO: get rid of incompatible -t, -l aliases to --systz, --localtime Shorten help texts by omitting long option when short opt alternative exists. Reduction of size comes from the fact that store of an immediate (an address of longopts) to a fixed address (global variable) is a longer insn than pushing that immediate or passing it in a register. This effect is CPU-agnostic. 
function old new delta getopt32 1350 22 -1328 vgetopt32 - 1318 +1318 getopt32long - 24 +24 tftpd_main 562 567 +5 scan_recursive 376 380 +4 collect_cpu 545 546 +1 date_main 1096 1095 -1 hostname_main 262 259 -3 uname_main 259 255 -4 setpriv_main 362 358 -4 rmdir_main 191 187 -4 mv_main 562 558 -4 ipcalc_main 548 544 -4 ifenslave_main 641 637 -4 gzip_main 192 188 -4 gunzip_main 77 73 -4 fsfreeze_main 81 77 -4 flock_main 318 314 -4 deluser_main 337 333 -4 cp_main 374 370 -4 chown_main 175 171 -4 applet_long_options 4 - -4 xargs_main 894 889 -5 wget_main 2540 2535 -5 udhcpc_main 2767 2762 -5 touch_main 436 431 -5 tar_main 1014 1009 -5 start_stop_daemon_main 1033 1028 -5 sed_main 682 677 -5 script_main 1082 1077 -5 run_parts_main 330 325 -5 rtcwake_main 459 454 -5 od_main 2169 2164 -5 nl_main 201 196 -5 modprobe_main 773 768 -5 mkdir_main 160 155 -5 ls_main 568 563 -5 install_main 773 768 -5 hwclock_main 411 406 -5 getopt_main 622 617 -5 fstrim_main 256 251 -5 env_main 198 193 -5 dumpleases_main 635 630 -5 dpkg_main 3991 3986 -5 diff_main 1355 1350 -5 cryptpw_main 233 228 -5 cpio_main 593 588 -5 conspy_main 1135 1130 -5 chpasswd_main 313 308 -5 adduser_main 887 882 -5 addgroup_main 416 411 -5 ftpgetput_main 351 345 -6 get_terminal_width_height 242 234 -8 expand_main 690 680 -10 static.expand_longopts 18 - -18 static.unexpand_longopts 27 - -27 mkdir_longopts 28 - -28 env_longopts 30 - -30 static.ifenslave_longopts 34 - -34 mv_longopts 46 - -46 static.rmdir_longopts 48 - -48 packed_usage 31739 31687 -52 ------------------------------------------------------------------------------ (add/remove: 2/8 grow/shrink: 3/49 up/down: 1352/-1840) Total: -488 bytes text data bss dec hex filename 915681 485 6880 923046 e15a6 busybox_old 915428 485 6876 922789 e14a5 busybox_unstripped Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
2017-08-08 20:08:18 +05:30
opts = getopt32long(argv, opt_str, unshare_longopts,
&proc_mnt_target, &prop_str, &setgrp_str,
&ns_ctx_list[NS_MNT_POS].path,
&ns_ctx_list[NS_UTS_POS].path,
&ns_ctx_list[NS_IPC_POS].path,
&ns_ctx_list[NS_NET_POS].path,
&ns_ctx_list[NS_PID_POS].path,
&ns_ctx_list[NS_USR_POS].path
);
argv += optind;
//bb_error_msg("opts:0x%x", opts);
//bb_error_msg("mount:%s", ns_ctx_list[NS_MNT_POS].path);
//bb_error_msg("proc_mnt_target:%s", proc_mnt_target);
//bb_error_msg("prop_str:%s", prop_str);
//bb_error_msg("setgrp_str:%s", setgrp_str);
//exit(1);
if (setgrp_str) {
if (strcmp(setgrp_str, "allow") == 0) {
if (opts & OPT_map_root) {
bb_error_msg_and_die(
"--setgroups=allow and --map-root-user "
"are mutually exclusive"
);
}
} else {
/* It's not "allow", must be "deny" */
if (strcmp(setgrp_str, "deny") != 0)
bb_error_msg_and_die("unrecognized: --%s=%s",
"setgroups", setgrp_str);
}
}
unsflags = 0;
need_mount = 0;
for (i = 0; i < NS_COUNT; i++) {
const struct namespace_descr *ns = &ns_list[i];
struct namespace_ctx *ns_ctx = &ns_ctx_list[i];
if (opts & (1 << i))
unsflags |= ns->flag;
need_mount |= (uintptr_t)(ns_ctx->path);
}
/* need_mount != 0 if at least one FILE was given */
prop_flags = MS_REC | MS_PRIVATE;
/* Silently ignore --propagation if --mount is not requested. */
if (opts & OPT_mount)
prop_flags = parse_propagation(prop_str);
/*
* Special case: if we were requested to unshare the mount namespace
* AND to make any namespace persistent (by bind mounting it) we need
* to spawn a child process which will wait for the parent to call
* unshare(), then mount parent's namespaces while still in the
* previous namespace.
*/
fdp.wr = -1;
if (need_mount && (opts & OPT_mount)) {
/*
* Can't use getppid() in child, as we can be unsharing the
* pid namespace.
*/
pid_t ppid = getpid();
xpiped_pair(fdp);
child = xfork();
if (child == 0) {
/* Child */
close(fdp.wr);
/* Wait until parent calls unshare() */
read(fdp.rd, ns_ctx_list, 1); /* ...using bogus buffer */
/*close(fdp.rd);*/
/* Mount parent's unshared namespaces. */
mount_namespaces(ppid, ns_ctx_list);
return EXIT_SUCCESS;
}
/* Parent continues */
}
if (unshare(unsflags) != 0)
bb_perror_msg_and_die("unshare(0x%x)", unsflags);
if (fdp.wr >= 0) {
close(fdp.wr); /* Release child */
close(fdp.rd); /* should close fd, to not confuse exec'ed PROG */
}
if (need_mount) {
/* Wait for the child to finish mounting the namespaces. */
if (opts & OPT_mount) {
int exit_status = wait_for_exitstatus(child);
if (WIFEXITED(exit_status) &&
WEXITSTATUS(exit_status) != EXIT_SUCCESS)
return WEXITSTATUS(exit_status);
} else {
/*
* Regular way - we were requested to mount some other
* namespaces: mount them after the call to unshare().
*/
mount_namespaces(getpid(), ns_ctx_list);
}
}
/*
* When we're unsharing the pid namespace, it's not the process that
* calls unshare() that is put into the new namespace, but its first
* child. The user may want to use this option to spawn a new process
* that'll become PID 1 in this new namespace.
*/
if (opts & OPT_fork) {
xvfork_parent_waits_and_exits();
/* Child continues */
}
if (opts & OPT_map_root) {
char uidmap_buf[sizeof("%u 0 1") + sizeof(int)*3];
/*
* Since Linux 3.19 unprivileged writing of /proc/self/gid_map
* has been disabled unless /proc/self/setgroups is written
* first to permanently disable the ability to call setgroups
* in that user namespace.
*/
xopen_xwrite_close(PATH_PROC_SETGROUPS, "deny");
sprintf(uidmap_buf, "%u 0 1", (unsigned)reuid);
xopen_xwrite_close(PATH_PROC_UIDMAP, uidmap_buf);
sprintf(uidmap_buf, "%u 0 1", (unsigned)regid);
xopen_xwrite_close(PATH_PROC_GIDMAP, uidmap_buf);
} else
if (setgrp_str) {
/* Write "allow" or "deny" */
xopen_xwrite_close(PATH_PROC_SETGROUPS, setgrp_str);
}
if (opts & OPT_mount) {
mount_or_die("none", "/", NULL, prop_flags);
}
if (opts & OPT_mount_proc) {
/*
* When creating a new pid namespace, we might want the pid
* subdirectories in /proc to remain consistent with the new
* process IDs. Without --mount-proc the pids in /proc would
* still reflect the old pid namespace. This is why we make
* /proc private here and then do a fresh mount.
*/
mount_or_die("none", proc_mnt_target, NULL, MS_PRIVATE | MS_REC);
mount_or_die("proc", proc_mnt_target, "proc", MS_NOSUID | MS_NOEXEC | MS_NODEV);
}
exec_prog_or_SHELL(argv);
}