unshare: new applet
function old new delta unshare_main - 873 +873 .rodata 154444 155131 +687 packed_usage 30329 30520 +191 unshare_longopts - 106 +106 mount_namespaces - 99 +99 mount_or_die - 51 +51 ns_list - 48 +48 wait_for_exitstatus - 41 +41 opt_str - 17 +17 applet_names 2510 2518 +8 applet_main 2912 2920 +8 applet_suid 91 92 +1 applet_install_loc 182 183 +1 ------------------------------------------------------------------------------ (add/remove: 8/0 grow/shrink: 6/0 up/down: 2131/0) Total: 2131 bytes text data bss dec hex filename 826110 4070 9080 839260 cce5c busybox_old 827961 4078 9080 841119 cd59f busybox_unstripped Signed-off-by: Bartosz Golaszewski <bartekgola@gmail.com> Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
		
				
					committed by
					
						 Denys Vlasenko
						Denys Vlasenko
					
				
			
			
				
	
			
			
			
						parent
						
							52977a7d60
						
					
				
				
					commit
					29b33b63d4
				
			
							
								
								
									
										380
									
								
								util-linux/unshare.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										380
									
								
								util-linux/unshare.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,380 @@ | ||||
| /* vi: set sw=4 ts=4: */ | ||||
| /* | ||||
|  * Mini unshare implementation for busybox. | ||||
|  * | ||||
|  * Copyright (C) 2016 by Bartosz Golaszewski <bartekgola@gmail.com> | ||||
|  * | ||||
|  * Licensed under GPLv2 or later, see file LICENSE in this source tree. | ||||
|  */ | ||||
|  | ||||
| //config:config UNSHARE | ||||
| //config:	bool "unshare" | ||||
| //config:	default y | ||||
| //config:	depends on LONG_OPTS && !NOMMU | ||||
| //config:	select PLATFORM_LINUX | ||||
| //config:	help | ||||
| //config:	  Run program with some namespaces unshared from parent. | ||||
|  | ||||
| // depends on LONG_OPTS: it is awkward to exclude code which handles --propagation | ||||
| // and --setgroups based on LONG_OPTS, so instead applet requires LONG_OPTS. | ||||
| // depends on !NOMMU: we need fork() | ||||
|  | ||||
| //applet:IF_UNSHARE(APPLET(unshare, BB_DIR_USR_BIN, BB_SUID_DROP)) | ||||
|  | ||||
| //kbuild:lib-$(CONFIG_UNSHARE) += unshare.o | ||||
|  | ||||
| //usage:#define unshare_trivial_usage | ||||
| //usage:       "[OPTIONS] [PROG [ARGS]]" | ||||
| //usage:#define unshare_full_usage "\n" | ||||
| //usage:     "\n	-m, --mount[=FILE]	Unshare mount namespace" | ||||
| //usage:     "\n	-u, --uts[=FILE]	Unshare UTS namespace (hostname etc.)" | ||||
| //usage:     "\n	-i, --ipc[=FILE]	Unshare System V IPC namespace" | ||||
| //usage:     "\n	-n, --net[=FILE]	Unshare network namespace" | ||||
| //usage:     "\n	-p, --pid[=FILE]	Unshare PID namespace" | ||||
| //usage:     "\n	-U, --user[=FILE]	Unshare user namespace" | ||||
| //usage:     "\n	-f, --fork		Fork before execing PROG" | ||||
| //usage:     "\n	-r, --map-root-user	Map current user to root (implies -U)" | ||||
| //usage:     "\n	--mount-proc[=DIR]	Mount /proc filesystem first (implies -m)" | ||||
| //usage:     "\n	--propagation slave|shared|private|unchanged" | ||||
| //usage:     "\n				Modify mount propagation in mount namespace" | ||||
| //usage:     "\n	--setgroups allow|deny	Control the setgroups syscall in user namespaces" | ||||
|  | ||||
| #include <sched.h> | ||||
| #include <sys/mount.h> | ||||
| #include "libbb.h" | ||||
|  | ||||
| static void mount_or_die(const char *source, const char *target, | ||||
|                  const char *fstype, unsigned long mountflags) | ||||
| { | ||||
| 	if (mount(source, target, fstype, mountflags, NULL)) { | ||||
| 		bb_perror_msg_and_die("can't mount %s on %s (flags:0x%lx)", | ||||
| 			source, target, mountflags); | ||||
| 		/* fstype is always either NULL or "proc". | ||||
| 		 * "proc" is only used to mount /proc. | ||||
| 		 * No need to clutter up error message with fstype, | ||||
| 		 * it is easily deductible. | ||||
| 		 */ | ||||
| 	} | ||||
| } | ||||
|  | ||||
// TODO: move to libbb
/*
 * Wait for process 'pid' and return the raw wait status as stored by
 * safe_waitpid() (to be examined with the WIF... and W... macros).
 * Terminates the applet if safe_waitpid() reports an error.
 */
static int wait_for_exitstatus(pid_t pid)
{
	int wstatus;

	if (safe_waitpid(pid, &wstatus, 0) < 0)
		bb_perror_msg_and_die("waitpid");

	return wstatus;
}
|  | ||||
/*
 * procfs files of the calling process which are written while setting up
 * a user namespace: the setgroups policy ("allow" or "deny") and the
 * UID and GID mappings.
 */
#define PATH_PROC_SETGROUPS	"/proc/self/setgroups"
#define PATH_PROC_UIDMAP	"/proc/self/uid_map"
#define PATH_PROC_GIDMAP	"/proc/self/gid_map"
|  | ||||
/* Static description of one namespace type (one entry of ns_list[]). */
struct namespace_descr {
	/* CLONE_NEWxxx bit, OR-ed into the flags given to unshare() */
	int flag;
	/*
	 * Name of the namespace file under /proc/PID/ns/.
	 * Exactly 4 bytes: a 4-character name such as "user" is stored
	 * without a terminating NUL, so print it with "%.4s" only.
	 */
	const char nsfile4[4];
};
|  | ||||
/* Per-invocation state of one namespace type. */
struct namespace_ctx {
	/*
	 * FILE argument given to the namespace's long option, or NULL.
	 * When set, the namespace file is bind-mounted onto this path.
	 */
	char *path;
};
|  | ||||
/*
 * Option bits tested against the value returned by getopt32().
 * They are listed in the same order as the option characters in opt_str.
 * The first six bits (mount..user) are also used as (1 << index) into
 * ns_list[], therefore:
 * OPT_user, NS_USR_POS, and ns_list[] index must match!
 */
enum {
	OPT_mount       = 1 << 0,	/* -m */
	OPT_uts         = 1 << 1,	/* -u */
	OPT_ipc         = 1 << 2,	/* -i */
	OPT_network     = 1 << 3,	/* -n */
	OPT_pid         = 1 << 4,	/* -p */
	OPT_user        = 1 << 5,	/* -U */
	OPT_fork        = 1 << 6,	/* -f */
	OPT_map_root    = 1 << 7,	/* -r */
	OPT_mount_proc  = 1 << 8,	/* --mount-proc */
	OPT_propagation = 1 << 9,	/* --propagation */
	OPT_setgroups   = 1 << 10,	/* --setgroups */
};
/*
 * Indexes into ns_list[] and into the per-invocation namespace_ctx array.
 * OPT_user, NS_USR_POS, and ns_list[] index must match!
 */
enum {
	NS_MNT_POS = 0,
	NS_UTS_POS = 1,
	NS_IPC_POS = 2,
	NS_NET_POS = 3,
	NS_PID_POS = 4,
	NS_USR_POS = 5,
	NS_COUNT   = 6,	/* number of namespace types */
};
| static const struct namespace_descr ns_list[] = { | ||||
| 	{ CLONE_NEWNS,   "mnt"  }, | ||||
| 	{ CLONE_NEWUTS,  "uts"  }, | ||||
| 	{ CLONE_NEWIPC,  "ipc"  }, | ||||
| 	{ CLONE_NEWNET,  "net"  }, | ||||
| 	{ CLONE_NEWPID,  "pid"  }, | ||||
| 	{ CLONE_NEWUSER, "user" }, /* OPT_user, NS_USR_POS, and ns_list[] index must match! */ | ||||
| }; | ||||
|  | ||||
| /* | ||||
|  * Upstream unshare doesn't support short options for --mount-proc, | ||||
|  * --propagation, --setgroups. | ||||
|  * Optional arguments (namespace mountpoints) exist only for long opts, | ||||
|  * we are forced to use "fake" letters for them. | ||||
|  * '+': stop at first non-option. | ||||
|  */ | ||||
| static const char opt_str[] = "+muinpU""fr""\xfd::""\xfe:""\xff:"; | ||||
| static const char unshare_longopts[] ALIGN1 = | ||||
| 	"mount\0"		Optional_argument	"\xf0" | ||||
| 	"uts\0"			Optional_argument	"\xf1" | ||||
| 	"ipc\0"			Optional_argument	"\xf2" | ||||
| 	"network\0"		Optional_argument	"\xf3" | ||||
| 	"pid\0"			Optional_argument	"\xf4" | ||||
| 	"user\0"		Optional_argument	"\xf5" | ||||
| 	"fork\0"		No_argument		"f" | ||||
| 	"map-root-user\0"	No_argument		"r" | ||||
| 	"mount-proc\0"		Optional_argument	"\xfd" | ||||
| 	"propagation\0"		Required_argument	"\xfe" | ||||
| 	"setgroups\0"		Required_argument	"\xff" | ||||
| ; | ||||
|  | ||||
/*
 * Ugly-looking string reuse trick: this literal is a list of
 * NUL-separated keywords for index_in_strings(), but when used as an
 * ordinary C string it reads as just "private" (the default value of
 * --propagation).
 */
#define PRIVATE_STR   "private\0""unchanged\0""shared\0""slave\0"
#define PRIVATE_UNCHANGED_SHARED_SLAVE   PRIVATE_STR

/*
 * Translate the argument of --propagation into mount(2) flags.
 * Returns 0 for "unchanged"; terminates the applet on an unknown keyword.
 */
static unsigned long parse_propagation(const char *prop_str)
{
	int idx = index_in_strings(PRIVATE_UNCHANGED_SHARED_SLAVE, prop_str);

	if (idx < 0)
		bb_error_msg_and_die("unrecognized: --%s=%s", "propagation", prop_str);

	switch (idx) {
	case 0:		/* "private" */
		return MS_REC | MS_PRIVATE;
	case 1:		/* "unchanged" */
		return 0;
	case 2:		/* "shared" */
		return MS_REC | MS_SHARED;
	default:	/* "slave" */
		return MS_REC | MS_SLAVE;
	}
}
|  | ||||
| static void mount_namespaces(pid_t pid, struct namespace_ctx *ns_ctx_list) | ||||
| { | ||||
| 	const struct namespace_descr *ns; | ||||
| 	struct namespace_ctx *ns_ctx; | ||||
| 	int i; | ||||
|  | ||||
| 	for (i = 0; i < NS_COUNT; i++) { | ||||
| 		char nsf[sizeof("/proc/%u/ns/AAAA") + sizeof(int)*3]; | ||||
|  | ||||
| 		ns = &ns_list[i]; | ||||
| 		ns_ctx = &ns_ctx_list[i]; | ||||
| 		if (!ns_ctx->path) | ||||
| 			continue; | ||||
| 		sprintf(nsf, "/proc/%u/ns/%.4s", (unsigned)pid, ns->nsfile4); | ||||
| 		mount_or_die(nsf, ns_ctx->path, NULL, MS_BIND); | ||||
| 	} | ||||
| } | ||||
|  | ||||
| int unshare_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; | ||||
| int unshare_main(int argc UNUSED_PARAM, char **argv) | ||||
| { | ||||
| 	int i; | ||||
| 	unsigned int opts; | ||||
| 	int unsflags; | ||||
| 	uintptr_t need_mount; | ||||
| 	const char *proc_mnt_target; | ||||
| 	const char *prop_str; | ||||
| 	const char *setgrp_str; | ||||
| 	unsigned long prop_flags; | ||||
| 	uid_t reuid = geteuid(); | ||||
| 	gid_t regid = getegid(); | ||||
| 	struct fd_pair fdp; | ||||
| 	pid_t child = child; /* for compiler */ | ||||
| 	struct namespace_ctx ns_ctx_list[NS_COUNT]; | ||||
|  | ||||
| 	memset(ns_ctx_list, 0, sizeof(ns_ctx_list)); | ||||
| 	proc_mnt_target = "/proc"; | ||||
| 	prop_str = PRIVATE_STR; | ||||
| 	setgrp_str = NULL; | ||||
|  | ||||
| 	opt_complementary = | ||||
| 		"\xf0""m" /* long opts (via their "fake chars") imply short opts */ | ||||
| 		":\xf1""u" | ||||
| 		":\xf2""i" | ||||
| 		":\xf3""n" | ||||
| 		":\xf4""p" | ||||
| 		":\xf5""U" | ||||
| 		":ru"	   /* --map-root-user or -r implies -u */ | ||||
| 		":\xfd""m" /* --mount-proc implies -m */ | ||||
| 	; | ||||
| 	applet_long_options = unshare_longopts; | ||||
| 	opts = getopt32(argv, opt_str, | ||||
| 			&proc_mnt_target, &prop_str, &setgrp_str, | ||||
| 			&ns_ctx_list[NS_MNT_POS].path, | ||||
| 			&ns_ctx_list[NS_UTS_POS].path, | ||||
| 			&ns_ctx_list[NS_IPC_POS].path, | ||||
| 			&ns_ctx_list[NS_NET_POS].path, | ||||
| 			&ns_ctx_list[NS_PID_POS].path, | ||||
| 			&ns_ctx_list[NS_USR_POS].path | ||||
| 	); | ||||
| 	argv += optind; | ||||
| 	//bb_error_msg("opts:0x%x", opts); | ||||
| 	//bb_error_msg("mount:%s", ns_ctx_list[NS_MNT_POS].path); | ||||
| 	//bb_error_msg("proc_mnt_target:%s", proc_mnt_target); | ||||
| 	//bb_error_msg("prop_str:%s", prop_str); | ||||
| 	//bb_error_msg("setgrp_str:%s", setgrp_str); | ||||
| 	//exit(1); | ||||
|  | ||||
| 	if (setgrp_str) { | ||||
| 		if (strcmp(setgrp_str, "allow") == 0) { | ||||
| 			if (opts & OPT_map_root) { | ||||
| 				bb_error_msg_and_die( | ||||
| 					"--setgroups=allow and --map-root-user " | ||||
| 					"are mutually exclusive" | ||||
| 				); | ||||
| 			} | ||||
| 		} else { | ||||
| 			/* It's not "allow", must be "deny" */ | ||||
| 			if (strcmp(setgrp_str, "deny") != 0) | ||||
| 				bb_error_msg_and_die("unrecognized: --%s=%s", | ||||
| 					"setgroups", setgrp_str); | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	unsflags = 0; | ||||
| 	need_mount = 0; | ||||
| 	for (i = 0; i < NS_COUNT; i++) { | ||||
| 		const struct namespace_descr *ns = &ns_list[i]; | ||||
| 		struct namespace_ctx *ns_ctx = &ns_ctx_list[i]; | ||||
|  | ||||
| 		if (opts & (1 << i)) | ||||
| 			unsflags |= ns->flag; | ||||
|  | ||||
| 		need_mount |= (uintptr_t)(ns_ctx->path); | ||||
| 	} | ||||
| 	/* need_mount != 0 if at least one FILE was given */ | ||||
|  | ||||
| 	prop_flags = MS_REC | MS_PRIVATE; | ||||
| 	/* Silently ignore --propagation if --mount is not requested. */ | ||||
| 	if (opts & OPT_mount) | ||||
| 		prop_flags = parse_propagation(prop_str); | ||||
|  | ||||
| 	/* | ||||
| 	 * Special case: if we were requested to unshare the mount namespace | ||||
| 	 * AND to make any namespace persistent (by bind mounting it) we need | ||||
| 	 * to spawn a child process which will wait for the parent to call | ||||
| 	 * unshare(), then mount parent's namespaces while still in the | ||||
| 	 * previous namespace. | ||||
| 	 */ | ||||
| 	fdp.wr = -1; | ||||
| 	if (need_mount && (opts & OPT_mount)) { | ||||
| 		/* | ||||
| 		 * Can't use getppid() in child, as we can be unsharing the | ||||
| 		 * pid namespace. | ||||
| 		 */ | ||||
| 		pid_t ppid = getpid(); | ||||
|  | ||||
| 		xpiped_pair(fdp); | ||||
|  | ||||
| 		child = xfork(); | ||||
| 		if (child == 0) { | ||||
| 			/* Child */ | ||||
| 			close(fdp.wr); | ||||
|  | ||||
| 			/* Wait until parent calls unshare() */ | ||||
| 			read(fdp.rd, ns_ctx_list, 1); /* ...using bogus buffer */ | ||||
| 			/*close(fdp.rd);*/ | ||||
|  | ||||
| 			/* Mount parent's unshared namespaces. */ | ||||
| 			mount_namespaces(ppid, ns_ctx_list); | ||||
| 			return EXIT_SUCCESS; | ||||
| 		} | ||||
| 		/* Parent continues */ | ||||
| 	} | ||||
|  | ||||
| 	if (unshare(unsflags) != 0) | ||||
| 		bb_perror_msg_and_die("unshare(0x%x)", unsflags); | ||||
|  | ||||
| 	if (fdp.wr >= 0) { | ||||
| 		close(fdp.wr); /* Release child */ | ||||
| 		/*close(fdp.rd);*/ | ||||
| 	} | ||||
|  | ||||
| 	if (need_mount) { | ||||
| 		/* Wait for the child to finish mounting the namespaces. */ | ||||
| 		if (opts & OPT_mount) { | ||||
| 			int exit_status = wait_for_exitstatus(child); | ||||
| 			if (WIFEXITED(exit_status) && | ||||
| 			    WEXITSTATUS(exit_status) != EXIT_SUCCESS) | ||||
| 				return WEXITSTATUS(exit_status); | ||||
| 		} else { | ||||
| 			/* | ||||
| 			 * Regular way - we were requested to mount some other | ||||
| 			 * namespaces: mount them after the call to unshare(). | ||||
| 			 */ | ||||
| 			mount_namespaces(getpid(), ns_ctx_list); | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	/* | ||||
| 	 * When we're unsharing the pid namespace, it's not the process that | ||||
| 	 * calls unshare() that is put into the new namespace, but its first | ||||
| 	 * child. The user may want to use this option to spawn a new process | ||||
| 	 * that'll become PID 1 in this new namespace. | ||||
| 	 */ | ||||
| 	if (opts & OPT_fork) { | ||||
| 		pid_t pid = xfork(); | ||||
| 		if (pid > 0) { | ||||
| 			/* Parent */ | ||||
| 			int exit_status = wait_for_exitstatus(pid); | ||||
| 			if (WIFSIGNALED(exit_status)) | ||||
| 				kill_myself_with_sig(WTERMSIG(exit_status)); | ||||
| 			return WEXITSTATUS(exit_status); | ||||
| 		} | ||||
| 		/* Child continues */ | ||||
| 	} | ||||
|  | ||||
| 	if (opts & OPT_map_root) { | ||||
| 		char uidmap_buf[sizeof("%u 0 1") + sizeof(int)*3]; | ||||
|  | ||||
| 		/* | ||||
| 		 * Since Linux 3.19 unprivileged writing of /proc/self/gid_map | ||||
| 		 * has been disabled unless /proc/self/setgroups is written | ||||
| 		 * first to permanently disable the ability to call setgroups | ||||
| 		 * in that user namespace. | ||||
| 		 */ | ||||
| 		xopen_xwrite_close(PATH_PROC_SETGROUPS, "deny"); | ||||
| 		sprintf(uidmap_buf, "%u 0 1", (unsigned)reuid); | ||||
| 		xopen_xwrite_close(PATH_PROC_UIDMAP, uidmap_buf); | ||||
| 		sprintf(uidmap_buf, "%u 0 1", (unsigned)regid); | ||||
| 		xopen_xwrite_close(PATH_PROC_GIDMAP, uidmap_buf); | ||||
| 	} else | ||||
| 	if (setgrp_str) { | ||||
| 		/* Write "allow" or "deny" */ | ||||
| 		xopen_xwrite_close(PATH_PROC_SETGROUPS, setgrp_str); | ||||
| 	} | ||||
|  | ||||
| 	if (opts & OPT_mount) { | ||||
| 		mount_or_die("none", "/", NULL, prop_flags); | ||||
| 	} | ||||
|  | ||||
| 	if (opts & OPT_mount_proc) { | ||||
| 		/* | ||||
| 		 * When creating a new pid namespace, we might want the pid | ||||
| 		 * subdirectories in /proc to remain consistent with the new | ||||
| 		 * process IDs. Without --mount-proc the pids in /proc would | ||||
| 		 * still reflect the old pid namespace. This is why we make | ||||
| 		 * /proc private here and then do a fresh mount. | ||||
| 		 */ | ||||
| 		mount_or_die("none", proc_mnt_target, NULL, MS_PRIVATE | MS_REC); | ||||
| 		mount_or_die("proc", proc_mnt_target, "proc", MS_NOSUID | MS_NOEXEC | MS_NODEV); | ||||
| 	} | ||||
|  | ||||
| 	if (argv[0]) { | ||||
| 		BB_EXECVP_or_die(argv); | ||||
| 	} | ||||
| 	/* unshare from util-linux 2.27.1, despite not documenting it, | ||||
| 	 * runs a login shell (argv0="-sh") if no PROG is given | ||||
| 	 */ | ||||
| 	run_shell(getenv("SHELL"), /*login:*/ 1, NULL, NULL); | ||||
| } | ||||
		Reference in New Issue
	
	Block a user