c06c636604
This patch adds support for unprivileged user namespaces found in kernel versions 3.8.0 and later. In the case of Nix, this is especially useful because it avoids having to set up setuid wrappers. The implementation details of this patch can be found at the top of the file "sandbox_userns.patch". My first attempt at creating this patch was to modify the SUID sandbox. Unfortunately this didn't work out well, because in the event of a sandbox failure, the host zygote process waits for an answer from the inner zygote with no timeout. Even if I had set a timeout, this would have been very ugly, giving users who don't have unprivileged user namespaces a delay on startup. An alternative approach to the mentioned problem would be to use select() on the host zygote, watching for changes on stdout or stderr and the synchronization socket. But even that approach isn't feasible because it requires even more patching. The patch was tested with older kernels (3.2.x, 3.7.x) and kernels without user namespace support enabled; in case the feature is unavailable, it reverts back to the previous behaviour (no zygote sandbox, only seccomp BPF). In order to support all Chromium channels, I manually changed the first hunk of the patch to not include the starting context of the diff, because there is a whitespace change in more recent versions of the Chromium source tree. See SVN revision 199882 for the change (revert in this case) in detail: http://src.chromium.org/viewvc/chrome?view=revision&revision=199882 Signed-off-by: aszlig <aszlig@redmoonstudios.org>
292 lines
11 KiB
Diff
292 lines
11 KiB
Diff
From a242351d8a32ea33e6337b928969cc9f715e314e Mon Sep 17 00:00:00 2001
|
|
From: aszlig <aszlig@redmoonstudios.org>
|
|
Date: Thu, 16 May 2013 14:17:56 +0200
|
|
Subject: [PATCH] zygote: Add support for user namespaces on Linux.
|
|
|
|
The implementation is done by patching the Zygote host to execute the sandbox
|
|
binary with CLONE_NEWUSER and setting the uid and gid mapping so that the child
|
|
process is using uid 0 and gid 0 which map to the current user of the parent.
|
|
Afterwards, the sandbox will continue as if it was called as a setuid binary.
|
|
|
|
In addition, this adds new_user_namespace as an option in process_util in order
|
|
to set the UID and GID mapping correctly. The reason for this is that just
|
|
passing CLONE_NEWUSER to clone_flags doesn't help in LaunchProcess(), because
|
|
without setting the mappings exec*() will clear the process's capability sets.
|
|
|
|
If the kernel doesn't support unprivileged user namespaces and the sandbox
|
|
binary doesn't have the setuid flag, the Zygote main process will run without a
|
|
sandbox. This is to mimic the behaviour if no SUID sandbox binary path is set.
|
|
|
|
Signed-off-by: aszlig <aszlig@redmoonstudios.org>
|
|
---
|
|
base/process_util.h | 4 ++
|
|
base/process_util_posix.cc | 71 +++++++++++++++++++++-
|
|
.../browser/zygote_host/zygote_host_impl_linux.cc | 28 +++++++--
|
|
content/zygote/zygote_main_linux.cc | 7 +++
|
|
sandbox/linux/suid/client/setuid_sandbox_client.cc | 8 +++
|
|
sandbox/linux/suid/client/setuid_sandbox_client.h | 4 ++
|
|
sandbox/linux/suid/common/sandbox.h | 1 +
|
|
7 files changed, 117 insertions(+), 6 deletions(-)
|
|
|
|
diff --git a/base/process_util.h b/base/process_util.h
|
|
index 6efc70c..0f0c74c 100644
|
|
--- a/base/process_util.h
|
|
+++ b/base/process_util.h
|
|
@@ -261,3 +261,4 @@ struct LaunchOptions {
|
|
+ , new_user_namespace(false)
|
|
#endif // OS_LINUX
|
|
#if defined(OS_CHROMEOS)
|
|
, ctrl_terminal_fd(-1)
|
|
@@ -332,6 +333,9 @@ struct LaunchOptions {
|
|
#if defined(OS_LINUX)
|
|
// If non-zero, start the process using clone(), using flags as provided.
|
|
int clone_flags;
|
|
+
|
|
+ // If true, start the process in a new user namespace.
|
|
+ bool new_user_namespace;
|
|
#endif // defined(OS_LINUX)
|
|
|
|
#if defined(OS_CHROMEOS)
|
|
diff --git a/base/process_util_posix.cc b/base/process_util_posix.cc
|
|
index 6f15130..cea07f0 100644
|
|
--- a/base/process_util_posix.cc
|
|
+++ b/base/process_util_posix.cc
|
|
@@ -34,6 +34,13 @@
|
|
#include "base/threading/platform_thread.h"
|
|
#include "base/threading/thread_restrictions.h"
|
|
|
|
+#if defined(OS_LINUX)
|
|
+#include <sched.h>
|
|
+#if !defined(CLONE_NEWUSER)
|
|
+#define CLONE_NEWUSER 0x10000000
|
|
+#endif
|
|
+#endif
|
|
+
|
|
#if defined(OS_CHROMEOS)
|
|
#include <sys/ioctl.h>
|
|
#endif
|
|
@@ -621,8 +628,19 @@ bool LaunchProcess(const std::vector<std::string>& argv,
|
|
|
|
pid_t pid;
|
|
#if defined(OS_LINUX)
|
|
- if (options.clone_flags) {
|
|
- pid = syscall(__NR_clone, options.clone_flags, 0, 0, 0);
|
|
+ int map_pipe_fd[2];
|
|
+ int flags = options.clone_flags;
|
|
+
|
|
+ if (options.new_user_namespace) {
|
|
+ flags |= CLONE_NEWUSER;
|
|
+ if (pipe(map_pipe_fd) < 0) {
|
|
+ DPLOG(ERROR) << "user namespace pipe";
|
|
+ return false;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (options.clone_flags || options.new_user_namespace) {
|
|
+ pid = syscall(__NR_clone, flags, 0, 0, 0);
|
|
} else
|
|
#endif
|
|
{
|
|
@@ -635,6 +653,21 @@ bool LaunchProcess(const std::vector<std::string>& argv,
|
|
} else if (pid == 0) {
|
|
// Child process
|
|
|
|
+#if defined(OS_LINUX)
|
|
+ if (options.new_user_namespace) {
|
|
+ // Close the write end of the pipe so we get an EOF when the parent closes
|
|
+ // the FD. This is to avoid race conditions when the UID/GID mappings are
|
|
+ // written _after_ execvp().
|
|
+ close(map_pipe_fd[1]);
|
|
+
|
|
+ char dummy;
|
|
+ if (HANDLE_EINTR(read(map_pipe_fd[0], &dummy, 1)) != 0) {
|
|
+ RAW_LOG(ERROR, "Unexpected input in uid/gid mapping pipe.");
|
|
+ _exit(127);
|
|
+ }
|
|
+ }
|
|
+#endif
|
|
+
|
|
// DANGER: fork() rule: in the child, if you don't end up doing exec*(),
|
|
// you call _exit() instead of exit(). This is because _exit() does not
|
|
// call any previously-registered (in the parent) exit handlers, which
|
|
@@ -749,6 +782,40 @@ bool LaunchProcess(const std::vector<std::string>& argv,
|
|
_exit(127);
|
|
} else {
|
|
// Parent process
|
|
+#if defined(OS_LINUX)
|
|
+ if (options.new_user_namespace) {
|
|
+ // We need to write UID/GID mapping here to map the current user outside
|
|
+ // the namespace to the root user inside the namespace in order to
|
|
+ // correctly "fool" the child process.
|
|
+ char buf[256];
|
|
+ int map_fd, map_len;
|
|
+
|
|
+ snprintf(buf, sizeof(buf), "/proc/%d/uid_map", pid);
|
|
+ map_fd = open(buf, O_RDWR);
|
|
+ DPCHECK(map_fd >= 0);
|
|
+ snprintf(buf, sizeof(buf), "0 %d 1", geteuid());
|
|
+ map_len = strlen(buf);
|
|
+ if (write(map_fd, buf, map_len) != map_len) {
|
|
+ RAW_LOG(WARNING, "Can't write to uid_map.");
|
|
+ }
|
|
+ close(map_fd);
|
|
+
|
|
+ snprintf(buf, sizeof(buf), "/proc/%d/gid_map", pid);
|
|
+ map_fd = open(buf, O_RDWR);
|
|
+ DPCHECK(map_fd >= 0);
|
|
+ snprintf(buf, sizeof(buf), "0 %d 1", getegid());
|
|
+ map_len = strlen(buf);
|
|
+ if (write(map_fd, buf, map_len) != map_len) {
|
|
+ RAW_LOG(WARNING, "Can't write to gid_map.");
|
|
+ }
|
|
+ close(map_fd);
|
|
+
|
|
+ // Close the pipe on the parent, so the child can continue doing the
|
|
+ // execvp() call.
|
|
+ close(map_pipe_fd[1]);
|
|
+ }
|
|
+#endif
|
|
+
|
|
if (options.wait) {
|
|
// While this isn't strictly disk IO, waiting for another process to
|
|
// finish is the sort of thing ThreadRestrictions is trying to prevent.
|
|
diff --git a/content/browser/zygote_host/zygote_host_impl_linux.cc b/content/browser/zygote_host/zygote_host_impl_linux.cc
|
|
index ba7884f8..2a674a0 100644
|
|
--- a/content/browser/zygote_host/zygote_host_impl_linux.cc
|
|
+++ b/content/browser/zygote_host/zygote_host_impl_linux.cc
|
|
@@ -117,6 +117,9 @@ void ZygoteHostImpl::Init(const std::string& sandbox_cmd) {
|
|
|
|
sandbox_binary_ = sandbox_cmd.c_str();
|
|
|
|
+ bool userns_sandbox = false;
|
|
+ const std::vector<std::string> cmd_line_unwrapped(cmd_line.argv());
|
|
+
|
|
if (!sandbox_cmd.empty()) {
|
|
struct stat st;
|
|
if (stat(sandbox_binary_.c_str(), &st) != 0) {
|
|
@@ -124,16 +127,21 @@ void ZygoteHostImpl::Init(const std::string& sandbox_cmd) {
|
|
<< sandbox_binary_ << " Aborting now.";
|
|
}
|
|
|
|
- if (access(sandbox_binary_.c_str(), X_OK) == 0 &&
|
|
- (st.st_uid == 0) &&
|
|
- (st.st_mode & S_ISUID) &&
|
|
- (st.st_mode & S_IXOTH)) {
|
|
+ if (access(sandbox_binary_.c_str(), X_OK) == 0) {
|
|
using_suid_sandbox_ = true;
|
|
+
|
|
cmd_line.PrependWrapper(sandbox_binary_);
|
|
|
|
scoped_ptr<sandbox::SetuidSandboxClient>
|
|
sandbox_client(sandbox::SetuidSandboxClient::Create());
|
|
sandbox_client->SetupLaunchEnvironment();
|
|
+
|
|
+ if (!((st.st_uid == 0) &&
|
|
+ (st.st_mode & S_ISUID) &&
|
|
+ (st.st_mode & S_IXOTH))) {
|
|
+ userns_sandbox = true;
|
|
+ sandbox_client->SetNoSuid();
|
|
+ }
|
|
} else {
|
|
LOG(FATAL) << "The SUID sandbox helper binary was found, but is not "
|
|
"configured correctly. Rather than run without sandboxing "
|
|
@@ -161,7 +169,19 @@ void ZygoteHostImpl::Init(const std::string& sandbox_cmd) {
|
|
base::ProcessHandle process = -1;
|
|
base::LaunchOptions options;
|
|
options.fds_to_remap = &fds_to_map;
|
|
+ if (userns_sandbox)
|
|
+ options.new_user_namespace = true;
|
|
base::LaunchProcess(cmd_line.argv(), options, &process);
|
|
+
|
|
+ if (process == -1 && userns_sandbox) {
|
|
+ LOG(ERROR) << "User namespace sandbox failed to start, running without "
|
|
+ << "sandbox! You need at least kernel 3.8.0 with CONFIG_USER_NS "
|
|
+ << "enabled in order to use the sandbox without setuid bit.";
|
|
+ using_suid_sandbox_ = false;
|
|
+ options.new_user_namespace = false;
|
|
+ base::LaunchProcess(cmd_line_unwrapped, options, &process);
|
|
+ }
|
|
+
|
|
CHECK(process != -1) << "Failed to launch zygote process";
|
|
|
|
if (using_suid_sandbox_) {
|
|
diff --git a/content/zygote/zygote_main_linux.cc b/content/zygote/zygote_main_linux.cc
|
|
index ca75518..d906411 100644
|
|
--- a/content/zygote/zygote_main_linux.cc
|
|
+++ b/content/zygote/zygote_main_linux.cc
|
|
@@ -402,6 +402,13 @@ static bool EnterSandbox(sandbox::SetuidSandboxClient* setuid_sandbox,
|
|
*has_started_new_init = true;
|
|
}
|
|
|
|
+ // Don't set non-dumpable, as it causes trouble when the host tries to find
|
|
+ // the zygote process (XXX: Not quite sure why this happens with user
|
|
+ // namespaces). Fortunately, we also have the seccomp filter sandbox which
|
|
+ // should disallow the use of ptrace.
|
|
+ if (setuid_sandbox->IsNoSuid())
|
|
+ return true;
|
|
+
|
|
#if !defined(OS_OPENBSD)
|
|
// Previously, we required that the binary be non-readable. This causes the
|
|
// kernel to mark the process as non-dumpable at startup. The thinking was
|
|
diff --git a/sandbox/linux/suid/client/setuid_sandbox_client.cc b/sandbox/linux/suid/client/setuid_sandbox_client.cc
|
|
index 7a174ef..633401e 100644
|
|
--- a/sandbox/linux/suid/client/setuid_sandbox_client.cc
|
|
+++ b/sandbox/linux/suid/client/setuid_sandbox_client.cc
|
|
@@ -166,6 +166,10 @@ bool SetuidSandboxClient::IsInNewNETNamespace() const {
|
|
return env_->HasVar(kSandboxNETNSEnvironmentVarName);
|
|
}
|
|
|
|
+bool SetuidSandboxClient::IsNoSuid() const {
|
|
+ return env_->HasVar(kSandboxNoSuidVarName);
|
|
+}
|
|
+
|
|
bool SetuidSandboxClient::IsSandboxed() const {
|
|
return sandboxed_;
|
|
}
|
|
@@ -175,5 +179,9 @@ void SetuidSandboxClient::SetupLaunchEnvironment() {
|
|
SetSandboxAPIEnvironmentVariable(env_);
|
|
}
|
|
|
|
+void SetuidSandboxClient::SetNoSuid() {
|
|
+ env_->SetVar(kSandboxNoSuidVarName, "1");
|
|
+}
|
|
+
|
|
} // namespace sandbox
|
|
|
|
diff --git a/sandbox/linux/suid/client/setuid_sandbox_client.h b/sandbox/linux/suid/client/setuid_sandbox_client.h
|
|
index a9f6536..2e8113a 100644
|
|
--- a/sandbox/linux/suid/client/setuid_sandbox_client.h
|
|
+++ b/sandbox/linux/suid/client/setuid_sandbox_client.h
|
|
@@ -39,6 +39,8 @@ class SetuidSandboxClient {
|
|
bool IsInNewPIDNamespace() const;
|
|
// Did the setuid helper create a new network namespace ?
|
|
bool IsInNewNETNamespace() const;
|
|
+ // Is sandboxed without SUID binary ?
|
|
+ bool IsNoSuid() const;
|
|
// Are we done and fully sandboxed ?
|
|
bool IsSandboxed() const;
|
|
|
|
@@ -46,6 +48,8 @@ class SetuidSandboxClient {
|
|
// helper.
|
|
void SetupLaunchEnvironment();
|
|
|
|
+ void SetNoSuid();
|
|
+
|
|
private:
|
|
// Holds the environment. Will never be NULL.
|
|
base::Environment* env_;
|
|
diff --git a/sandbox/linux/suid/common/sandbox.h b/sandbox/linux/suid/common/sandbox.h
|
|
index aad4ff8..bd710d5 100644
|
|
--- a/sandbox/linux/suid/common/sandbox.h
|
|
+++ b/sandbox/linux/suid/common/sandbox.h
|
|
@@ -18,6 +18,7 @@ static const char kAdjustLowMemMarginSwitch[] = "--adjust-low-mem";
|
|
|
|
static const char kSandboxDescriptorEnvironmentVarName[] = "SBX_D";
|
|
static const char kSandboxHelperPidEnvironmentVarName[] = "SBX_HELPER_PID";
|
|
+static const char kSandboxNoSuidVarName[] = "SBX_NO_SUID";
|
|
|
|
static const long kSUIDSandboxApiNumber = 1;
|
|
static const char kSandboxEnvironmentApiRequest[] = "SBX_CHROME_API_RQ";
|
|
--
|
|
1.8.2.1
|
|
|