main: if unprivileged, reexec in a user namespace

If our CLI is invoked as an unprivileged user (uid != 0),
* create a namespace using our own UID and GID as "0" followed by the
  ranges matching our name and our primary group's name that we find in
  /etc/subuid and /etc/subgid (the latter by way of using newuidmap and
  newgidmap)
* re-exec ourselves inside of that user namespace, prepending global CLI arguments that:
  * override the driver from storage.conf with "vfs"
  * override the storage root from storage.conf with a "containers/storage" subdirectory
    of $XDG_DATA_HOME, or $HOME/.local/share.
  * override the storage runroot from storage.conf with either "$XDG_RUNTIME_DIR/run" or
    "/var/run/user/$uid/run"
  * set default ID mapping settings to map all of the ranges matching
    our name and our primary group's name that we found in /etc/subuid
    and /etc/subgid
  * can still be overridden using the command line

Add a "buildah unshare" CLI that will start an arbitrary command in the
first namespace, so that manual cleanup of locations used by the second
namespace will be possible.

Signed-off-by: Nalin Dahyabhai <nalin@redhat.com>

Closes: #823
Approved by: rhatdan
This commit is contained in:
Nalin Dahyabhai 2018-06-14 15:42:33 -04:00 committed by Atomic Bot
parent 1accccec61
commit 18063d19b6
15 changed files with 1019 additions and 12 deletions

View File

@ -17,7 +17,7 @@ LDFLAGS := -ldflags '-X main.gitCommit=${GIT_COMMIT} -X main.buildInfo=${BUILD_I
all: buildah imgtype docs
buildah: *.go imagebuildah/*.go bind/*.go cmd/buildah/*.go docker/*.go pkg/cli/*.go pkg/parse/*.go util/*.go
buildah: *.go imagebuildah/*.go bind/*.go cmd/buildah/*.go docker/*.go pkg/cli/*.go pkg/parse/*.go unshare/*.c unshare/*.go util/*.go
$(GO) build $(LDFLAGS) -o buildah $(BUILDFLAGS) ./cmd/buildah
imgtype: *.go docker/*.go util/*.go tests/imgtype/imgtype.go

View File

@ -81,6 +81,7 @@ func main() {
debug = true
logrus.SetLevel(logrus.DebugLevel)
}
maybeReexecUsingUserNamespace(c, false)
return nil
}
app.After = func(c *cli.Context) error {
@ -110,6 +111,7 @@ func main() {
runCommand,
tagCommand,
umountCommand,
unshareCommand,
versionCommand,
}
err := app.Run(os.Args)

234
cmd/buildah/unshare.go Normal file
View File

@ -0,0 +1,234 @@
// +build linux
package main
import (
"bytes"
"fmt"
"os"
"os/exec"
"os/user"
"runtime"
"strconv"
"syscall"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"github.com/projectatomic/buildah/unshare"
"github.com/projectatomic/buildah/util"
"github.com/sirupsen/logrus"
"github.com/urfave/cli"
)
const (
// startedInUserNS is the name of an environment variable that, when set to
// a non-empty value, means that we shouldn't try to create and enter a new
// user namespace and then re-exec ourselves.
// maybeReexecUsingUserNamespace sets it to "1" in its own environment
// before re-executing.
startedInUserNS = "_BUILDAH_STARTED_IN_USERNS"
)
var (
// unshareDescription is the Description text of the "unshare" subcommand.
unshareDescription = "Runs a command in a modified user namespace"
// unshareCommand defines the "buildah unshare" subcommand; its action is
// unshareCmd.
unshareCommand = cli.Command{
Name: "unshare",
Usage: "Run a command in a modified user namespace",
Description: unshareDescription,
Action: unshareCmd,
ArgsUsage: "[COMMAND [ARGS [...]]]",
// NOTE(review): presumably set so that flags meant for COMMAND are not
// moved in front of it by the cli package -- confirm against urfave/cli.
SkipArgReorder: true,
}
)
// runnable is the subset of command behavior that execRunnable needs: a
// single Run method that runs a command and reports failure as an error.
// In this file, both *exec.Cmd and *unshare.Cmd values are passed as
// runnable.
type runnable interface {
Run() error
}
func maybeReexecUsingUserNamespace(c *cli.Context, evenForRoot bool) {
// If we've already been through this once, no need to try again.
if os.Getenv(startedInUserNS) != "" {
return
}
// Figure out if we're already root, or "root", which is close enough,
// unless we've been explicitly told to do this even for root.
me, err := user.Current()
if err != nil {
logrus.Errorf("error determining current user: %v", err)
cli.OsExiter(1)
}
if me.Uid == "0" && !evenForRoot {
return
}
uidNum, err := strconv.ParseUint(me.Uid, 10, 32)
if err != nil {
logrus.Errorf("error parsing current UID %s: %v", me.Uid, err)
cli.OsExiter(1)
}
gidNum, err := strconv.ParseUint(me.Gid, 10, 32)
if err != nil {
logrus.Errorf("error parsing current GID %s: %v", me.Gid, err)
cli.OsExiter(1)
}
runtime.LockOSThread()
defer runtime.UnlockOSThread()
// Read the set of ID mappings that we're allowed to use. Each range
// in /etc/subuid and /etc/subgid file is a starting ID and a range size.
uidmap, gidmap, err := util.GetSubIDMappings(me.Username, me.Username)
if err != nil {
logrus.Errorf("error reading allowed ID mappings: %v", err)
cli.OsExiter(1)
}
if len(uidmap) == 0 {
logrus.Warnf("Found no UID ranges set aside for user %q in /etc/subuid.", me.Username)
}
if len(gidmap) == 0 {
logrus.Warnf("Found no GID ranges set aside for user %q in /etc/subgid.", me.Username)
}
// Build modified maps that map us to uid/gid 0, and maps every other
// range to itself. In a namespace that uses this map, the invoking
// user will appear to be root. This should let us create storage
// directories and access credentials under the invoking user's home
// directory.
uidmap2 := append([]specs.LinuxIDMapping{{HostID: uint32(uidNum), ContainerID: 0, Size: 1}}, uidmap...)
for i := range uidmap2[1:] {
uidmap2[i+1].ContainerID = uidmap2[i+1].HostID
}
gidmap2 := append([]specs.LinuxIDMapping{{HostID: uint32(gidNum), ContainerID: 0, Size: 1}}, gidmap...)
for i := range gidmap2[1:] {
gidmap2[i+1].ContainerID = gidmap2[i+1].HostID
}
// Map the uidmap and gidmap ranges, consecutively, starting at 0.
// When used to created a namespace inside of a namespace that uses the
// maps we've created above, they'll produce mappings which don't map
// in the invoking user. This is more suitable for running commands in
// containers, so we'll want to use it as a default for any containers
// that we create.
umap := new(bytes.Buffer)
for i := range uidmap {
if i > 0 {
fmt.Fprintf(umap, ",")
}
fmt.Fprintf(umap, "%d:%d:%d", uidmap[i].ContainerID, uidmap[i].HostID, uidmap[i].Size)
}
gmap := new(bytes.Buffer)
for i := range gidmap {
if i > 0 {
fmt.Fprintf(gmap, ",")
}
fmt.Fprintf(gmap, "%d:%d:%d", gidmap[i].ContainerID, gidmap[i].HostID, gidmap[i].Size)
}
// Add args to change the global defaults.
defaultStorageDriver := "vfs"
defaultRoot, err := util.UnsharedRootPath(me.HomeDir)
if err != nil {
logrus.Errorf("%v", err)
cli.OsExiter(1)
}
defaultRunroot, err := util.UnsharedRunrootPath(me.Uid)
if err != nil {
logrus.Errorf("%v", err)
cli.OsExiter(1)
}
var moreArgs []string
if !c.GlobalIsSet("storage-driver") || !c.GlobalIsSet("root") || !c.GlobalIsSet("runroot") || (!c.GlobalIsSet("userns-uid-map") && !c.GlobalIsSet("userns-gid-map")) {
logrus.Infof("Running without privileges, assuming arguments:")
if !c.GlobalIsSet("storage-driver") {
logrus.Infof(" --storage-driver %q", defaultStorageDriver)
moreArgs = append(moreArgs, "--storage-driver", defaultStorageDriver)
}
if !c.GlobalIsSet("root") {
logrus.Infof(" --root %q", defaultRoot)
moreArgs = append(moreArgs, "--root", defaultRoot)
}
if !c.GlobalIsSet("runroot") {
logrus.Infof(" --runroot %q", defaultRunroot)
moreArgs = append(moreArgs, "--runroot", defaultRunroot)
}
if !c.GlobalIsSet("userns-uid-map") && !c.GlobalIsSet("userns-gid-map") && umap.Len() > 0 && gmap.Len() > 0 {
logrus.Infof(" --userns-uid-map %q --userns-gid-map %q", umap.String(), gmap.String())
moreArgs = append(moreArgs, "--userns-uid-map", umap.String(), "--userns-gid-map", gmap.String())
}
}
// Unlike most uses of reexec or unshare, we're using a name that
// _won't_ be recognized as a registered reexec handler, since we
// _want_ to fall through reexec.Init() to the normal main().
cmd := unshare.Command(append(append([]string{"buildah-unprivileged"}, moreArgs...), os.Args[1:]...)...)
// If, somehow, we don't become UID 0 in our child, indicate that the child shouldn't try again.
if err = os.Setenv(startedInUserNS, "1"); err != nil {
logrus.Errorf("error setting %s=1 in environment: %v", startedInUserNS, err)
os.Exit(1)
}
// Reuse our stdio.
cmd.Stdin = os.Stdin
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
// Set up a new user namespace with the ID mapping.
cmd.UnshareFlags = syscall.CLONE_NEWUSER
cmd.UseNewuidmap = true
cmd.UidMappings = uidmap2
cmd.UseNewgidmap = true
cmd.GidMappings = gidmap2
cmd.GidMappingsEnableSetgroups = true
// Finish up.
logrus.Debugf("running %+v with environment %+v, UID map %+v, and GID map %+v", cmd.Cmd.Args, os.Environ(), cmd.UidMappings, cmd.GidMappings)
execRunnable(cmd)
}
// execRunnable runs the specified command, captures its exit status, and
// terminates the current process with a matching status; it never returns.
//
// The exit status is chosen as follows:
//   - 0 if cmd.Run() succeeds;
//   - the child's own exit status if it exited normally;
//   - 128 plus the signal number if the child was killed by a signal;
//   - 1 if the error does not carry a usable wait status.
func execRunnable(cmd runnable) {
	err := cmd.Run()
	if err == nil {
		os.Exit(0)
	}
	if exitError, ok := errors.Cause(err).(*exec.ExitError); ok {
		// Inspect the wait status directly.  ProcessState.Exited() is false
		// for a process that was terminated by a signal, so gating on it
		// would make the Signaled() case below unreachable.
		if waitStatus, ok := exitError.ProcessState.Sys().(syscall.WaitStatus); ok {
			if waitStatus.Exited() {
				logrus.Errorf("%v", exitError)
				os.Exit(waitStatus.ExitStatus())
			}
			if waitStatus.Signaled() {
				logrus.Errorf("%v", exitError)
				os.Exit(int(waitStatus.Signal()) + 128)
			}
		}
	}
	logrus.Errorf("%v", err)
	logrus.Errorf("(unable to determine exit status)")
	os.Exit(1)
}
// unshareCmd implements "buildah unshare".  It first forces a re-exec inside
// of the user namespace that we configure for ourselves (even when invoked as
// root), then runs the requested command, or $SHELL when no command was
// given, with environment variables describing the user as root.  The process
// exits with the command's status, so this function does not return in
// practice.
func unshareCmd(c *cli.Context) error {
	// Force reexec using the configured ID mappings.
	maybeReexecUsingUserNamespace(c, true)

	// Work out what to run: the given command, or else the user's shell.
	argv := c.Args()
	if len(argv) == 0 {
		defaultShell, found := os.LookupEnv("SHELL")
		if !found {
			logrus.Errorf("no command specified")
			os.Exit(1)
		}
		argv = []string{defaultShell}
	}

	// Inside the namespace we appear to be root, so say so in the environment.
	rootIdentity := []string{"USER=root", "USERNAME=root", "GROUP=root", "LOGNAME=root", "UID=0", "GID=0"}

	child := exec.Command(argv[0], argv[1:]...)
	child.Env = append(os.Environ(), rootIdentity...)
	child.Stdin, child.Stdout, child.Stderr = os.Stdin, os.Stdout, os.Stderr

	// execRunnable exits the process; the lines below are a safety net.
	execRunnable(child)
	os.Exit(1)
	return nil
}

View File

@ -0,0 +1,16 @@
// +build !linux
package main

import (
"github.com/urfave/cli"
)
var (
// unshareCommand is a hidden placeholder for the "unshare" subcommand in
// builds for platforms other than Linux (see the build constraint at the
// top of this file).  Its action does nothing and returns nil.
unshareCommand = cli.Command{
Name: "unshare",
Hidden: true,
Action: func(c *cli.Context) error { return nil },
SkipArgReorder: true,
}
)
// maybeReexecUsingUserNamespace is a no-op in builds for platforms other than
// Linux: it ignores both arguments and returns immediately.
func maybeReexecUsingUserNamespace(c *cli.Context, evenForRoot bool) {
}

View File

@ -753,6 +753,16 @@ return 1
esac
}
# _buildah_unshare declares the completion word lists for "buildah unshare":
# the boolean options --help and -h, and an empty list of options that take
# arguments.  It only sets these two local variables.
_buildah_unshare() {
local boolean_options="
--help
-h
"
local options_with_args="
"
}
_buildah_version() {
local boolean_options="
--help
@ -761,7 +771,7 @@ return 1
local options_with_args="
"
}
}
_buildah() {
local previous_extglob_setting=$(shopt -p extglob)
@ -787,6 +797,7 @@ return 1
tag
umount
unmount
unshare
version
)

View File

@ -27,7 +27,7 @@
Name: buildah
# Bump version in buildah.go too
Version: 1.2
Release: 1.git%{shortcommit}%{?dist}
Release: 2.git%{shortcommit}%{?dist}
Summary: A command line tool used for creating OCI Images
License: ASL 2.0
URL: https://%{provider_prefix}
@ -49,6 +49,7 @@ BuildRequires: make
Requires: runc >= 1.0.0-6
Requires: container-selinux
Requires: skopeo-containers
Requires: shadow-utils
Provides: %{repo} = %{version}-%{release}
%description

32
docs/buildah-unshare.md Normal file
View File

@ -0,0 +1,32 @@
# buildah-unshare "1" "June 2018" "buildah"
## NAME
buildah\-unshare - Run a command inside of a modified user namespace.
## SYNOPSIS
**buildah** **unshare** [*options* [...] --] [**command**]
## DESCRIPTION
Launches a process (by default, *$SHELL*) in a new user namespace. The user
namespace is configured so that the invoking user's UID and primary GID appear
to be UID 0 and GID 0, respectively. Any ranges which match that user and
group in /etc/subuid and /etc/subgid are also mapped in as themselves with the
help of the *newuidmap(1)* and *newgidmap(1)* helpers.
This is mainly useful for troubleshooting unprivileged operations and for
manually clearing storage and other data related to images and containers.
## EXAMPLE
buildah unshare id
buildah unshare pwd
buildah unshare cat /proc/self/uid\_map
buildah unshare cat /proc/self/gid\_map
buildah unshare rm -fr $HOME/.local/share/containers/storage /var/run/user/\`id -u\`/run
## SEE ALSO
buildah(1), namespaces(7), newuidmap(1), newgidmap(1), user\_namespaces(7)

View File

@ -46,23 +46,23 @@ be used, as the default behavior of using the system-wide configuration
**--root** **value**
Storage root dir (default: "/var/lib/containers/storage")
Storage root dir (default: "/var/lib/containers/storage" for UID 0, "$HOME/.local/share/containers/storage" for other users)
Default root dir is configured in /etc/containers/storage.conf
**--runroot** **value**
Storage state dir (default: "/var/run/containers/storage")
Storage state dir (default: "/var/run/containers/storage" for UID 0, "/var/run/user/$UID/run" for other users)
Default state dir is configured in /etc/containers/storage.conf
**--storage-driver** **value**
Storage driver. Default Storage driver is configured in /etc/containers/storage.conf. Overriding
this option, will drop the storage-opt definitions was well from the storage.conf file. User must
Storage driver. The default storage driver for UID 0 is configured in /etc/containers/storage.conf, and is *vfs* for other users.
Overriding this option will cause the *storage-opt* settings in /etc/containers/storage.conf to be ignored. The user must
specify additional options via the `--storage-opt` flag.
**--storage-opt** **value**
Storage driver option, Default Storage driver options are configured in /etc/containers/storage.conf
Storage driver option. Default storage driver options are configured in /etc/containers/storage.conf
**--userns-uid-map** *mapping*
@ -107,8 +107,9 @@ Print the version
| buildah-run(1) | Run a command inside of the container. |
| buildah-tag(1) | Add an additional name to a local image. |
| buildah-umount(1) | Unmount a working container's root file system. |
| buildah-version(1) | Display the Buildah Version Information |
| storage.conf(5) | Syntax of Container Storage configuration file |
| buildah-unshare(1) | Launch a command in a user namespace with modified ID mappings. |
| buildah-version(1) | Display the Buildah Version Information |
| storage.conf(5) | Syntax of Container Storage configuration file |
## Files
@ -134,7 +135,7 @@ Print the version
registries.conf is the configuration file which specifies which container registries should be consulted when completing image names which do not include a registry or domain portion.
## SEE ALSO
podman(1), mounts.conf(5), registries.conf(5), storage.conf(5)
podman(1), mounts.conf(5), newuidmap(1), newgidmap(1), registries.conf(5), storage.conf(5)
## HISTORY
December 2017, Originally compiled by Tom Sweeney <tsweeney@redhat.com>

110
unshare/unshare.c Normal file
View File

@ -0,0 +1,110 @@
#define _GNU_SOURCE
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <grp.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <termios.h>
#include <unistd.h>
/*
 * _buildah_unshare_parse_envint reads the environment variable named envname
 * and parses its value as a base-10 integer.
 *
 * Returns -1 if the variable is not set.  If the variable is set but its value
 * is empty, contains anything other than a single integer, or does not fit in
 * an int, an error is printed to stderr and the process exits with status 1.
 * On success the variable is removed from the environment, so that it is not
 * inherited further, and the parsed value is returned.
 */
static int _buildah_unshare_parse_envint(const char *envname) {
	char *p, *q;
	long l;

	p = getenv(envname);
	if (p == NULL) {
		return -1;
	}
	q = NULL;
	l = strtol(p, &q, 10);
	/* q == p means that no digits were consumed (for example, an empty
	 * value); the round-trip through int rejects values that would be
	 * silently truncated by the return below. */
	if ((q == NULL) || (q == p) || (*q != '\0') || ((long) (int) l != l)) {
		fprintf(stderr, "Error parsing \"%s\"=\"%s\"!\n", envname, p);
		_exit(1);
	}
	unsetenv(envname);
	return (int) l;
}
/*
 * _buildah_unshare performs the child's side of the startup handshake that is
 * driven by "_Buildah-*" environment variables.  If "_Buildah-unshare" is not
 * set, it returns immediately without doing anything.  Otherwise, in order:
 *
 *  1. unshare the user namespace, if CLONE_NEWUSER is among the flags;
 *  2. write our PID to the "_Buildah-pid-pipe" descriptor;
 *  3. block reading the "_Buildah-continue-pipe" descriptor; any data read
 *     from it is treated as an error message, printed, and is fatal;
 *  4. optionally call setsid() and setpgrp(), and set a controlling terminal;
 *  5. if a user namespace was created, optionally drop supplementary groups
 *     and switch to UID 0 and GID 0;
 *  6. unshare any remaining namespaces named in the flags.
 *
 * Any failure prints a message to stderr and exits the process with status 1.
 */
void _buildah_unshare(void)
{
int flags, pidfd, continuefd, n, pgrp, sid, ctty, allow_setgroups;
char buf[2048];
/* The flags double as the "should we do anything at all" switch. */
flags = _buildah_unshare_parse_envint("_Buildah-unshare");
if (flags == -1) {
return;
}
/* Create the user namespace first; the other namespaces are unshared at
 * the end, after we have become UID 0 inside of it. */
if ((flags & CLONE_NEWUSER) != 0) {
if (unshare(CLONE_NEWUSER) == -1) {
fprintf(stderr, "Error during unshare(CLONE_NEWUSER): %m\n");
_exit(1);
}
}
/* Report our PID over the pipe, then close our end of it. */
pidfd = _buildah_unshare_parse_envint("_Buildah-pid-pipe");
if (pidfd != -1) {
snprintf(buf, sizeof(buf), "%llu", (unsigned long long) getpid());
if (write(pidfd, buf, strlen(buf)) != strlen(buf)) {
fprintf(stderr, "Error writing PID to pipe on fd %d: %m\n", pidfd);
_exit(1);
}
close(pidfd);
}
/* Wait on the continue pipe.  Reading any data means that an error message
 * was sent to us; a read that returns no data lets us proceed. */
continuefd = _buildah_unshare_parse_envint("_Buildah-continue-pipe");
if (continuefd != -1) {
n = read(continuefd, buf, sizeof(buf));
if (n > 0) {
fprintf(stderr, "Error: %.*s\n", n, buf);
_exit(1);
}
close(continuefd);
}
/* Start a new session, if requested. */
sid = _buildah_unshare_parse_envint("_Buildah-setsid");
if (sid == 1) {
if (setsid() == -1) {
fprintf(stderr, "Error during setsid: %m\n");
_exit(1);
}
}
/* Start a new process group, if requested. */
pgrp = _buildah_unshare_parse_envint("_Buildah-setpgrp");
if (pgrp == 1) {
if (setpgrp() == -1) {
fprintf(stderr, "Error during setpgrp: %m\n");
_exit(1);
}
}
/* Make the given descriptor our controlling terminal, if one was passed. */
ctty = _buildah_unshare_parse_envint("_Buildah-ctty");
if (ctty != -1) {
if (ioctl(ctty, TIOCSCTTY, 0) == -1) {
fprintf(stderr, "Error while setting controlling terminal to %d: %m\n", ctty);
_exit(1);
}
}
/* Always consume this variable, even if it ends up unused, so that it is
 * removed from the environment. */
allow_setgroups = _buildah_unshare_parse_envint("_Buildah-allow-setgroups");
if ((flags & CLONE_NEWUSER) != 0) {
/* Clear supplementary groups only if we were told that setgroups() is
 * allowed in this namespace. */
if (allow_setgroups == 1) {
if (setgroups(0, NULL) != 0) {
fprintf(stderr, "Error during setgroups(0, NULL): %m\n");
_exit(1);
}
}
/* Become GID 0 and then UID 0 in the new user namespace. */
if (setresgid(0, 0, 0) != 0) {
fprintf(stderr, "Error during setresgid(0): %m\n");
_exit(1);
}
if (setresuid(0, 0, 0) != 0) {
fprintf(stderr, "Error during setresuid(0): %m\n");
_exit(1);
}
}
/* Finally, unshare whatever other namespaces were requested. */
if ((flags & ~CLONE_NEWUSER) != 0) {
if (unshare(flags & ~CLONE_NEWUSER) == -1) {
fprintf(stderr, "Error during unshare(...): %m\n");
_exit(1);
}
}
return;
}

273
unshare/unshare.go Normal file
View File

@ -0,0 +1,273 @@
// +build linux
package unshare
import (
"bytes"
"fmt"
"io"
"os"
"os/exec"
"runtime"
"strconv"
"strings"
"syscall"
"github.com/containers/storage/pkg/reexec"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"github.com/projectatomic/buildah/util"
)
// Cmd wraps an exec.Cmd created by the reexec package in unshare(), and
// handles setting ID maps and other related settings by triggering
// initialization code in the child.
//
// The fields below are consulted by Start; set them before calling Start or
// Run.
type Cmd struct {
*exec.Cmd
// UnshareFlags is passed to the child in the "_Buildah-unshare"
// environment variable.  ID mappings are only configured when it
// includes CLONE_NEWUSER.
UnshareFlags int
// UseNewuidmap selects running the "newuidmap" helper to set the UID
// map instead of writing /proc/<pid>/uid_map directly.
UseNewuidmap bool
// UidMappings is the UID map for the new user namespace.  If empty,
// Start fills it in from the current process's own mappings.
UidMappings []specs.LinuxIDMapping
// UseNewgidmap selects running the "newgidmap" helper to set the GID
// map instead of writing /proc/<pid>/gid_map directly.
UseNewgidmap bool
// GidMappings is the GID map for the new user namespace.  If empty,
// Start fills it in from the current process's own mappings.
GidMappings []specs.LinuxIDMapping
// GidMappingsEnableSetgroups selects whether "allow" or "deny" is
// written to /proc/<pid>/setgroups, and is also passed to the child as
// "_Buildah-allow-setgroups".
GidMappingsEnableSetgroups bool
// Setsid, if true, passes "_Buildah-setsid=1" to the child.
Setsid bool
// Setpgrp, if true, passes "_Buildah-setpgrp=1" to the child.
Setpgrp bool
// Ctty, if set, is passed to the child as an extra file whose
// descriptor number is given in "_Buildah-ctty".
Ctty *os.File
// OOMScoreAdj is written to the child's /proc/<pid>/oom_score_adj.
OOMScoreAdj int
// Hook, if set, is called by Start with the child's PID after the
// other setup is done; a non-nil error is sent to the child and
// returned from Start.
Hook func(pid int) error
}
// Command returns a new Cmd wrapping the command that reexec.Command builds
// for args.  Every other field is left at its zero value, so callers can
// customize the result before starting it.
func Command(args ...string) *Cmd {
	return &Cmd{Cmd: reexec.Command(args...)}
}
// Start starts the wrapped command and performs the parent's side of the
// startup handshake with it:
//
//  1. instructions are passed to the child in "_Buildah-*" environment
//     variables, along with the descriptor numbers of two pipes;
//  2. the child's PID is read from the "pid" pipe;
//  3. if UnshareFlags includes CLONE_NEWUSER, /proc/<pid>/setgroups and the
//     GID and UID maps are written (using newgidmap/newuidmap when asked to);
//  4. OOMScoreAdj is written to /proc/<pid>/oom_score_adj;
//  5. Hook, if set, is called with the child's PID.
//
// If any step after the child has been started fails, a description of the
// error is written to the "continue" pipe so that the child sees it, and the
// error is returned.  On return, the parent's ends of both pipes are closed;
// closing the "continue" pipe without writing to it is what lets the child
// proceed.
func (c *Cmd) Start() error {
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	// Set an environment variable to tell the child to synchronize its startup.
	if c.Env == nil {
		c.Env = os.Environ()
	}
	c.Env = append(c.Env, fmt.Sprintf("_Buildah-unshare=%d", c.UnshareFlags))

	// Create the pipe for reading the child's PID.
	pidRead, pidWrite, err := os.Pipe()
	if err != nil {
		return errors.Wrapf(err, "error creating pid pipe")
	}
	// Entries in ExtraFiles become descriptors 3, 4, ... in the child.
	c.Env = append(c.Env, fmt.Sprintf("_Buildah-pid-pipe=%d", len(c.ExtraFiles)+3))
	c.ExtraFiles = append(c.ExtraFiles, pidWrite)

	// Create the pipe for letting the child know to proceed.
	continueRead, continueWrite, err := os.Pipe()
	if err != nil {
		pidRead.Close()
		pidWrite.Close()
		return errors.Wrapf(err, "error creating continue pipe")
	}
	c.Env = append(c.Env, fmt.Sprintf("_Buildah-continue-pipe=%d", len(c.ExtraFiles)+3))
	c.ExtraFiles = append(c.ExtraFiles, continueRead)

	// Pass along other instructions.
	if c.Setsid {
		c.Env = append(c.Env, "_Buildah-setsid=1")
	}
	if c.Setpgrp {
		c.Env = append(c.Env, "_Buildah-setpgrp=1")
	}
	if c.Ctty != nil {
		c.Env = append(c.Env, fmt.Sprintf("_Buildah-ctty=%d", len(c.ExtraFiles)+3))
		c.ExtraFiles = append(c.ExtraFiles, c.Ctty)
	}
	if c.GidMappingsEnableSetgroups {
		c.Env = append(c.Env, "_Buildah-allow-setgroups=1")
	} else {
		c.Env = append(c.Env, "_Buildah-allow-setgroups=0")
	}

	// Make sure we clean up our pipes.  Ends which have already been
	// closed are set to nil below so that they aren't closed twice.
	defer func() {
		if pidRead != nil {
			pidRead.Close()
		}
		if pidWrite != nil {
			pidWrite.Close()
		}
		if continueRead != nil {
			continueRead.Close()
		}
		if continueWrite != nil {
			continueWrite.Close()
		}
	}()

	// Start the new process.
	err = c.Cmd.Start()
	if err != nil {
		return err
	}

	// Close the ends of the pipes that the parent doesn't need.
	continueRead.Close()
	continueRead = nil
	pidWrite.Close()
	pidWrite = nil

	// Read the child's PID from the pipe.
	b := new(bytes.Buffer)
	if _, err := io.Copy(b, pidRead); err != nil {
		fmt.Fprintf(continueWrite, "error reading PID: %v", err)
		return errors.Wrapf(err, "error reading child PID")
	}
	pidString := b.String()
	pid, err := strconv.Atoi(pidString)
	if err != nil {
		fmt.Fprintf(continueWrite, "error parsing PID %q: %v", pidString, err)
		return errors.Wrapf(err, "error parsing PID %q", pidString)
	}
	// Use the normalized form of the PID when building /proc paths.
	pidString = fmt.Sprintf("%d", pid)

	// If we created a new user namespace, set any specified mappings.
	if c.UnshareFlags&syscall.CLONE_NEWUSER != 0 {
		// Always set "setgroups".
		setgroups, err := os.OpenFile(fmt.Sprintf("/proc/%s/setgroups", pidString), os.O_TRUNC|os.O_WRONLY, 0)
		if err != nil {
			fmt.Fprintf(continueWrite, "error opening setgroups: %v", err)
			return errors.Wrapf(err, "error opening /proc/%s/setgroups", pidString)
		}
		defer setgroups.Close()
		if c.GidMappingsEnableSetgroups {
			if _, err := fmt.Fprintf(setgroups, "allow"); err != nil {
				fmt.Fprintf(continueWrite, "error writing \"allow\" to setgroups: %v", err)
				return errors.Wrapf(err, "error writing \"allow\" to /proc/%s/setgroups", pidString)
			}
		} else {
			if _, err := fmt.Fprintf(setgroups, "deny"); err != nil {
				fmt.Fprintf(continueWrite, "error writing \"deny\" to setgroups: %v", err)
				return errors.Wrapf(err, "error writing \"deny\" to /proc/%s/setgroups", pidString)
			}
		}

		// If either map wasn't specified, default to the IDs which are
		// mapped in our own namespace, each mapped to itself.
		if len(c.UidMappings) == 0 || len(c.GidMappings) == 0 {
			uidmap, gidmap, err := util.GetHostIDMappings("")
			if err != nil {
				fmt.Fprintf(continueWrite, "error reading ID mappings in parent: %v", err)
				return errors.Wrapf(err, "error reading ID mappings in parent")
			}
			if len(c.UidMappings) == 0 {
				c.UidMappings = uidmap
				for i := range c.UidMappings {
					c.UidMappings[i].HostID = c.UidMappings[i].ContainerID
				}
			}
			if len(c.GidMappings) == 0 {
				c.GidMappings = gidmap
				for i := range c.GidMappings {
					c.GidMappings[i].HostID = c.GidMappings[i].ContainerID
				}
			}
		}

		if len(c.GidMappings) > 0 {
			// Build the GID map, since writing to the proc file has to be done all at once.
			g := new(bytes.Buffer)
			for _, m := range c.GidMappings {
				fmt.Fprintf(g, "%d %d %d\n", m.ContainerID, m.HostID, m.Size)
			}
			// Set the GID map.
			if c.UseNewgidmap {
				cmd := exec.Command("newgidmap", append([]string{pidString}, strings.Fields(strings.Replace(g.String(), "\n", " ", -1))...)...)
				// Reuse the buffer to capture the helper's output for error messages.
				g.Reset()
				cmd.Stdout = g
				cmd.Stderr = g
				err := cmd.Run()
				if err != nil {
					fmt.Fprintf(continueWrite, "error running newgidmap: %v: %s", err, g.String())
					return errors.Wrapf(err, "error running newgidmap: %s", g.String())
				}
			} else {
				gidmap, err := os.OpenFile(fmt.Sprintf("/proc/%s/gid_map", pidString), os.O_TRUNC|os.O_WRONLY, 0)
				if err != nil {
					fmt.Fprintf(continueWrite, "error opening /proc/%s/gid_map: %v", pidString, err)
					return errors.Wrapf(err, "error opening /proc/%s/gid_map", pidString)
				}
				defer gidmap.Close()
				if _, err := fmt.Fprintf(gidmap, "%s", g.String()); err != nil {
					fmt.Fprintf(continueWrite, "error writing /proc/%s/gid_map: %v", pidString, err)
					return errors.Wrapf(err, "error writing /proc/%s/gid_map", pidString)
				}
			}
		}

		if len(c.UidMappings) > 0 {
			// Build the UID map, since writing to the proc file has to be done all at once.
			u := new(bytes.Buffer)
			for _, m := range c.UidMappings {
				fmt.Fprintf(u, "%d %d %d\n", m.ContainerID, m.HostID, m.Size)
			}
			// Set the UID map.
			if c.UseNewuidmap {
				cmd := exec.Command("newuidmap", append([]string{pidString}, strings.Fields(strings.Replace(u.String(), "\n", " ", -1))...)...)
				// Reuse the buffer to capture the helper's output for error messages.
				u.Reset()
				cmd.Stdout = u
				cmd.Stderr = u
				err := cmd.Run()
				if err != nil {
					fmt.Fprintf(continueWrite, "error running newuidmap: %v: %s", err, u.String())
					return errors.Wrapf(err, "error running newuidmap: %s", u.String())
				}
			} else {
				uidmap, err := os.OpenFile(fmt.Sprintf("/proc/%s/uid_map", pidString), os.O_TRUNC|os.O_WRONLY, 0)
				if err != nil {
					fmt.Fprintf(continueWrite, "error opening /proc/%s/uid_map: %v", pidString, err)
					return errors.Wrapf(err, "error opening /proc/%s/uid_map", pidString)
				}
				defer uidmap.Close()
				if _, err := fmt.Fprintf(uidmap, "%s", u.String()); err != nil {
					fmt.Fprintf(continueWrite, "error writing /proc/%s/uid_map: %v", pidString, err)
					return errors.Wrapf(err, "error writing /proc/%s/uid_map", pidString)
				}
			}
		}
	}

	// Adjust the process's OOM score.
	oomScoreAdj, err := os.OpenFile(fmt.Sprintf("/proc/%s/oom_score_adj", pidString), os.O_TRUNC|os.O_WRONLY, 0)
	if err != nil {
		fmt.Fprintf(continueWrite, "error opening oom_score_adj: %v", err)
		return errors.Wrapf(err, "error opening /proc/%s/oom_score_adj", pidString)
	}
	// Schedule the close before writing, so that the file isn't leaked if the write fails.
	defer oomScoreAdj.Close()
	if _, err := fmt.Fprintf(oomScoreAdj, "%d\n", c.OOMScoreAdj); err != nil {
		fmt.Fprintf(continueWrite, "error writing \"%d\" to oom_score_adj: %v", c.OOMScoreAdj, err)
		return errors.Wrapf(err, "error writing \"%d\" to /proc/%s/oom_score_adj", c.OOMScoreAdj, pidString)
	}

	// Run any additional setup that we want to do before the child starts running proper.
	if c.Hook != nil {
		if err = c.Hook(pid); err != nil {
			fmt.Fprintf(continueWrite, "hook error: %v", err)
			return err
		}
	}

	return nil
}
// Run starts the command with Start and, if that succeeds, waits for it to
// finish.  It returns the first error encountered, from either Start or Wait.
func (c *Cmd) Run() error {
	err := c.Start()
	if err == nil {
		err = c.Wait()
	}
	return err
}
// CombinedOutput is not implemented for Cmd; it always returns a nil slice
// and an error saying so.
func (c *Cmd) CombinedOutput() ([]byte, error) {
	var output []byte
	return output, errors.New("unshare: CombinedOutput() not implemented")
}
// Output is not implemented for Cmd; it always returns a nil slice and an
// error saying so.
func (c *Cmd) Output() ([]byte, error) {
	var output []byte
	return output, errors.New("unshare: Output() not implemented")
}

10
unshare/unshare_cgo.go Normal file
View File

@ -0,0 +1,10 @@
// +build linux,cgo,!gccgo
package unshare
// #cgo CFLAGS: -Wall
// extern void _buildah_unshare(void);
// void __attribute__((constructor)) init(void) {
// _buildah_unshare();
// }
import "C"

25
unshare/unshare_gccgo.go Normal file
View File

@ -0,0 +1,25 @@
// +build linux,cgo,gccgo
package unshare
// #cgo CFLAGS: -Wall -Wextra
// extern void _buildah_unshare(void);
// void __attribute__((constructor)) init(void) {
// _buildah_unshare();
// }
import "C"
// This next bit is straight out of libcontainer.
// AlwaysFalse is here to stay false
// (and be exported so the compiler doesn't optimize out its reference)
var AlwaysFalse bool
// init never actually calls C.init(), because AlwaysFalse is never set; the
// call only exists so that the C constructor is referenced from Go code.
func init() {
if AlwaysFalse {
// by referencing this C init() in a noop test, it will ensure the compiler
// links in the C function.
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65134
C.init()
}
}

255
unshare/unshare_test.go Normal file
View File

@ -0,0 +1,255 @@
// +build linux
package unshare
import (
"bytes"
"encoding/json"
"io/ioutil"
"os"
"strconv"
"strings"
"syscall"
"testing"
"github.com/containers/storage/pkg/reexec"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/projectatomic/buildah/util"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
// TestMain gives the reexec package the first chance to handle this process.
// Only when reexec.Init() reports that it did not are the tests themselves
// run, with the process exiting with their status.
func TestMain(m *testing.M) {
	if !reexec.Init() {
		os.Exit(m.Run())
	}
}
// init registers the report function with the reexec package under the name
// "report", which is the name the tests in this file pass to Command.
func init() {
reexec.Register("report", report)
}
var (
// CloneFlags maps the name of each namespace link under /proc/self/ns
// that the tests examine to the clone flag used to request a new
// namespace of that type.
CloneFlags = map[string]int{
"ipc": syscall.CLONE_NEWIPC,
"net": syscall.CLONE_NEWNET,
"mnt": syscall.CLONE_NEWNS,
"user": syscall.CLONE_NEWUSER,
"uts": syscall.CLONE_NEWUTS,
}
)
// Report is the information that the report function gathers about its own
// process and writes to stdout as JSON, for the tests to decode and check.
type Report struct {
// Namespaces maps each name in CloneFlags to the target of the
// corresponding /proc/self/ns link.
Namespaces map[string]string
// UIDMappings and GIDMappings are the process's ID mappings, as read
// by util.GetHostIDMappings.
UIDMappings []specs.LinuxIDMapping
GIDMappings []specs.LinuxIDMapping
// Pgrp is the process group ID.
Pgrp int
// Sid is the session ID.
Sid int
// OOMScoreAdj is the value read from /proc/self/oom_score_adj.
OOMScoreAdj int
}
// report is the reexec handler which runs in the child process.  It collects
// the namespaces, process group, session, oom_score_adj value and ID mappings
// of its own process into a Report, and writes that to stdout as JSON.  Any
// failure, including a failure to write the JSON, is logged and causes the
// process to exit with status 1, so that the parent's Run() returns an error
// instead of the test trying to parse incomplete output.
func report() {
	var report Report

	// Record which namespaces we're in.
	report.Namespaces = make(map[string]string)
	for name := range CloneFlags {
		linkTarget, err := os.Readlink("/proc/self/ns/" + name)
		if err != nil {
			logrus.Errorf("error reading link /proc/self/ns/%s: %v", name, err)
			os.Exit(1)
		}
		report.Namespaces[name] = linkTarget
	}

	// Record our process group and session.
	report.Pgrp = syscall.Getpgrp()
	sid, err := unix.Getsid(unix.Getpid())
	if err != nil {
		logrus.Errorf("error reading current session ID: %v", err)
		os.Exit(1)
	}
	report.Sid = sid

	// Record our OOM score adjustment, which should be a single number.
	oomBytes, err := ioutil.ReadFile("/proc/self/oom_score_adj")
	if err != nil {
		logrus.Errorf("error reading current oom_score_adj: %v", err)
		os.Exit(1)
	}
	oomFields := strings.Fields(string(oomBytes))
	if len(oomFields) != 1 {
		logrus.Errorf("error parsing current oom_score_adj %q: wrong number of fields", string(oomBytes))
		os.Exit(1)
	}
	oom, err := strconv.Atoi(oomFields[0])
	if err != nil {
		logrus.Errorf("error parsing current oom_score_adj %q: %v", oomFields[0], err)
		os.Exit(1)
	}
	report.OOMScoreAdj = oom

	// Record our ID mappings.
	uidmap, gidmap, err := util.GetHostIDMappings("")
	if err != nil {
		logrus.Errorf("error reading current ID mappings: %v", err)
		os.Exit(1)
	}
	report.UIDMappings = append(report.UIDMappings, uidmap...)
	report.GIDMappings = append(report.GIDMappings, gidmap...)

	if err := json.NewEncoder(os.Stdout).Encode(report); err != nil {
		logrus.Errorf("error encoding report: %v", err)
		os.Exit(1)
	}
}
// TestUnshareNamespaces starts a child in a new user namespace plus one other
// new namespace at a time, and checks that the child ended up in new
// namespaces of exactly those types, and in our own namespaces of every other
// type.
func TestUnshareNamespaces(t *testing.T) {
	for name, flag := range CloneFlags {
		var report Report
		buf := new(bytes.Buffer)
		cmd := Command("report")
		cmd.UnshareFlags = syscall.CLONE_NEWUSER | flag
		cmd.UidMappings = []specs.LinuxIDMapping{{HostID: uint32(syscall.Getuid()), ContainerID: 0, Size: 1}}
		cmd.GidMappings = []specs.LinuxIDMapping{{HostID: uint32(syscall.Getgid()), ContainerID: 0, Size: 1}}
		cmd.Stdout = buf
		cmd.Stderr = buf
		// t.Fatalf stops the test immediately, so nothing needs to follow it.
		if err := cmd.Run(); err != nil {
			t.Fatalf("run %q: %v: %s", name, err, buf.String())
		}
		if err := json.Unmarshal(buf.Bytes(), &report); err != nil {
			t.Fatalf("error parsing results: %v", err)
		}
		for ns := range CloneFlags {
			linkTarget, err := os.Readlink("/proc/self/ns/" + ns)
			if err != nil {
				t.Fatalf("error reading link /proc/self/ns/%s: %v", ns, err)
			}
			if ns == name || ns == "user" { // we always create a new user namespace
				if report.Namespaces[ns] == linkTarget {
					t.Fatalf("child is still in our %q namespace", ns)
				}
			} else if report.Namespaces[ns] != linkTarget {
				t.Fatalf("child is not in our %q namespace", ns)
			}
		}
	}
}
// TestUnsharePgrp checks that the child is in our process group exactly when
// Setpgrp was left unset.
func TestUnsharePgrp(t *testing.T) {
	for _, same := range []bool{false, true} {
		var report Report
		buf := new(bytes.Buffer)
		cmd := Command("report")
		cmd.Setpgrp = !same
		cmd.Stdout = buf
		cmd.Stderr = buf
		// t.Fatalf stops the test immediately, so nothing needs to follow it.
		if err := cmd.Run(); err != nil {
			t.Fatalf("run: %v: %s", err, buf.String())
		}
		if err := json.Unmarshal(buf.Bytes(), &report); err != nil {
			t.Fatalf("error parsing results: %v", err)
		}
		if (report.Pgrp == syscall.Getpgrp()) != same {
			t.Fatalf("expected %d == %d to be %v", report.Pgrp, syscall.Getpgrp(), same)
		}
	}
}
// TestUnshareSid checks that the child is in our session exactly when Setsid
// was left unset.
func TestUnshareSid(t *testing.T) {
	sid, err := unix.Getsid(unix.Getpid())
	if err != nil {
		t.Fatalf("error reading current session ID: %v", err)
	}
	for _, same := range []bool{false, true} {
		var report Report
		buf := new(bytes.Buffer)
		cmd := Command("report")
		cmd.Setsid = !same
		cmd.Stdout = buf
		cmd.Stderr = buf
		// t.Fatalf stops the test immediately, so nothing needs to follow it.
		if err := cmd.Run(); err != nil {
			t.Fatalf("run: %v: %s", err, buf.String())
		}
		if err := json.Unmarshal(buf.Bytes(), &report); err != nil {
			t.Fatalf("error parsing results: %v", err)
		}
		if (report.Sid == sid) != same {
			t.Fatalf("expected %d == %d to be %v", report.Sid, sid, same)
		}
	}
}
// TestUnshareOOMScoreAdj checks that the child sees the oom_score_adj value
// which was requested through Cmd.OOMScoreAdj.
func TestUnshareOOMScoreAdj(t *testing.T) {
	for _, adj := range []int{0, 1, 2, 3} {
		var report Report
		buf := new(bytes.Buffer)
		cmd := Command("report")
		cmd.OOMScoreAdj = adj
		cmd.Stdout = buf
		cmd.Stderr = buf
		// t.Fatalf stops the test immediately, so nothing needs to follow it.
		if err := cmd.Run(); err != nil {
			t.Fatalf("run: %v: %s", err, buf.String())
		}
		if err := json.Unmarshal(buf.Bytes(), &report); err != nil {
			t.Fatalf("error parsing results: %v", err)
		}
		if report.OOMScoreAdj != adj {
			t.Fatalf("expected oom_score_adj %d, saw %d", adj, report.OOMScoreAdj)
		}
	}
}
// TestUnshareIDMappings starts a child in a new user namespace with single-ID
// UID and GID maps, and checks that the mappings which the child reports match
// the ones that were requested, entry by entry.
func TestUnshareIDMappings(t *testing.T) {
	var report Report
	output := new(bytes.Buffer)

	cmd := Command("report")
	cmd.UnshareFlags = syscall.CLONE_NEWUSER
	cmd.UidMappings = []specs.LinuxIDMapping{{HostID: uint32(syscall.Getuid()), ContainerID: 0, Size: 1}}
	cmd.GidMappings = []specs.LinuxIDMapping{{HostID: uint32(syscall.Getgid()), ContainerID: 0, Size: 1}}
	cmd.Stdout = output
	cmd.Stderr = output

	if err := cmd.Run(); err != nil {
		t.Fatalf("run: %v: %s", err, output.String())
	}
	if err := json.Unmarshal(output.Bytes(), &report); err != nil {
		t.Fatalf("error parsing results: %v", err)
	}

	// Compare the UID maps.
	if len(cmd.UidMappings) != len(report.UIDMappings) {
		t.Fatalf("set %d UID mappings, read %d instead", len(cmd.UidMappings), len(report.UIDMappings))
	}
	for i, want := range cmd.UidMappings {
		got := report.UIDMappings[i]
		if want.ContainerID != got.ContainerID || want.HostID != got.HostID || want.Size != got.Size {
			t.Fatalf("uid map entry %#v != %#v", want, got)
		}
	}

	// Compare the GID maps.
	if len(cmd.GidMappings) != len(report.GIDMappings) {
		t.Fatalf("set %d GID mappings, read %d instead", len(cmd.GidMappings), len(report.GIDMappings))
	}
	for i, want := range cmd.GidMappings {
		got := report.GIDMappings[i]
		if want.ContainerID != got.ContainerID || want.HostID != got.HostID || want.Size != got.Size {
			t.Fatalf("gid map entry %#v != %#v", want, got)
		}
	}
}

View File

@ -0,0 +1 @@
package unshare

View File

@ -7,6 +7,7 @@ import (
"net/url"
"os"
"path"
"path/filepath"
"strconv"
"strings"
@ -330,7 +331,8 @@ func getHostIDMappings(path string) ([]specs.LinuxIDMapping, error) {
return mappings, nil
}
// GetHostIDMappings reads mappings for the current process from the kernel.
// GetHostIDMappings reads mappings for the specified process (or the current
// process if pid is "self" or an empty string) from the kernel.
func GetHostIDMappings(pid string) ([]specs.LinuxIDMapping, []specs.LinuxIDMapping, error) {
if pid == "" {
pid = "self"
@ -428,3 +430,37 @@ func ParseIDMappings(uidmap, gidmap []string) ([]idtools.IDMap, []idtools.IDMap,
}
return uid, gid, nil
}
// UnsharedRootPath returns the default storage root location for an
// unprivileged user.  It is the first of these which can be determined:
//
//   - $XDG_DATA_HOME/containers/storage
//   - $HOME/.local/share/containers/storage
//   - homedir/.local/share/containers/storage
//
// An environment variable which is set to an empty string is treated as if it
// were not set, as the XDG Base Directory Specification requires; otherwise
// the result would be a path relative to the current directory.  An error is
// returned if neither variable is usable and homedir is empty.
func UnsharedRootPath(homedir string) (string, error) {
	// If $XDG_DATA_HOME is defined and not empty...
	if envDataHome := os.Getenv("XDG_DATA_HOME"); envDataHome != "" {
		return filepath.Join(envDataHome, "containers", "storage"), nil
	}
	// If $XDG_DATA_HOME is not usable, but $HOME is defined and not empty...
	if envHomedir := os.Getenv("HOME"); envHomedir != "" {
		// Default to the user's $HOME/.local/share/containers/storage subdirectory.
		return filepath.Join(envHomedir, ".local", "share", "containers", "storage"), nil
	}
	// If we know where our home directory is...
	if homedir != "" {
		// Default to the user's homedir/.local/share/containers/storage subdirectory.
		return filepath.Join(homedir, ".local", "share", "containers", "storage"), nil
	}
	return "", errors.New("unable to determine a --root location: neither $XDG_DATA_HOME nor $HOME is set")
}
// UnsharedRunrootPath returns the default storage runroot location for an
// unprivileged user: $XDG_RUNTIME_DIR/run if $XDG_RUNTIME_DIR is set to a
// non-empty value, or else /var/run/user/(uid)/run if uid is not empty.  An
// empty $XDG_RUNTIME_DIR is treated as if it were not set, since using it
// would produce the relative path "run".  An error is returned if neither
// location can be determined.
func UnsharedRunrootPath(uid string) (string, error) {
	// If $XDG_RUNTIME_DIR is defined and not empty...
	if envRuntimeDir := os.Getenv("XDG_RUNTIME_DIR"); envRuntimeDir != "" {
		return filepath.Join(envRuntimeDir, "run"), nil
	}
	// If $XDG_RUNTIME_DIR is not usable, but we know our UID...
	if uid != "" {
		return filepath.Join("/var/run/user", uid, "run"), nil
	}
	return "", errors.New("unable to determine a --runroot location: $XDG_RUNTIME_DIR is not set, and we don't know our UID")
}