2174 lines
		
	
	
		
			69 KiB
		
	
	
	
		
			Go
		
	
	
	
			
		
		
	
	
			2174 lines
		
	
	
		
			69 KiB
		
	
	
	
		
			Go
		
	
	
	
| //go:build linux || freebsd
 | |
| 
 | |
| package buildah
 | |
| 
 | |
| import (
 | |
| 	"archive/tar"
 | |
| 	"bytes"
 | |
| 	"encoding/json"
 | |
| 	"errors"
 | |
| 	"fmt"
 | |
| 	"io"
 | |
| 	"io/fs"
 | |
| 	"net"
 | |
| 	"os"
 | |
| 	"os/exec"
 | |
| 	"os/signal"
 | |
| 	"path"
 | |
| 	"path/filepath"
 | |
| 	"runtime"
 | |
| 	"slices"
 | |
| 	"strconv"
 | |
| 	"strings"
 | |
| 	"sync"
 | |
| 	"sync/atomic"
 | |
| 	"syscall"
 | |
| 	"time"
 | |
| 
 | |
| 	"github.com/containers/buildah/bind"
 | |
| 	"github.com/containers/buildah/copier"
 | |
| 	"github.com/containers/buildah/define"
 | |
| 	"github.com/containers/buildah/internal"
 | |
| 	"github.com/containers/buildah/internal/tmpdir"
 | |
| 	"github.com/containers/buildah/internal/volumes"
 | |
| 	"github.com/containers/buildah/pkg/overlay"
 | |
| 	"github.com/containers/buildah/pkg/sshagent"
 | |
| 	"github.com/containers/buildah/util"
 | |
| 	"github.com/containers/common/libnetwork/etchosts"
 | |
| 	"github.com/containers/common/libnetwork/network"
 | |
| 	"github.com/containers/common/libnetwork/resolvconf"
 | |
| 	netTypes "github.com/containers/common/libnetwork/types"
 | |
| 	netUtil "github.com/containers/common/libnetwork/util"
 | |
| 	"github.com/containers/common/pkg/config"
 | |
| 	"github.com/containers/common/pkg/subscriptions"
 | |
| 	"github.com/containers/image/v5/types"
 | |
| 	"github.com/containers/storage"
 | |
| 	"github.com/containers/storage/pkg/fileutils"
 | |
| 	"github.com/containers/storage/pkg/idtools"
 | |
| 	"github.com/containers/storage/pkg/ioutils"
 | |
| 	"github.com/containers/storage/pkg/lockfile"
 | |
| 	"github.com/containers/storage/pkg/mount"
 | |
| 	"github.com/containers/storage/pkg/reexec"
 | |
| 	"github.com/containers/storage/pkg/regexp"
 | |
| 	"github.com/containers/storage/pkg/unshare"
 | |
| 	"github.com/opencontainers/go-digest"
 | |
| 	"github.com/opencontainers/runtime-spec/specs-go"
 | |
| 	"github.com/opencontainers/runtime-tools/generate"
 | |
| 	"github.com/opencontainers/selinux/go-selinux/label"
 | |
| 	"github.com/sirupsen/logrus"
 | |
| 	"golang.org/x/sys/unix"
 | |
| 	"golang.org/x/term"
 | |
| )
 | |
| 
 | |
| const maxHostnameLen = 64
 | |
| 
 | |
| var validHostnames = regexp.Delayed("[A-Za-z0-9][A-Za-z0-9.-]+")
 | |
| 
 | |
| func (b *Builder) createResolvConf(rdir string, chownOpts *idtools.IDPair) (string, error) {
 | |
| 	cfile := filepath.Join(rdir, "resolv.conf")
 | |
| 	f, err := os.Create(cfile)
 | |
| 	if err != nil {
 | |
| 		return "", err
 | |
| 	}
 | |
| 	defer f.Close()
 | |
| 
 | |
| 	uid := 0
 | |
| 	gid := 0
 | |
| 	if chownOpts != nil {
 | |
| 		uid = chownOpts.UID
 | |
| 		gid = chownOpts.GID
 | |
| 	}
 | |
| 	if err = f.Chown(uid, gid); err != nil {
 | |
| 		return "", err
 | |
| 	}
 | |
| 
 | |
| 	if err := relabel(cfile, b.MountLabel, false); err != nil {
 | |
| 		return "", err
 | |
| 	}
 | |
| 	return cfile, nil
 | |
| }
 | |
| 
 | |
| // addResolvConf copies files from host and sets them up to bind mount into container
 | |
| func (b *Builder) addResolvConfEntries(file string, networkNameServer []string,
 | |
| 	spec *specs.Spec, keepHostServers, ipv6 bool,
 | |
| ) error {
 | |
| 	defaultConfig, err := config.Default()
 | |
| 	if err != nil {
 | |
| 		return fmt.Errorf("failed to get config: %w", err)
 | |
| 	}
 | |
| 
 | |
| 	var namespaces []specs.LinuxNamespace
 | |
| 	if spec.Linux != nil {
 | |
| 		namespaces = spec.Linux.Namespaces
 | |
| 	}
 | |
| 
 | |
| 	dnsServers, dnsSearch, dnsOptions := b.CommonBuildOpts.DNSServers, b.CommonBuildOpts.DNSSearch, b.CommonBuildOpts.DNSOptions
 | |
| 	nameservers := make([]string, 0, len(defaultConfig.Containers.DNSServers.Get())+len(dnsServers))
 | |
| 	nameservers = append(nameservers, defaultConfig.Containers.DNSServers.Get()...)
 | |
| 	nameservers = append(nameservers, dnsServers...)
 | |
| 
 | |
| 	searches := make([]string, 0, len(defaultConfig.Containers.DNSSearches.Get())+len(dnsSearch))
 | |
| 	searches = append(searches, defaultConfig.Containers.DNSSearches.Get()...)
 | |
| 	searches = append(searches, dnsSearch...)
 | |
| 
 | |
| 	options := make([]string, 0, len(defaultConfig.Containers.DNSOptions.Get())+len(dnsOptions))
 | |
| 	options = append(options, defaultConfig.Containers.DNSOptions.Get()...)
 | |
| 	options = append(options, dnsOptions...)
 | |
| 
 | |
| 	if len(nameservers) == 0 {
 | |
| 		nameservers = networkNameServer
 | |
| 	}
 | |
| 
 | |
| 	if err := resolvconf.New(&resolvconf.Params{
 | |
| 		Path:            file,
 | |
| 		Namespaces:      namespaces,
 | |
| 		IPv6Enabled:     ipv6,
 | |
| 		KeepHostServers: keepHostServers,
 | |
| 		Nameservers:     nameservers,
 | |
| 		Searches:        searches,
 | |
| 		Options:         options,
 | |
| 	}); err != nil {
 | |
| 		return fmt.Errorf("building resolv.conf for container %s: %w", b.ContainerID, err)
 | |
| 	}
 | |
| 
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // createHostsFile creates a containers hosts file
 | |
| func (b *Builder) createHostsFile(rdir string, chownOpts *idtools.IDPair) (string, error) {
 | |
| 	targetfile := filepath.Join(rdir, "hosts")
 | |
| 	f, err := os.Create(targetfile)
 | |
| 	if err != nil {
 | |
| 		return "", err
 | |
| 	}
 | |
| 	defer f.Close()
 | |
| 	uid := 0
 | |
| 	gid := 0
 | |
| 	if chownOpts != nil {
 | |
| 		uid = chownOpts.UID
 | |
| 		gid = chownOpts.GID
 | |
| 	}
 | |
| 	if err := f.Chown(uid, gid); err != nil {
 | |
| 		return "", err
 | |
| 	}
 | |
| 	if err := relabel(targetfile, b.MountLabel, false); err != nil {
 | |
| 		return "", err
 | |
| 	}
 | |
| 
 | |
| 	return targetfile, nil
 | |
| }
 | |
| 
 | |
| func (b *Builder) addHostsEntries(file, imageRoot string, entries etchosts.HostEntries, exclude []net.IP, preferIP string) error {
 | |
| 	conf, err := config.Default()
 | |
| 	if err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 
 | |
| 	base, err := etchosts.GetBaseHostFile(conf.Containers.BaseHostsFile, imageRoot)
 | |
| 	if err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 	return etchosts.New(&etchosts.Params{
 | |
| 		BaseFile:   base,
 | |
| 		ExtraHosts: b.CommonBuildOpts.AddHost,
 | |
| 		HostContainersInternalIP: etchosts.GetHostContainersInternalIP(etchosts.HostContainersInternalOptions{
 | |
| 			Conf:     conf,
 | |
| 			Exclude:  exclude,
 | |
| 			PreferIP: preferIP,
 | |
| 		}),
 | |
| 		TargetFile:   file,
 | |
| 		ContainerIPs: entries,
 | |
| 	})
 | |
| }
 | |
| 
 | |
| // generateHostname creates a containers /etc/hostname file
 | |
| func (b *Builder) generateHostname(rdir, hostname string, chownOpts *idtools.IDPair) (string, error) {
 | |
| 	cfile := filepath.Join(rdir, "hostname")
 | |
| 	if err := ioutils.AtomicWriteFile(cfile, append([]byte(hostname), '\n'), 0o644); err != nil {
 | |
| 		return "", fmt.Errorf("writing /etc/hostname into the container: %w", err)
 | |
| 	}
 | |
| 
 | |
| 	uid := 0
 | |
| 	gid := 0
 | |
| 	if chownOpts != nil {
 | |
| 		uid = chownOpts.UID
 | |
| 		gid = chownOpts.GID
 | |
| 	}
 | |
| 	if err := os.Chown(cfile, uid, gid); err != nil {
 | |
| 		return "", err
 | |
| 	}
 | |
| 	if err := relabel(cfile, b.MountLabel, false); err != nil {
 | |
| 		return "", err
 | |
| 	}
 | |
| 
 | |
| 	return cfile, nil
 | |
| }
 | |
| 
 | |
| func setupTerminal(g *generate.Generator, terminalPolicy TerminalPolicy, terminalSize *specs.Box) {
 | |
| 	switch terminalPolicy {
 | |
| 	case DefaultTerminal:
 | |
| 		onTerminal := term.IsTerminal(unix.Stdin) && term.IsTerminal(unix.Stdout) && term.IsTerminal(unix.Stderr)
 | |
| 		if onTerminal {
 | |
| 			logrus.Debugf("stdio is a terminal, defaulting to using a terminal")
 | |
| 		} else {
 | |
| 			logrus.Debugf("stdio is not a terminal, defaulting to not using a terminal")
 | |
| 		}
 | |
| 		g.SetProcessTerminal(onTerminal)
 | |
| 	case WithTerminal:
 | |
| 		g.SetProcessTerminal(true)
 | |
| 	case WithoutTerminal:
 | |
| 		g.SetProcessTerminal(false)
 | |
| 	}
 | |
| 	if terminalSize != nil {
 | |
| 		g.SetProcessConsoleSize(terminalSize.Width, terminalSize.Height)
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // Search for a command that isn't given as an absolute path using the $PATH
 | |
| // under the rootfs.  We can't resolve absolute symbolic links without
 | |
| // chroot()ing, which we may not be able to do, so just accept a link as a
 | |
| // valid resolution.
 | |
| func runLookupPath(g *generate.Generator, command []string) []string {
 | |
| 	// Look for the configured $PATH.
 | |
| 	spec := g.Config
 | |
| 	envPath := ""
 | |
| 	for i := range spec.Process.Env {
 | |
| 		if strings.HasPrefix(spec.Process.Env[i], "PATH=") {
 | |
| 			envPath = spec.Process.Env[i]
 | |
| 		}
 | |
| 	}
 | |
| 	// If there is no configured $PATH, supply one.
 | |
| 	if envPath == "" {
 | |
| 		defaultPath := "/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin:/bin:/sbin"
 | |
| 		envPath = "PATH=" + defaultPath
 | |
| 		g.AddProcessEnv("PATH", defaultPath)
 | |
| 	}
 | |
| 	// No command, nothing to do.
 | |
| 	if len(command) == 0 {
 | |
| 		return command
 | |
| 	}
 | |
| 	// Command is already an absolute path, use it as-is.
 | |
| 	if filepath.IsAbs(command[0]) {
 | |
| 		return command
 | |
| 	}
 | |
| 	// For each element in the PATH,
 | |
| 	for _, pathEntry := range filepath.SplitList(envPath[5:]) {
 | |
| 		// if it's the empty string, it's ".", which is the Cwd,
 | |
| 		if pathEntry == "" {
 | |
| 			pathEntry = spec.Process.Cwd
 | |
| 		}
 | |
| 		// build the absolute path which it might be,
 | |
| 		candidate := filepath.Join(pathEntry, command[0])
 | |
| 		// check if it's there,
 | |
| 		if fi, err := os.Lstat(filepath.Join(spec.Root.Path, candidate)); fi != nil && err == nil {
 | |
| 			// and if it's not a directory, and either a symlink or executable,
 | |
| 			if !fi.IsDir() && ((fi.Mode()&os.ModeSymlink != 0) || (fi.Mode()&0o111 != 0)) {
 | |
| 				// use that.
 | |
| 				return append([]string{candidate}, command[1:]...)
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 	return command
 | |
| }
 | |
| 
 | |
| func (b *Builder) configureUIDGID(g *generate.Generator, mountPoint string, options RunOptions) (string, error) {
 | |
| 	// Set the user UID/GID/supplemental group list/capabilities lists.
 | |
| 	user, homeDir, err := b.userForRun(mountPoint, options.User)
 | |
| 	if err != nil {
 | |
| 		return "", err
 | |
| 	}
 | |
| 	if err := setupCapabilities(g, b.Capabilities, options.AddCapabilities, options.DropCapabilities); err != nil {
 | |
| 		return "", err
 | |
| 	}
 | |
| 	g.SetProcessUID(user.UID)
 | |
| 	g.SetProcessGID(user.GID)
 | |
| 	g.AddProcessAdditionalGid(user.GID)
 | |
| 	for _, gid := range user.AdditionalGids {
 | |
| 		g.AddProcessAdditionalGid(gid)
 | |
| 	}
 | |
| 	for _, group := range b.GroupAdd {
 | |
| 		if group == "keep-groups" {
 | |
| 			if len(b.GroupAdd) > 1 {
 | |
| 				return "", errors.New("the '--group-add keep-groups' option is not allowed with any other --group-add options")
 | |
| 			}
 | |
| 			g.AddAnnotation("run.oci.keep_original_groups", "1")
 | |
| 			continue
 | |
| 		}
 | |
| 		gid, err := strconv.ParseUint(group, 10, 32)
 | |
| 		if err != nil {
 | |
| 			return "", err
 | |
| 		}
 | |
| 		g.AddProcessAdditionalGid(uint32(gid))
 | |
| 	}
 | |
| 
 | |
| 	// Remove capabilities if not running as root except Bounding set
 | |
| 	if user.UID != 0 && g.Config.Process.Capabilities != nil {
 | |
| 		bounding := g.Config.Process.Capabilities.Bounding
 | |
| 		g.ClearProcessCapabilities()
 | |
| 		g.Config.Process.Capabilities.Bounding = bounding
 | |
| 	}
 | |
| 
 | |
| 	return homeDir, nil
 | |
| }
 | |
| 
 | |
| func (b *Builder) configureEnvironment(g *generate.Generator, options RunOptions, defaultEnv []string) {
 | |
| 	g.ClearProcessEnv()
 | |
| 
 | |
| 	if b.CommonBuildOpts.HTTPProxy {
 | |
| 		for _, envSpec := range config.ProxyEnv {
 | |
| 			if envVal, ok := os.LookupEnv(envSpec); ok {
 | |
| 				g.AddProcessEnv(envSpec, envVal)
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	for _, envSpec := range util.MergeEnv(util.MergeEnv(defaultEnv, b.Env()), options.Env) {
 | |
| 		env := strings.SplitN(envSpec, "=", 2)
 | |
| 		if len(env) > 1 {
 | |
| 			g.AddProcessEnv(env[0], env[1])
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // getNetworkInterface creates the network interface
 | |
| func getNetworkInterface(store storage.Store, cniConfDir, cniPluginPath string) (netTypes.ContainerNetwork, error) {
 | |
| 	conf, err := config.Default()
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 	// copy the config to not modify the default by accident
 | |
| 	newconf := *conf
 | |
| 	if len(cniConfDir) > 0 {
 | |
| 		newconf.Network.NetworkConfigDir = cniConfDir
 | |
| 	}
 | |
| 	if len(cniPluginPath) > 0 {
 | |
| 		plugins := strings.Split(cniPluginPath, string(os.PathListSeparator))
 | |
| 		newconf.Network.CNIPluginDirs.Set(plugins)
 | |
| 	}
 | |
| 
 | |
| 	_, netInt, err := network.NetworkBackend(store, &newconf, false)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 	return netInt, nil
 | |
| }
 | |
| 
 | |
| func netStatusToNetResult(netStatus map[string]netTypes.StatusBlock, hostnames []string) *netResult {
 | |
| 	result := &netResult{
 | |
| 		keepHostResolvers: false,
 | |
| 	}
 | |
| 	for _, status := range netStatus {
 | |
| 		for _, dns := range status.DNSServerIPs {
 | |
| 			result.dnsServers = append(result.dnsServers, dns.String())
 | |
| 		}
 | |
| 		for _, netInt := range status.Interfaces {
 | |
| 			for _, netAddress := range netInt.Subnets {
 | |
| 				e := etchosts.HostEntry{IP: netAddress.IPNet.IP.String(), Names: hostnames}
 | |
| 				result.entries = append(result.entries, e)
 | |
| 				if !result.ipv6 && netUtil.IsIPv6(netAddress.IPNet.IP) {
 | |
| 					result.ipv6 = true
 | |
| 				}
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 	return result
 | |
| }
 | |
| 
 | |
| // DefaultNamespaceOptions returns the default namespace settings from the
 | |
| // runtime-tools generator library.
 | |
| func DefaultNamespaceOptions() (define.NamespaceOptions, error) {
 | |
| 	cfg, err := config.Default()
 | |
| 	if err != nil {
 | |
| 		return nil, fmt.Errorf("failed to get container config: %w", err)
 | |
| 	}
 | |
| 	options := define.NamespaceOptions{
 | |
| 		{Name: string(specs.CgroupNamespace), Host: cfg.CgroupNS() == "host"},
 | |
| 		{Name: string(specs.IPCNamespace), Host: cfg.IPCNS() == "host"},
 | |
| 		{Name: string(specs.MountNamespace), Host: false},
 | |
| 		{Name: string(specs.NetworkNamespace), Host: cfg.NetNS() == "host"},
 | |
| 		{Name: string(specs.PIDNamespace), Host: cfg.PidNS() == "host"},
 | |
| 		{Name: string(specs.UserNamespace), Host: cfg.Containers.UserNS == "" || cfg.Containers.UserNS == "host"},
 | |
| 		{Name: string(specs.UTSNamespace), Host: cfg.UTSNS() == "host"},
 | |
| 	}
 | |
| 	return options, nil
 | |
| }
 | |
| 
 | |
| func checkAndOverrideIsolationOptions(isolation define.Isolation, options *RunOptions) error {
 | |
| 	switch isolation {
 | |
| 	case IsolationOCIRootless:
 | |
| 		// only change the netns if the caller did not set it
 | |
| 		if ns := options.NamespaceOptions.Find(string(specs.NetworkNamespace)); ns == nil {
 | |
| 			if _, err := exec.LookPath("slirp4netns"); err != nil {
 | |
| 				// if slirp4netns is not installed we have to use the hosts net namespace
 | |
| 				options.NamespaceOptions.AddOrReplace(define.NamespaceOption{Name: string(specs.NetworkNamespace), Host: true})
 | |
| 			}
 | |
| 		}
 | |
| 		fallthrough
 | |
| 	case IsolationOCI:
 | |
| 		pidns := options.NamespaceOptions.Find(string(specs.PIDNamespace))
 | |
| 		userns := options.NamespaceOptions.Find(string(specs.UserNamespace))
 | |
| 		if (pidns != nil && pidns.Host) && (userns != nil && !userns.Host) {
 | |
| 			return fmt.Errorf("not allowed to mix host PID namespace with container user namespace")
 | |
| 		}
 | |
| 	case IsolationChroot:
 | |
| 		logrus.Info("network namespace isolation not supported with chroot isolation, forcing host network")
 | |
| 		options.NamespaceOptions.AddOrReplace(define.NamespaceOption{Name: string(specs.NetworkNamespace), Host: true})
 | |
| 	}
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // fileCloser is a helper struct to prevent closing the file twice in the code
 | |
| // users must call (fileCloser).Close() and not fileCloser.File.Close()
 | |
| type fileCloser struct {
 | |
| 	file   *os.File
 | |
| 	closed bool
 | |
| }
 | |
| 
 | |
| func (f *fileCloser) Close() {
 | |
| 	if !f.closed {
 | |
| 		if err := f.file.Close(); err != nil {
 | |
| 			logrus.Errorf("failed to close file: %v", err)
 | |
| 		}
 | |
| 		f.closed = true
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // waitForSync waits for a maximum of 4 minutes to read something from the file
 | |
| func waitForSync(pipeR *os.File) error {
 | |
| 	if err := pipeR.SetDeadline(time.Now().Add(4 * time.Minute)); err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 	b := make([]byte, 16)
 | |
| 	_, err := pipeR.Read(b)
 | |
| 	return err
 | |
| }
 | |
| 
 | |
| func runUsingRuntime(options RunOptions, configureNetwork bool, moreCreateArgs []string, spec *specs.Spec, bundlePath, containerName string,
 | |
| 	containerCreateW io.WriteCloser, containerStartR io.ReadCloser,
 | |
| ) (wstatus unix.WaitStatus, err error) {
 | |
| 	if options.Logger == nil {
 | |
| 		options.Logger = logrus.StandardLogger()
 | |
| 	}
 | |
| 
 | |
| 	// Lock the caller to a single OS-level thread.
 | |
| 	runtime.LockOSThread()
 | |
| 
 | |
| 	// Set up bind mounts for things that a namespaced user might not be able to get to directly.
 | |
| 	unmountAll, err := bind.SetupIntermediateMountNamespace(spec, bundlePath)
 | |
| 	if unmountAll != nil {
 | |
| 		defer func() {
 | |
| 			if err := unmountAll(); err != nil {
 | |
| 				options.Logger.Error(err)
 | |
| 			}
 | |
| 		}()
 | |
| 	}
 | |
| 	if err != nil {
 | |
| 		return 1, err
 | |
| 	}
 | |
| 
 | |
| 	// Write the runtime configuration.
 | |
| 	specbytes, err := json.Marshal(spec)
 | |
| 	if err != nil {
 | |
| 		return 1, fmt.Errorf("encoding configuration %#v as json: %w", spec, err)
 | |
| 	}
 | |
| 	if err = ioutils.AtomicWriteFile(filepath.Join(bundlePath, "config.json"), specbytes, 0o600); err != nil {
 | |
| 		return 1, fmt.Errorf("storing runtime configuration: %w", err)
 | |
| 	}
 | |
| 
 | |
| 	logrus.Debugf("config = %v", string(specbytes))
 | |
| 
 | |
| 	// Decide which runtime to use.
 | |
| 	runtime := options.Runtime
 | |
| 	if runtime == "" {
 | |
| 		runtime = util.Runtime()
 | |
| 	}
 | |
| 	localRuntime := util.FindLocalRuntime(runtime)
 | |
| 	if localRuntime != "" {
 | |
| 		runtime = localRuntime
 | |
| 	}
 | |
| 
 | |
| 	// Default to just passing down our stdio.
 | |
| 	getCreateStdio := func() (io.ReadCloser, io.WriteCloser, io.WriteCloser) {
 | |
| 		return os.Stdin, os.Stdout, os.Stderr
 | |
| 	}
 | |
| 
 | |
| 	// Figure out how we're doing stdio handling, and create pipes and sockets.
 | |
| 	var stdio sync.WaitGroup
 | |
| 	var consoleListener *net.UnixListener
 | |
| 	var errorFds, closeBeforeReadingErrorFds []int
 | |
| 	stdioPipe := make([][]int, 3)
 | |
| 	copyConsole := false
 | |
| 	copyPipes := false
 | |
| 	finishCopy := make([]int, 2)
 | |
| 	if err = unix.Pipe(finishCopy); err != nil {
 | |
| 		return 1, fmt.Errorf("creating pipe for notifying to stop stdio: %w", err)
 | |
| 	}
 | |
| 	finishedCopy := make(chan struct{}, 1)
 | |
| 	var pargs []string
 | |
| 	if spec.Process != nil {
 | |
| 		pargs = spec.Process.Args
 | |
| 		if spec.Process.Terminal {
 | |
| 			copyConsole = true
 | |
| 			// Create a listening socket for accepting the container's terminal's PTY master.
 | |
| 			socketPath := filepath.Join(bundlePath, "console.sock")
 | |
| 			consoleListener, err = net.ListenUnix("unix", &net.UnixAddr{Name: socketPath, Net: "unix"})
 | |
| 			if err != nil {
 | |
| 				return 1, fmt.Errorf("creating socket %q to receive terminal descriptor: %w", consoleListener.Addr(), err)
 | |
| 			}
 | |
| 			// Add console socket arguments.
 | |
| 			moreCreateArgs = append(moreCreateArgs, "--console-socket", socketPath)
 | |
| 		} else {
 | |
| 			copyPipes = true
 | |
| 			// Figure out who should own the pipes.
 | |
| 			uid, gid, err := util.GetHostRootIDs(spec)
 | |
| 			if err != nil {
 | |
| 				return 1, err
 | |
| 			}
 | |
| 			// Create stdio pipes.
 | |
| 			if stdioPipe, err = runMakeStdioPipe(int(uid), int(gid)); err != nil {
 | |
| 				return 1, err
 | |
| 			}
 | |
| 			if spec.Linux != nil {
 | |
| 				if err = runLabelStdioPipes(stdioPipe, spec.Process.SelinuxLabel, spec.Linux.MountLabel); err != nil {
 | |
| 					return 1, err
 | |
| 				}
 | |
| 			}
 | |
| 			errorFds = []int{stdioPipe[unix.Stdout][0], stdioPipe[unix.Stderr][0]}
 | |
| 			closeBeforeReadingErrorFds = []int{stdioPipe[unix.Stdout][1], stdioPipe[unix.Stderr][1]}
 | |
| 			// Set stdio to our pipes.
 | |
| 			getCreateStdio = func() (io.ReadCloser, io.WriteCloser, io.WriteCloser) {
 | |
| 				stdin := os.NewFile(uintptr(stdioPipe[unix.Stdin][0]), "/dev/stdin")
 | |
| 				stdout := os.NewFile(uintptr(stdioPipe[unix.Stdout][1]), "/dev/stdout")
 | |
| 				stderr := os.NewFile(uintptr(stdioPipe[unix.Stderr][1]), "/dev/stderr")
 | |
| 				return stdin, stdout, stderr
 | |
| 			}
 | |
| 		}
 | |
| 	} else {
 | |
| 		if options.Quiet {
 | |
| 			// Discard stdout.
 | |
| 			getCreateStdio = func() (io.ReadCloser, io.WriteCloser, io.WriteCloser) {
 | |
| 				return os.Stdin, nil, os.Stderr
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	runtimeArgs := slices.Clone(options.Args)
 | |
| 	if options.CgroupManager == config.SystemdCgroupsManager {
 | |
| 		runtimeArgs = append(runtimeArgs, "--systemd-cgroup")
 | |
| 	}
 | |
| 
 | |
| 	// Build the commands that we'll execute.
 | |
| 	pidFile := filepath.Join(bundlePath, "pid")
 | |
| 	args := append(append(append(runtimeArgs, "create", "--bundle", bundlePath, "--pid-file", pidFile), moreCreateArgs...), containerName)
 | |
| 	create := exec.Command(runtime, args...)
 | |
| 	setPdeathsig(create)
 | |
| 	create.Dir = bundlePath
 | |
| 	stdin, stdout, stderr := getCreateStdio()
 | |
| 	create.Stdin, create.Stdout, create.Stderr = stdin, stdout, stderr
 | |
| 
 | |
| 	args = append(options.Args, "start", containerName)
 | |
| 	start := exec.Command(runtime, args...)
 | |
| 	setPdeathsig(start)
 | |
| 	start.Dir = bundlePath
 | |
| 	start.Stderr = os.Stderr
 | |
| 
 | |
| 	kill := func(signal string) *exec.Cmd {
 | |
| 		args := append(options.Args, "kill", containerName)
 | |
| 		if signal != "" {
 | |
| 			args = append(args, signal)
 | |
| 		}
 | |
| 		kill := exec.Command(runtime, args...)
 | |
| 		kill.Dir = bundlePath
 | |
| 		kill.Stderr = os.Stderr
 | |
| 		return kill
 | |
| 	}
 | |
| 
 | |
| 	args = append(options.Args, "delete", containerName)
 | |
| 	del := exec.Command(runtime, args...)
 | |
| 	del.Dir = bundlePath
 | |
| 	del.Stderr = os.Stderr
 | |
| 
 | |
| 	// Actually create the container.
 | |
| 	logrus.Debugf("Running %q", create.Args)
 | |
| 	err = create.Run()
 | |
| 	if err != nil {
 | |
| 		return 1, fmt.Errorf("from %s creating container for %v: %s: %w", runtime, pargs, runCollectOutput(options.Logger, errorFds, closeBeforeReadingErrorFds), err)
 | |
| 	}
 | |
| 	defer func() {
 | |
| 		err2 := del.Run()
 | |
| 		if err2 != nil {
 | |
| 			if err == nil {
 | |
| 				err = fmt.Errorf("deleting container: %w", err2)
 | |
| 			} else {
 | |
| 				options.Logger.Infof("error from %s deleting container: %v", runtime, err2)
 | |
| 			}
 | |
| 		}
 | |
| 	}()
 | |
| 
 | |
| 	// Make sure we read the container's exit status when it exits.
 | |
| 	pidValue, err := os.ReadFile(pidFile)
 | |
| 	if err != nil {
 | |
| 		return 1, err
 | |
| 	}
 | |
| 	pid, err := strconv.Atoi(strings.TrimSpace(string(pidValue)))
 | |
| 	if err != nil {
 | |
| 		return 1, fmt.Errorf("parsing pid %s as a number: %w", string(pidValue), err)
 | |
| 	}
 | |
| 	var stopped uint32
 | |
| 	var reaping sync.WaitGroup
 | |
| 	reaping.Add(1)
 | |
| 	go func() {
 | |
| 		defer reaping.Done()
 | |
| 		var err error
 | |
| 		_, err = unix.Wait4(pid, &wstatus, 0, nil)
 | |
| 		if err != nil {
 | |
| 			wstatus = 0
 | |
| 			options.Logger.Errorf("error waiting for container child process %d: %v\n", pid, err)
 | |
| 		}
 | |
| 		atomic.StoreUint32(&stopped, 1)
 | |
| 	}()
 | |
| 
 | |
| 	if configureNetwork {
 | |
| 		if _, err := containerCreateW.Write([]byte{1}); err != nil {
 | |
| 			return 1, err
 | |
| 		}
 | |
| 		containerCreateW.Close()
 | |
| 		logrus.Debug("waiting for parent start message")
 | |
| 		b := make([]byte, 1)
 | |
| 		if _, err := containerStartR.Read(b); err != nil {
 | |
| 			return 1, fmt.Errorf("did not get container start message from parent: %w", err)
 | |
| 		}
 | |
| 		containerStartR.Close()
 | |
| 	}
 | |
| 
 | |
| 	if copyPipes {
 | |
| 		// We don't need the ends of the pipes that belong to the container.
 | |
| 		stdin.Close()
 | |
| 		if stdout != nil {
 | |
| 			stdout.Close()
 | |
| 		}
 | |
| 		stderr.Close()
 | |
| 	}
 | |
| 
 | |
| 	// Handle stdio for the container in the background.
 | |
| 	stdio.Add(1)
 | |
| 	go runCopyStdio(options.Logger, &stdio, copyPipes, stdioPipe, copyConsole, consoleListener, finishCopy, finishedCopy, spec)
 | |
| 
 | |
| 	// Start the container.
 | |
| 	logrus.Debugf("Running %q", start.Args)
 | |
| 	err = start.Run()
 | |
| 	if err != nil {
 | |
| 		return 1, fmt.Errorf("from %s starting container: %w", runtime, err)
 | |
| 	}
 | |
| 	defer func() {
 | |
| 		if atomic.LoadUint32(&stopped) == 0 {
 | |
| 			if err := kill("").Run(); err != nil {
 | |
| 				options.Logger.Infof("error from %s stopping container: %v", runtime, err)
 | |
| 			}
 | |
| 			atomic.StoreUint32(&stopped, 1)
 | |
| 		}
 | |
| 	}()
 | |
| 
 | |
| 	// Wait for the container to exit.
 | |
| 	interrupted := make(chan os.Signal, 100)
 | |
| 	go func() {
 | |
| 		for range interrupted {
 | |
| 			if err := kill("SIGKILL").Run(); err != nil {
 | |
| 				logrus.Errorf("%v sending SIGKILL", err)
 | |
| 			}
 | |
| 		}
 | |
| 	}()
 | |
| 	signal.Notify(interrupted, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM)
 | |
| 	for {
 | |
| 		now := time.Now()
 | |
| 		var state specs.State
 | |
| 		args = append(options.Args, "state", containerName)
 | |
| 		stat := exec.Command(runtime, args...)
 | |
| 		stat.Dir = bundlePath
 | |
| 		stat.Stderr = os.Stderr
 | |
| 		stateOutput, err := stat.Output()
 | |
| 		if err != nil {
 | |
| 			if atomic.LoadUint32(&stopped) != 0 {
 | |
| 				// container exited
 | |
| 				break
 | |
| 			}
 | |
| 			return 1, fmt.Errorf("reading container state from %s (got output: %q): %w", runtime, string(stateOutput), err)
 | |
| 		}
 | |
| 		if err = json.Unmarshal(stateOutput, &state); err != nil {
 | |
| 			return 1, fmt.Errorf("parsing container state %q from %s: %w", string(stateOutput), runtime, err)
 | |
| 		}
 | |
| 		switch state.Status {
 | |
| 		case specs.StateCreating, specs.StateCreated, specs.StateRunning:
 | |
| 			// all fine
 | |
| 		case specs.StateStopped:
 | |
| 			atomic.StoreUint32(&stopped, 1)
 | |
| 		default:
 | |
| 			return 1, fmt.Errorf("container status unexpectedly changed to %q", state.Status)
 | |
| 		}
 | |
| 		if atomic.LoadUint32(&stopped) != 0 {
 | |
| 			break
 | |
| 		}
 | |
| 		select {
 | |
| 		case <-finishedCopy:
 | |
| 			atomic.StoreUint32(&stopped, 1)
 | |
| 		case <-time.After(time.Until(now.Add(100 * time.Millisecond))):
 | |
| 			continue
 | |
| 		}
 | |
| 		if atomic.LoadUint32(&stopped) != 0 {
 | |
| 			break
 | |
| 		}
 | |
| 	}
 | |
| 	signal.Stop(interrupted)
 | |
| 	close(interrupted)
 | |
| 
 | |
| 	// Close the writing end of the stop-handling-stdio notification pipe.
 | |
| 	unix.Close(finishCopy[1])
 | |
| 	// Wait for the stdio copy goroutine to flush.
 | |
| 	stdio.Wait()
 | |
| 	// Wait until we finish reading the exit status.
 | |
| 	reaping.Wait()
 | |
| 
 | |
| 	return wstatus, nil
 | |
| }
 | |
| 
 | |
| func runCollectOutput(logger *logrus.Logger, fds, closeBeforeReadingFds []int) string {
 | |
| 	for _, fd := range closeBeforeReadingFds {
 | |
| 		unix.Close(fd)
 | |
| 	}
 | |
| 	var b bytes.Buffer
 | |
| 	buf := make([]byte, 8192)
 | |
| 	for _, fd := range fds {
 | |
| 		nread, err := unix.Read(fd, buf)
 | |
| 		if err != nil {
 | |
| 			if errno, isErrno := err.(syscall.Errno); isErrno {
 | |
| 				switch errno {
 | |
| 				default:
 | |
| 					logger.Errorf("error reading from pipe %d: %v", fd, err)
 | |
| 				case syscall.EINTR, syscall.EAGAIN:
 | |
| 				}
 | |
| 			} else {
 | |
| 				logger.Errorf("unable to wait for data from pipe %d: %v", fd, err)
 | |
| 			}
 | |
| 			continue
 | |
| 		}
 | |
| 		for nread > 0 {
 | |
| 			r := buf[:nread]
 | |
| 			if nwritten, err := b.Write(r); err != nil || nwritten != len(r) {
 | |
| 				if nwritten != len(r) {
 | |
| 					logger.Errorf("error buffering data from pipe %d: %v", fd, err)
 | |
| 					break
 | |
| 				}
 | |
| 			}
 | |
| 			nread, err = unix.Read(fd, buf)
 | |
| 			if err != nil {
 | |
| 				if errno, isErrno := err.(syscall.Errno); isErrno {
 | |
| 					switch errno {
 | |
| 					default:
 | |
| 						logger.Errorf("error reading from pipe %d: %v", fd, err)
 | |
| 					case syscall.EINTR, syscall.EAGAIN:
 | |
| 					}
 | |
| 				} else {
 | |
| 					logger.Errorf("unable to wait for data from pipe %d: %v", fd, err)
 | |
| 				}
 | |
| 				break
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 	return b.String()
 | |
| }
 | |
| 
 | |
| func setNonblock(logger *logrus.Logger, fd int, description string, nonblocking bool) (bool, error) {
 | |
| 	mask, err := unix.FcntlInt(uintptr(fd), unix.F_GETFL, 0)
 | |
| 	if err != nil {
 | |
| 		return false, err
 | |
| 	}
 | |
| 	blocked := mask&unix.O_NONBLOCK == 0
 | |
| 
 | |
| 	if err := unix.SetNonblock(fd, nonblocking); err != nil {
 | |
| 		if nonblocking {
 | |
| 			logger.Errorf("error setting %s to nonblocking: %v", description, err)
 | |
| 		} else {
 | |
| 			logger.Errorf("error setting descriptor %s blocking: %v", description, err)
 | |
| 		}
 | |
| 	}
 | |
| 	return blocked, err
 | |
| }
 | |
| 
 | |
| func runCopyStdio(logger *logrus.Logger, stdio *sync.WaitGroup, copyPipes bool, stdioPipe [][]int, copyConsole bool, consoleListener *net.UnixListener, finishCopy []int, finishedCopy chan struct{}, spec *specs.Spec) {
 | |
| 	defer func() {
 | |
| 		unix.Close(finishCopy[0])
 | |
| 		if copyPipes {
 | |
| 			unix.Close(stdioPipe[unix.Stdin][1])
 | |
| 			unix.Close(stdioPipe[unix.Stdout][0])
 | |
| 			unix.Close(stdioPipe[unix.Stderr][0])
 | |
| 		}
 | |
| 		stdio.Done()
 | |
| 		finishedCopy <- struct{}{}
 | |
| 		close(finishedCopy)
 | |
| 	}()
 | |
| 	// Map describing where data on an incoming descriptor should go.
 | |
| 	relayMap := make(map[int]int)
 | |
| 	// Map describing incoming and outgoing descriptors.
 | |
| 	readDesc := make(map[int]string)
 | |
| 	writeDesc := make(map[int]string)
 | |
| 	// Buffers.
 | |
| 	relayBuffer := make(map[int]*bytes.Buffer)
 | |
| 	// Set up the terminal descriptor or pipes for polling.
 | |
| 	if copyConsole {
 | |
| 		// Accept a connection over our listening socket.
 | |
| 		fd, err := runAcceptTerminal(logger, consoleListener, spec.Process.ConsoleSize)
 | |
| 		if err != nil {
 | |
| 			logger.Errorf("%v", err)
 | |
| 			return
 | |
| 		}
 | |
| 		terminalFD := fd
 | |
| 		// Input from our stdin, output from the terminal descriptor.
 | |
| 		relayMap[unix.Stdin] = terminalFD
 | |
| 		readDesc[unix.Stdin] = "stdin"
 | |
| 		relayBuffer[terminalFD] = new(bytes.Buffer)
 | |
| 		writeDesc[terminalFD] = "container terminal input"
 | |
| 		relayMap[terminalFD] = unix.Stdout
 | |
| 		readDesc[terminalFD] = "container terminal output"
 | |
| 		relayBuffer[unix.Stdout] = new(bytes.Buffer)
 | |
| 		writeDesc[unix.Stdout] = "output"
 | |
| 		// Set our terminal's mode to raw, to pass handling of special
 | |
| 		// terminal input to the terminal in the container.
 | |
| 		if term.IsTerminal(unix.Stdin) {
 | |
| 			if state, err := term.MakeRaw(unix.Stdin); err != nil {
 | |
| 				logger.Warnf("error setting terminal state: %v", err)
 | |
| 			} else {
 | |
| 				defer func() {
 | |
| 					if err = term.Restore(unix.Stdin, state); err != nil {
 | |
| 						logger.Errorf("unable to restore terminal state: %v", err)
 | |
| 					}
 | |
| 				}()
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 	if copyPipes {
 | |
| 		// Input from our stdin, output from the stdout and stderr pipes.
 | |
| 		relayMap[unix.Stdin] = stdioPipe[unix.Stdin][1]
 | |
| 		readDesc[unix.Stdin] = "stdin"
 | |
| 		relayBuffer[stdioPipe[unix.Stdin][1]] = new(bytes.Buffer)
 | |
| 		writeDesc[stdioPipe[unix.Stdin][1]] = "container stdin"
 | |
| 		relayMap[stdioPipe[unix.Stdout][0]] = unix.Stdout
 | |
| 		readDesc[stdioPipe[unix.Stdout][0]] = "container stdout"
 | |
| 		relayBuffer[unix.Stdout] = new(bytes.Buffer)
 | |
| 		writeDesc[unix.Stdout] = "stdout"
 | |
| 		relayMap[stdioPipe[unix.Stderr][0]] = unix.Stderr
 | |
| 		readDesc[stdioPipe[unix.Stderr][0]] = "container stderr"
 | |
| 		relayBuffer[unix.Stderr] = new(bytes.Buffer)
 | |
| 		writeDesc[unix.Stderr] = "stderr"
 | |
| 	}
 | |
| 	// Set our reading descriptors to non-blocking.
 | |
| 	for rfd, wfd := range relayMap {
 | |
| 		blocked, err := setNonblock(logger, rfd, readDesc[rfd], true)
 | |
| 		if err != nil {
 | |
| 			return
 | |
| 		}
 | |
| 		if blocked {
 | |
| 			defer setNonblock(logger, rfd, readDesc[rfd], false) //nolint:errcheck
 | |
| 		}
 | |
| 		setNonblock(logger, wfd, writeDesc[wfd], false) //nolint:errcheck
 | |
| 	}
 | |
| 
 | |
| 	if copyPipes {
 | |
| 		setNonblock(logger, stdioPipe[unix.Stdin][1], writeDesc[stdioPipe[unix.Stdin][1]], true) //nolint:errcheck
 | |
| 	}
 | |
| 
 | |
| 	runCopyStdioPassData(copyPipes, stdioPipe, finishCopy, relayMap, relayBuffer, readDesc, writeDesc)
 | |
| }
 | |
| 
 | |
| func canRetry(err error) bool {
 | |
| 	if errno, isErrno := err.(syscall.Errno); isErrno {
 | |
| 		return errno == syscall.EINTR || errno == syscall.EAGAIN
 | |
| 	}
 | |
| 	return false
 | |
| }
 | |
| 
 | |
| func runCopyStdioPassData(copyPipes bool, stdioPipe [][]int, finishCopy []int, relayMap map[int]int, relayBuffer map[int]*bytes.Buffer, readDesc map[int]string, writeDesc map[int]string) {
 | |
| 	closeStdin := false
 | |
| 
 | |
| 	// Pass data back and forth.
 | |
| 	pollTimeout := -1
 | |
| 	for len(relayMap) > 0 {
 | |
| 		// Start building the list of descriptors to poll.
 | |
| 		pollFds := make([]unix.PollFd, 0, len(relayMap)+1)
 | |
| 		// Poll for a notification that we should stop handling stdio.
 | |
| 		pollFds = append(pollFds, unix.PollFd{Fd: int32(finishCopy[0]), Events: unix.POLLIN | unix.POLLHUP})
 | |
| 		// Poll on our reading descriptors.
 | |
| 		for rfd := range relayMap {
 | |
| 			pollFds = append(pollFds, unix.PollFd{Fd: int32(rfd), Events: unix.POLLIN | unix.POLLHUP})
 | |
| 		}
 | |
| 		buf := make([]byte, 8192)
 | |
| 		// Wait for new data from any input descriptor, or a notification that we're done.
 | |
| 		_, err := unix.Poll(pollFds, pollTimeout)
 | |
| 		if !util.LogIfNotRetryable(err, fmt.Sprintf("error waiting for stdio/terminal data to relay: %v", err)) {
 | |
| 			return
 | |
| 		}
 | |
| 		removes := make(map[int]struct{})
 | |
| 		for _, pollFd := range pollFds {
 | |
| 			// If this descriptor's just been closed from the other end, mark it for
 | |
| 			// removal from the set that we're checking for.
 | |
| 			if pollFd.Revents&unix.POLLHUP == unix.POLLHUP {
 | |
| 				removes[int(pollFd.Fd)] = struct{}{}
 | |
| 			}
 | |
| 			// If the descriptor was closed elsewhere, remove it from our list.
 | |
| 			if pollFd.Revents&unix.POLLNVAL != 0 {
 | |
| 				logrus.Debugf("error polling descriptor %s: closed?", readDesc[int(pollFd.Fd)])
 | |
| 				removes[int(pollFd.Fd)] = struct{}{}
 | |
| 			}
 | |
| 			// If the POLLIN flag isn't set, then there's no data to be read from this descriptor.
 | |
| 			if pollFd.Revents&unix.POLLIN == 0 {
 | |
| 				continue
 | |
| 			}
 | |
| 			// Read whatever there is to be read.
 | |
| 			readFD := int(pollFd.Fd)
 | |
| 			writeFD, needToRelay := relayMap[readFD]
 | |
| 			if needToRelay {
 | |
| 				n, err := unix.Read(readFD, buf)
 | |
| 				if !util.LogIfNotRetryable(err, fmt.Sprintf("unable to read %s data: %v", readDesc[readFD], err)) {
 | |
| 					return
 | |
| 				}
 | |
| 				// If it's zero-length on our stdin and we're
 | |
| 				// using pipes, it's an EOF, so close the stdin
 | |
| 				// pipe's writing end.
 | |
| 				if n == 0 && !canRetry(err) && int(pollFd.Fd) == unix.Stdin {
 | |
| 					removes[int(pollFd.Fd)] = struct{}{}
 | |
| 				} else if n > 0 {
 | |
| 					// Buffer the data in case we get blocked on where they need to go.
 | |
| 					nwritten, err := relayBuffer[writeFD].Write(buf[:n])
 | |
| 					if err != nil {
 | |
| 						logrus.Debugf("buffer: %v", err)
 | |
| 						continue
 | |
| 					}
 | |
| 					if nwritten != n {
 | |
| 						logrus.Debugf("buffer: expected to buffer %d bytes, wrote %d", n, nwritten)
 | |
| 						continue
 | |
| 					}
 | |
| 					// If this is the last of the data we'll be able to read from this
 | |
| 					// descriptor, read all that there is to read.
 | |
| 					for pollFd.Revents&unix.POLLHUP == unix.POLLHUP {
 | |
| 						nr, err := unix.Read(readFD, buf)
 | |
| 						util.LogIfUnexpectedWhileDraining(err, fmt.Sprintf("read %s: %v", readDesc[readFD], err))
 | |
| 						if nr <= 0 {
 | |
| 							break
 | |
| 						}
 | |
| 						nwritten, err := relayBuffer[writeFD].Write(buf[:nr])
 | |
| 						if err != nil {
 | |
| 							logrus.Debugf("buffer: %v", err)
 | |
| 							break
 | |
| 						}
 | |
| 						if nwritten != nr {
 | |
| 							logrus.Debugf("buffer: expected to buffer %d bytes, wrote %d", nr, nwritten)
 | |
| 							break
 | |
| 						}
 | |
| 					}
 | |
| 				}
 | |
| 			}
 | |
| 		}
 | |
| 		// Try to drain the output buffers.  Set the default timeout
 | |
| 		// for the next poll() to 100ms if we still have data to write.
 | |
| 		pollTimeout = -1
 | |
| 		for writeFD := range relayBuffer {
 | |
| 			if relayBuffer[writeFD].Len() > 0 {
 | |
| 				n, err := unix.Write(writeFD, relayBuffer[writeFD].Bytes())
 | |
| 				if !util.LogIfNotRetryable(err, fmt.Sprintf("unable to write %s data: %v", writeDesc[writeFD], err)) {
 | |
| 					return
 | |
| 				}
 | |
| 				if n > 0 {
 | |
| 					relayBuffer[writeFD].Next(n)
 | |
| 				}
 | |
| 				if closeStdin && writeFD == stdioPipe[unix.Stdin][1] && stdioPipe[unix.Stdin][1] >= 0 && relayBuffer[stdioPipe[unix.Stdin][1]].Len() == 0 {
 | |
| 					logrus.Debugf("closing stdin")
 | |
| 					unix.Close(stdioPipe[unix.Stdin][1])
 | |
| 					stdioPipe[unix.Stdin][1] = -1
 | |
| 				}
 | |
| 			}
 | |
| 			if relayBuffer[writeFD].Len() > 0 {
 | |
| 				pollTimeout = 100
 | |
| 			}
 | |
| 		}
 | |
| 		// Remove any descriptors which we don't need to poll any more from the poll descriptor list.
 | |
| 		for remove := range removes {
 | |
| 			if copyPipes && remove == unix.Stdin {
 | |
| 				closeStdin = true
 | |
| 				if relayBuffer[stdioPipe[unix.Stdin][1]].Len() == 0 {
 | |
| 					logrus.Debugf("closing stdin")
 | |
| 					unix.Close(stdioPipe[unix.Stdin][1])
 | |
| 					stdioPipe[unix.Stdin][1] = -1
 | |
| 				}
 | |
| 			}
 | |
| 			delete(relayMap, remove)
 | |
| 		}
 | |
| 		// If the we-can-return pipe had anything for us, we're done.
 | |
| 		for _, pollFd := range pollFds {
 | |
| 			if int(pollFd.Fd) == finishCopy[0] && pollFd.Revents != 0 {
 | |
| 				// The pipe is closed, indicating that we can stop now.
 | |
| 				return
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func runAcceptTerminal(logger *logrus.Logger, consoleListener *net.UnixListener, terminalSize *specs.Box) (int, error) {
 | |
| 	defer consoleListener.Close()
 | |
| 	c, err := consoleListener.AcceptUnix()
 | |
| 	if err != nil {
 | |
| 		return -1, fmt.Errorf("accepting socket descriptor connection: %w", err)
 | |
| 	}
 | |
| 	defer c.Close()
 | |
| 	// Expect a control message over our new connection.
 | |
| 	b := make([]byte, 8192)
 | |
| 	oob := make([]byte, 8192)
 | |
| 	n, oobn, _, _, err := c.ReadMsgUnix(b, oob)
 | |
| 	if err != nil {
 | |
| 		return -1, fmt.Errorf("reading socket descriptor: %w", err)
 | |
| 	}
 | |
| 	if n > 0 {
 | |
| 		logrus.Debugf("socket descriptor is for %q", string(b[:n]))
 | |
| 	}
 | |
| 	if oobn > len(oob) {
 | |
| 		return -1, fmt.Errorf("too much out-of-bounds data (%d bytes)", oobn)
 | |
| 	}
 | |
| 	// Parse the control message.
 | |
| 	scm, err := unix.ParseSocketControlMessage(oob[:oobn])
 | |
| 	if err != nil {
 | |
| 		return -1, fmt.Errorf("parsing out-of-bound data as a socket control message: %w", err)
 | |
| 	}
 | |
| 	logrus.Debugf("control messages: %v", scm)
 | |
| 	// Expect to get a descriptor.
 | |
| 	terminalFD := -1
 | |
| 	for i := range scm {
 | |
| 		fds, err := unix.ParseUnixRights(&scm[i])
 | |
| 		if err != nil {
 | |
| 			return -1, fmt.Errorf("parsing unix rights control message: %v: %w", &scm[i], err)
 | |
| 		}
 | |
| 		logrus.Debugf("fds: %v", fds)
 | |
| 		if len(fds) == 0 {
 | |
| 			continue
 | |
| 		}
 | |
| 		terminalFD = fds[0]
 | |
| 		break
 | |
| 	}
 | |
| 	if terminalFD == -1 {
 | |
| 		return -1, fmt.Errorf("unable to read terminal descriptor")
 | |
| 	}
 | |
| 	// Set the pseudoterminal's size to the configured size, or our own.
 | |
| 	winsize := &unix.Winsize{}
 | |
| 	if terminalSize != nil {
 | |
| 		// Use configured sizes.
 | |
| 		winsize.Row = uint16(terminalSize.Height)
 | |
| 		winsize.Col = uint16(terminalSize.Width)
 | |
| 	} else {
 | |
| 		if term.IsTerminal(unix.Stdin) {
 | |
| 			// Use the size of our terminal.
 | |
| 			if winsize, err = unix.IoctlGetWinsize(unix.Stdin, unix.TIOCGWINSZ); err != nil {
 | |
| 				logger.Warnf("error reading size of controlling terminal: %v", err)
 | |
| 				winsize.Row = 0
 | |
| 				winsize.Col = 0
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 	if winsize.Row != 0 && winsize.Col != 0 {
 | |
| 		if err = unix.IoctlSetWinsize(terminalFD, unix.TIOCSWINSZ, winsize); err != nil {
 | |
| 			logger.Warnf("error setting size of container pseudoterminal: %v", err)
 | |
| 		}
 | |
| 		// FIXME - if we're connected to a terminal, we should
 | |
| 		// be passing the updated terminal size down when we
 | |
| 		// receive a SIGWINCH.
 | |
| 	}
 | |
| 	return terminalFD, nil
 | |
| }
 | |
| 
 | |
| func runUsingRuntimeMain() {
 | |
| 	var options runUsingRuntimeSubprocOptions
 | |
| 	// Set logging.
 | |
| 	if level := os.Getenv("LOGLEVEL"); level != "" {
 | |
| 		if ll, err := strconv.Atoi(level); err == nil {
 | |
| 			logrus.SetLevel(logrus.Level(ll))
 | |
| 		}
 | |
| 	}
 | |
| 	// Unpack our configuration.
 | |
| 	confPipe := os.NewFile(3, "confpipe")
 | |
| 	if confPipe == nil {
 | |
| 		fmt.Fprintf(os.Stderr, "error reading options pipe\n")
 | |
| 		os.Exit(1)
 | |
| 	}
 | |
| 	defer confPipe.Close()
 | |
| 	if err := json.NewDecoder(confPipe).Decode(&options); err != nil {
 | |
| 		fmt.Fprintf(os.Stderr, "error decoding options: %v\n", err)
 | |
| 		os.Exit(1)
 | |
| 	}
 | |
| 	// Set ourselves up to read the container's exit status.  We're doing this in a child process
 | |
| 	// so that we won't mess with the setting in a caller of the library.
 | |
| 	if err := setChildProcess(); err != nil {
 | |
| 		os.Exit(1)
 | |
| 	}
 | |
| 	ospec := options.Spec
 | |
| 	if ospec == nil {
 | |
| 		fmt.Fprintf(os.Stderr, "options spec not specified\n")
 | |
| 		os.Exit(1)
 | |
| 	}
 | |
| 
 | |
| 	// open the pipes used to communicate with the parent process
 | |
| 	var containerCreateW *os.File
 | |
| 	var containerStartR *os.File
 | |
| 	if options.ConfigureNetwork {
 | |
| 		containerCreateW = os.NewFile(4, "containercreatepipe")
 | |
| 		if containerCreateW == nil {
 | |
| 			fmt.Fprintf(os.Stderr, "could not open fd 4\n")
 | |
| 			os.Exit(1)
 | |
| 		}
 | |
| 		containerStartR = os.NewFile(5, "containerstartpipe")
 | |
| 		if containerStartR == nil {
 | |
| 			fmt.Fprintf(os.Stderr, "could not open fd 5\n")
 | |
| 			os.Exit(1)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// Run the container, start to finish.
 | |
| 	status, err := runUsingRuntime(options.Options, options.ConfigureNetwork, options.MoreCreateArgs, ospec, options.BundlePath, options.ContainerName, containerCreateW, containerStartR)
 | |
| 	if err != nil {
 | |
| 		fmt.Fprintf(os.Stderr, "error running container: %v\n", err)
 | |
| 		os.Exit(1)
 | |
| 	}
 | |
| 	// Pass the container's exit status back to the caller by exiting with the same status.
 | |
| 	if status.Exited() {
 | |
| 		os.Exit(status.ExitStatus())
 | |
| 	} else if status.Signaled() {
 | |
| 		fmt.Fprintf(os.Stderr, "container exited on %s\n", status.Signal())
 | |
| 		os.Exit(1)
 | |
| 	}
 | |
| 	os.Exit(1)
 | |
| }
 | |
| 
 | |
| func (b *Builder) runUsingRuntimeSubproc(isolation define.Isolation, options RunOptions, configureNetwork bool, networkString string,
 | |
| 	moreCreateArgs []string, spec *specs.Spec, rootPath, bundlePath, containerName, buildContainerName, hostsFile, resolvFile string,
 | |
| ) (err error) {
 | |
| 	// Lock the caller to a single OS-level thread.
 | |
| 	runtime.LockOSThread()
 | |
| 	defer runtime.UnlockOSThread()
 | |
| 
 | |
| 	var confwg sync.WaitGroup
 | |
| 	config, conferr := json.Marshal(runUsingRuntimeSubprocOptions{
 | |
| 		Options:          options,
 | |
| 		Spec:             spec,
 | |
| 		RootPath:         rootPath,
 | |
| 		BundlePath:       bundlePath,
 | |
| 		ConfigureNetwork: configureNetwork,
 | |
| 		MoreCreateArgs:   moreCreateArgs,
 | |
| 		ContainerName:    containerName,
 | |
| 		Isolation:        isolation,
 | |
| 	})
 | |
| 	if conferr != nil {
 | |
| 		return fmt.Errorf("encoding configuration for %q: %w", runUsingRuntimeCommand, conferr)
 | |
| 	}
 | |
| 	cmd := reexec.Command(runUsingRuntimeCommand)
 | |
| 	setPdeathsig(cmd)
 | |
| 	cmd.Dir = bundlePath
 | |
| 	cmd.Stdin = options.Stdin
 | |
| 	if cmd.Stdin == nil {
 | |
| 		cmd.Stdin = os.Stdin
 | |
| 	}
 | |
| 	cmd.Stdout = options.Stdout
 | |
| 	if cmd.Stdout == nil {
 | |
| 		cmd.Stdout = os.Stdout
 | |
| 	}
 | |
| 	cmd.Stderr = options.Stderr
 | |
| 	if cmd.Stderr == nil {
 | |
| 		cmd.Stderr = os.Stderr
 | |
| 	}
 | |
| 	cmd.Env = util.MergeEnv(os.Environ(), []string{fmt.Sprintf("LOGLEVEL=%d", logrus.GetLevel())})
 | |
| 	preader, pwriter, err := os.Pipe()
 | |
| 	if err != nil {
 | |
| 		return fmt.Errorf("creating configuration pipe: %w", err)
 | |
| 	}
 | |
| 	confwg.Add(1)
 | |
| 	go func() {
 | |
| 		_, conferr = io.Copy(pwriter, bytes.NewReader(config))
 | |
| 		if conferr != nil {
 | |
| 			conferr = fmt.Errorf("while copying configuration down pipe to child process: %w", conferr)
 | |
| 		}
 | |
| 		confwg.Done()
 | |
| 	}()
 | |
| 
 | |
| 	// create network configuration pipes
 | |
| 	var containerCreateR, containerCreateW fileCloser
 | |
| 	var containerStartR, containerStartW fileCloser
 | |
| 	if configureNetwork {
 | |
| 		containerCreateR.file, containerCreateW.file, err = os.Pipe()
 | |
| 		if err != nil {
 | |
| 			return fmt.Errorf("creating container create pipe: %w", err)
 | |
| 		}
 | |
| 		defer containerCreateR.Close()
 | |
| 		defer containerCreateW.Close()
 | |
| 
 | |
| 		containerStartR.file, containerStartW.file, err = os.Pipe()
 | |
| 		if err != nil {
 | |
| 			return fmt.Errorf("creating container start pipe: %w", err)
 | |
| 		}
 | |
| 		defer containerStartR.Close()
 | |
| 		defer containerStartW.Close()
 | |
| 		cmd.ExtraFiles = []*os.File{containerCreateW.file, containerStartR.file}
 | |
| 	}
 | |
| 
 | |
| 	cmd.ExtraFiles = append([]*os.File{preader}, cmd.ExtraFiles...)
 | |
| 	defer preader.Close()
 | |
| 	defer pwriter.Close()
 | |
| 	if err := cmd.Start(); err != nil {
 | |
| 		return fmt.Errorf("while starting runtime: %w", err)
 | |
| 	}
 | |
| 
 | |
| 	interrupted := make(chan os.Signal, 100)
 | |
| 	go func() {
 | |
| 		for receivedSignal := range interrupted {
 | |
| 			if err := cmd.Process.Signal(receivedSignal); err != nil {
 | |
| 				logrus.Infof("%v while attempting to forward %v to child process", err, receivedSignal)
 | |
| 			}
 | |
| 		}
 | |
| 	}()
 | |
| 	signal.Notify(interrupted, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM)
 | |
| 
 | |
| 	if configureNetwork {
 | |
| 		// we already passed the fd to the child, now close the writer so we do not hang if the child closes it
 | |
| 		containerCreateW.Close()
 | |
| 		if err := waitForSync(containerCreateR.file); err != nil {
 | |
| 			// we do not want to return here since we want to capture the exit code from the child via cmd.Wait()
 | |
| 			// close the pipes here so that the child will not hang forever
 | |
| 			containerCreateR.Close()
 | |
| 			containerStartW.Close()
 | |
| 			logrus.Errorf("did not get container create message from subprocess: %v", err)
 | |
| 		} else {
 | |
| 			pidFile := filepath.Join(bundlePath, "pid")
 | |
| 			pidValue, err := os.ReadFile(pidFile)
 | |
| 			if err != nil {
 | |
| 				return err
 | |
| 			}
 | |
| 			pid, err := strconv.Atoi(strings.TrimSpace(string(pidValue)))
 | |
| 			if err != nil {
 | |
| 				return fmt.Errorf("parsing pid %s as a number: %w", string(pidValue), err)
 | |
| 			}
 | |
| 
 | |
| 			teardown, netResult, err := b.runConfigureNetwork(pid, isolation, options, networkString, containerName, []string{spec.Hostname, buildContainerName})
 | |
| 			if teardown != nil {
 | |
| 				defer teardown()
 | |
| 			}
 | |
| 			if err != nil {
 | |
| 				return fmt.Errorf("setup network: %w", err)
 | |
| 			}
 | |
| 
 | |
| 			// only add hosts if we manage the hosts file
 | |
| 			if hostsFile != "" {
 | |
| 				err = b.addHostsEntries(hostsFile, rootPath, netResult.entries, netResult.excludeIPs, netResult.preferredHostContainersInternalIP)
 | |
| 				if err != nil {
 | |
| 					return err
 | |
| 				}
 | |
| 			}
 | |
| 
 | |
| 			if resolvFile != "" {
 | |
| 				err = b.addResolvConfEntries(resolvFile, netResult.dnsServers, spec, netResult.keepHostResolvers, netResult.ipv6)
 | |
| 				if err != nil {
 | |
| 					return err
 | |
| 				}
 | |
| 			}
 | |
| 
 | |
| 			logrus.Debug("network namespace successfully setup, send start message to child")
 | |
| 			_, err = containerStartW.file.Write([]byte{1})
 | |
| 			if err != nil {
 | |
| 				return err
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if err := cmd.Wait(); err != nil {
 | |
| 		return fmt.Errorf("while running runtime: %w", err)
 | |
| 	}
 | |
| 	confwg.Wait()
 | |
| 	signal.Stop(interrupted)
 | |
| 	close(interrupted)
 | |
| 	if err == nil {
 | |
| 		return conferr
 | |
| 	}
 | |
| 	if conferr != nil {
 | |
| 		logrus.Debugf("%v", conferr)
 | |
| 	}
 | |
| 	return err
 | |
| }
 | |
| 
 | |
| type runUsingRuntimeSubprocOptions struct {
 | |
| 	Options          RunOptions
 | |
| 	Spec             *specs.Spec
 | |
| 	RootPath         string
 | |
| 	BundlePath       string
 | |
| 	ConfigureNetwork bool
 | |
| 	MoreCreateArgs   []string
 | |
| 	ContainerName    string
 | |
| 	Isolation        define.Isolation
 | |
| }
 | |
| 
 | |
| func init() {
 | |
| 	reexec.Register(runUsingRuntimeCommand, runUsingRuntimeMain)
 | |
| }
 | |
| 
 | |
| // If this succeeds, after the command which uses the spec finishes running,
 | |
| // the caller must call b.cleanupRunMounts() on the returned runMountArtifacts
 | |
| // structure.
 | |
| func (b *Builder) setupMounts(mountPoint string, spec *specs.Spec, bundlePath string, optionMounts []specs.Mount, bindFiles map[string]string, builtinVolumes []string, compatBuiltinVolumes types.OptionalBool, volumeMounts []string, runFileMounts []string, runMountInfo runMountInfo) (*runMountArtifacts, error) {
 | |
| 	// Start building a new list of mounts.
 | |
| 	var mounts []specs.Mount
 | |
| 	haveMount := func(destination string) bool {
 | |
| 		for _, mount := range mounts {
 | |
| 			if mount.Destination == destination {
 | |
| 				// Already have something to mount there.
 | |
| 				return true
 | |
| 			}
 | |
| 		}
 | |
| 		return false
 | |
| 	}
 | |
| 
 | |
| 	specMounts, err := setupSpecialMountSpecChanges(spec, b.CommonBuildOpts.ShmSize)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	// Get the list of files we need to bind into the container.
 | |
| 	bindFileMounts := runSetupBoundFiles(bundlePath, bindFiles)
 | |
| 
 | |
| 	// After this point we need to know the per-container persistent storage directory.
 | |
| 	cdir, err := b.store.ContainerDirectory(b.ContainerID)
 | |
| 	if err != nil {
 | |
| 		return nil, fmt.Errorf("determining work directory for container %q: %w", b.ContainerID, err)
 | |
| 	}
 | |
| 
 | |
| 	// Figure out which UID and GID to tell the subscriptions package to use
 | |
| 	// for files that it creates.
 | |
| 	rootUID, rootGID, err := util.GetHostRootIDs(spec)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	// Get host UID and GID of the container process.
 | |
| 	uidMap := []specs.LinuxIDMapping{}
 | |
| 	gidMap := []specs.LinuxIDMapping{}
 | |
| 	if spec.Linux != nil {
 | |
| 		uidMap = spec.Linux.UIDMappings
 | |
| 		gidMap = spec.Linux.GIDMappings
 | |
| 	}
 | |
| 	processUID, processGID, err := util.GetHostIDs(uidMap, gidMap, spec.Process.User.UID, spec.Process.User.GID)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	// Get the list of subscriptions mounts.
 | |
| 	subscriptionMounts := subscriptions.MountsWithUIDGID(b.MountLabel, cdir, b.DefaultMountsFilePath, mountPoint, int(rootUID), int(rootGID), unshare.IsRootless(), false)
 | |
| 
 | |
| 	idMaps := IDMaps{
 | |
| 		uidmap:     uidMap,
 | |
| 		gidmap:     gidMap,
 | |
| 		rootUID:    int(rootUID),
 | |
| 		rootGID:    int(rootGID),
 | |
| 		processUID: int(processUID),
 | |
| 		processGID: int(processGID),
 | |
| 	}
 | |
| 	// Get the list of mounts that are just for this Run() call.
 | |
| 	runMounts, mountArtifacts, err := b.runSetupRunMounts(bundlePath, runFileMounts, runMountInfo, idMaps)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 	succeeded := false
 | |
| 	defer func() {
 | |
| 		if !succeeded {
 | |
| 			if err := b.cleanupRunMounts(mountArtifacts); err != nil {
 | |
| 				b.Logger.Debugf("cleaning up run mounts: %v", err)
 | |
| 			}
 | |
| 		}
 | |
| 	}()
 | |
| 	// Add temporary copies of the contents of volume locations at the
 | |
| 	// volume locations, unless we already have something there.
 | |
| 	builtins, err := runSetupBuiltinVolumes(b.MountLabel, mountPoint, cdir, builtinVolumes, compatBuiltinVolumes, int(rootUID), int(rootGID))
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	// Get the list of explicitly-specified volume mounts.
 | |
| 	mountLabel := ""
 | |
| 	if spec.Linux != nil {
 | |
| 		mountLabel = spec.Linux.MountLabel
 | |
| 	}
 | |
| 	volumes, overlayDirs, err := b.runSetupVolumeMounts(mountLabel, volumeMounts, optionMounts, idMaps)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 	mountArtifacts.RunOverlayDirs = append(mountArtifacts.RunOverlayDirs, overlayDirs...)
 | |
| 
 | |
| 	allMounts := util.SortMounts(append(append(append(append(append(volumes, builtins...), runMounts...), subscriptionMounts...), bindFileMounts...), specMounts...))
 | |
| 
 | |
| 	// Add them all, in the preferred order, except where they conflict with something that was previously added.
 | |
| 	for _, mount := range allMounts {
 | |
| 		if haveMount(mount.Destination) {
 | |
| 			// Already mounting something there, no need to bother with this one.
 | |
| 			continue
 | |
| 		}
 | |
| 		// Add the mount.
 | |
| 		mounts = append(mounts, mount)
 | |
| 	}
 | |
| 
 | |
| 	// Set the list in the spec.
 | |
| 	spec.Mounts = mounts
 | |
| 	succeeded = true
 | |
| 	return mountArtifacts, nil
 | |
| }
 | |
| 
 | |
| func runSetupBuiltinVolumes(mountLabel, mountPoint, containerDir string, builtinVolumes []string, compatBuiltinVolumes types.OptionalBool, rootUID, rootGID int) ([]specs.Mount, error) {
 | |
| 	var mounts []specs.Mount
 | |
| 	hostOwner := idtools.IDPair{UID: rootUID, GID: rootGID}
 | |
| 	// Add temporary copies of the contents of volume locations at the
 | |
| 	// volume locations, unless we already have something there.
 | |
| 	for _, volume := range builtinVolumes {
 | |
| 		// Make sure the volume exists in the rootfs.
 | |
| 		createDirPerms := os.FileMode(0o755)
 | |
| 		err := copier.Mkdir(mountPoint, filepath.Join(mountPoint, volume), copier.MkdirOptions{
 | |
| 			ChownNew: &hostOwner,
 | |
| 			ChmodNew: &createDirPerms,
 | |
| 		})
 | |
| 		if err != nil {
 | |
| 			return nil, fmt.Errorf("ensuring volume path %q: %w", filepath.Join(mountPoint, volume), err)
 | |
| 		}
 | |
| 		// If we're not being asked to bind mount anonymous volumes
 | |
| 		// onto the volume paths, we're done here.
 | |
| 		if compatBuiltinVolumes != types.OptionalBoolTrue {
 | |
| 			continue
 | |
| 		}
 | |
| 		// If we need to, create the directory that we'll use to hold
 | |
| 		// the volume contents.  If we do need to create it, then we'll
 | |
| 		// need to populate it, too, so make a note of that.
 | |
| 		volumePath := filepath.Join(containerDir, "buildah-volumes", digest.Canonical.FromString(volume).Hex())
 | |
| 		initializeVolume := false
 | |
| 		if err := fileutils.Exists(volumePath); err != nil {
 | |
| 			if !errors.Is(err, fs.ErrNotExist) {
 | |
| 				return nil, err
 | |
| 			}
 | |
| 			logrus.Debugf("setting up built-in volume path at %q for %q", volumePath, volume)
 | |
| 			if err = os.MkdirAll(volumePath, 0o755); err != nil {
 | |
| 				return nil, err
 | |
| 			}
 | |
| 			if err = relabel(volumePath, mountLabel, false); err != nil {
 | |
| 				return nil, err
 | |
| 			}
 | |
| 			initializeVolume = true
 | |
| 		}
 | |
| 		// Read the attributes of the volume's location in the rootfs.
 | |
| 		srcPath, err := copier.Eval(mountPoint, filepath.Join(mountPoint, volume), copier.EvalOptions{})
 | |
| 		if err != nil {
 | |
| 			return nil, fmt.Errorf("evaluating path %q: %w", srcPath, err)
 | |
| 		}
 | |
| 		stat, err := os.Stat(srcPath)
 | |
| 		if err != nil && !errors.Is(err, os.ErrNotExist) {
 | |
| 			return nil, err
 | |
| 		}
 | |
| 		// If we need to populate the mounted volume's contents with
 | |
| 		// content from the rootfs, set it up now.
 | |
| 		if initializeVolume {
 | |
| 			if err = os.Chmod(volumePath, stat.Mode().Perm()); err != nil {
 | |
| 				return nil, err
 | |
| 			}
 | |
| 			if err = os.Chown(volumePath, int(stat.Sys().(*syscall.Stat_t).Uid), int(stat.Sys().(*syscall.Stat_t).Gid)); err != nil {
 | |
| 				return nil, err
 | |
| 			}
 | |
| 			logrus.Debugf("populating directory %q for volume %q using contents of %q", volumePath, volume, srcPath)
 | |
| 			if err = extractWithTar(mountPoint, srcPath, volumePath); err != nil && !errors.Is(err, os.ErrNotExist) {
 | |
| 				return nil, fmt.Errorf("populating directory %q for volume %q using contents of %q: %w", volumePath, volume, srcPath, err)
 | |
| 			}
 | |
| 		}
 | |
| 		// Add the bind mount.
 | |
| 		mounts = append(mounts, specs.Mount{
 | |
| 			Source:      volumePath,
 | |
| 			Destination: volume,
 | |
| 			Type:        define.TypeBind,
 | |
| 			Options:     define.BindOptions,
 | |
| 		})
 | |
| 	}
 | |
| 	return mounts, nil
 | |
| }
 | |
| 
 | |
| // runSetupRunMounts sets up mounts that exist only in this RUN, not in subsequent runs
 | |
| //
 | |
| // If this function succeeds, the caller must free the returned
 | |
| // runMountArtifacts by calling b.cleanupRunMounts() after the command being
 | |
| // executed with those mounts has finished.
 | |
| func (b *Builder) runSetupRunMounts(bundlePath string, mounts []string, sources runMountInfo, idMaps IDMaps) ([]specs.Mount, *runMountArtifacts, error) {
 | |
| 	tmpFiles := make([]string, 0, len(mounts))
 | |
| 	mountImages := make([]string, 0, len(mounts))
 | |
| 	intermediateMounts := make([]string, 0, len(mounts))
 | |
| 	finalMounts := make([]specs.Mount, 0, len(mounts))
 | |
| 	agents := make([]*sshagent.AgentServer, 0, len(mounts))
 | |
| 	defaultSSHSock := ""
 | |
| 	targetLocks := []*lockfile.LockFile{}
 | |
| 	var overlayDirs []string
 | |
| 	succeeded := false
 | |
| 	defer func() {
 | |
| 		if !succeeded {
 | |
| 			for _, agent := range agents {
 | |
| 				servePath := agent.ServePath()
 | |
| 				if err := agent.Shutdown(); err != nil {
 | |
| 					b.Logger.Errorf("shutting down SSH agent at %q: %v", servePath, err)
 | |
| 				}
 | |
| 			}
 | |
| 			for _, overlayDir := range overlayDirs {
 | |
| 				if err := overlay.RemoveTemp(overlayDir); err != nil {
 | |
| 					b.Logger.Error(err.Error())
 | |
| 				}
 | |
| 			}
 | |
| 			for _, intermediateMount := range intermediateMounts {
 | |
| 				if err := mount.Unmount(intermediateMount); err != nil {
 | |
| 					b.Logger.Errorf("unmounting %q: %v", intermediateMount, err)
 | |
| 				}
 | |
| 				if err := os.Remove(intermediateMount); err != nil {
 | |
| 					b.Logger.Errorf("removing should-be-empty directory %q: %v", intermediateMount, err)
 | |
| 				}
 | |
| 			}
 | |
| 			for _, mountImage := range mountImages {
 | |
| 				if _, err := b.store.UnmountImage(mountImage, false); err != nil {
 | |
| 					b.Logger.Error(err.Error())
 | |
| 				}
 | |
| 			}
 | |
| 			for _, tmpFile := range tmpFiles {
 | |
| 				if err := os.Remove(tmpFile); err != nil && !errors.Is(err, os.ErrNotExist) {
 | |
| 					b.Logger.Error(err.Error())
 | |
| 				}
 | |
| 			}
 | |
| 			volumes.UnlockLockArray(targetLocks)
 | |
| 		}
 | |
| 	}()
 | |
| 	for _, mount := range mounts {
 | |
| 		var mountSpec *specs.Mount
 | |
| 		var err error
 | |
| 		var envFile, image, bundleMountsDir, overlayDir, intermediateMount string
 | |
| 		var agent *sshagent.AgentServer
 | |
| 		var tl *lockfile.LockFile
 | |
| 
 | |
| 		tokens := strings.Split(mount, ",")
 | |
| 
 | |
| 		// If `type` is not set default to TypeBind
 | |
| 		mountType := define.TypeBind
 | |
| 
 | |
| 		for _, field := range tokens {
 | |
| 			if strings.HasPrefix(field, "type=") {
 | |
| 				kv := strings.Split(field, "=")
 | |
| 				if len(kv) != 2 {
 | |
| 					return nil, nil, errors.New("invalid mount type")
 | |
| 				}
 | |
| 				mountType = kv[1]
 | |
| 			}
 | |
| 		}
 | |
| 		switch mountType {
 | |
| 		case "secret":
 | |
| 			mountSpec, envFile, err = b.getSecretMount(tokens, sources.Secrets, idMaps, sources.WorkDir)
 | |
| 			if err != nil {
 | |
| 				return nil, nil, err
 | |
| 			}
 | |
| 			if mountSpec != nil {
 | |
| 				finalMounts = append(finalMounts, *mountSpec)
 | |
| 				if envFile != "" {
 | |
| 					tmpFiles = append(tmpFiles, envFile)
 | |
| 				}
 | |
| 			}
 | |
| 		case "ssh":
 | |
| 			mountSpec, agent, err = b.getSSHMount(tokens, len(agents), sources.SSHSources, idMaps)
 | |
| 			if err != nil {
 | |
| 				return nil, nil, err
 | |
| 			}
 | |
| 			if mountSpec != nil {
 | |
| 				finalMounts = append(finalMounts, *mountSpec)
 | |
| 				if len(agents) == 0 {
 | |
| 					defaultSSHSock = mountSpec.Destination
 | |
| 				}
 | |
| 				agents = append(agents, agent)
 | |
| 			}
 | |
| 		case define.TypeBind:
 | |
| 			if bundleMountsDir == "" {
 | |
| 				if bundleMountsDir, err = os.MkdirTemp(bundlePath, "mounts"); err != nil {
 | |
| 					return nil, nil, err
 | |
| 				}
 | |
| 			}
 | |
| 			mountSpec, image, intermediateMount, overlayDir, err = b.getBindMount(tokens, sources.SystemContext, sources.ContextDir, sources.StageMountPoints, idMaps, sources.WorkDir, bundleMountsDir)
 | |
| 			if err != nil {
 | |
| 				return nil, nil, err
 | |
| 			}
 | |
| 			if image != "" {
 | |
| 				mountImages = append(mountImages, image)
 | |
| 			}
 | |
| 			if intermediateMount != "" {
 | |
| 				intermediateMounts = append(intermediateMounts, intermediateMount)
 | |
| 			}
 | |
| 			if overlayDir != "" {
 | |
| 				overlayDirs = append(overlayDirs, overlayDir)
 | |
| 			}
 | |
| 			finalMounts = append(finalMounts, *mountSpec)
 | |
| 		case "tmpfs":
 | |
| 			mountSpec, err = b.getTmpfsMount(tokens, idMaps, sources.WorkDir)
 | |
| 			if err != nil {
 | |
| 				return nil, nil, err
 | |
| 			}
 | |
| 			finalMounts = append(finalMounts, *mountSpec)
 | |
| 		case "cache":
 | |
| 			if bundleMountsDir == "" {
 | |
| 				if bundleMountsDir, err = os.MkdirTemp(bundlePath, "mounts"); err != nil {
 | |
| 					return nil, nil, err
 | |
| 				}
 | |
| 			}
 | |
| 			mountSpec, image, intermediateMount, overlayDir, tl, err = b.getCacheMount(tokens, sources.SystemContext, sources.StageMountPoints, idMaps, sources.WorkDir, bundleMountsDir)
 | |
| 			if err != nil {
 | |
| 				return nil, nil, err
 | |
| 			}
 | |
| 			if image != "" {
 | |
| 				mountImages = append(mountImages, image)
 | |
| 			}
 | |
| 			if intermediateMount != "" {
 | |
| 				intermediateMounts = append(intermediateMounts, intermediateMount)
 | |
| 			}
 | |
| 			if overlayDir != "" {
 | |
| 				overlayDirs = append(overlayDirs, overlayDir)
 | |
| 			}
 | |
| 			if tl != nil {
 | |
| 				targetLocks = append(targetLocks, tl)
 | |
| 			}
 | |
| 			finalMounts = append(finalMounts, *mountSpec)
 | |
| 		default:
 | |
| 			return nil, nil, fmt.Errorf("invalid mount type %q", mountType)
 | |
| 		}
 | |
| 	}
 | |
| 	succeeded = true
 | |
| 	artifacts := &runMountArtifacts{
 | |
| 		RunOverlayDirs:     overlayDirs,
 | |
| 		Agents:             agents,
 | |
| 		MountedImages:      mountImages,
 | |
| 		SSHAuthSock:        defaultSSHSock,
 | |
| 		TargetLocks:        targetLocks,
 | |
| 		IntermediateMounts: intermediateMounts,
 | |
| 	}
 | |
| 	return finalMounts, artifacts, nil
 | |
| }
 | |
| 
 | |
| func (b *Builder) getBindMount(tokens []string, sys *types.SystemContext, contextDir string, stageMountPoints map[string]internal.StageMountDetails, idMaps IDMaps, workDir, tmpDir string) (*specs.Mount, string, string, string, error) {
 | |
| 	if contextDir == "" {
 | |
| 		return nil, "", "", "", errors.New("context directory for current run invocation is not configured")
 | |
| 	}
 | |
| 	var optionMounts []specs.Mount
 | |
| 	optionMount, image, intermediateMount, overlayMount, err := volumes.GetBindMount(sys, tokens, contextDir, b.store, b.MountLabel, stageMountPoints, workDir, tmpDir)
 | |
| 	if err != nil {
 | |
| 		return nil, "", "", "", err
 | |
| 	}
 | |
| 	succeeded := false
 | |
| 	defer func() {
 | |
| 		if !succeeded {
 | |
| 			if overlayMount != "" {
 | |
| 				if err := overlay.RemoveTemp(overlayMount); err != nil {
 | |
| 					b.Logger.Debug(err.Error())
 | |
| 				}
 | |
| 			}
 | |
| 			if intermediateMount != "" {
 | |
| 				if err := mount.Unmount(intermediateMount); err != nil {
 | |
| 					b.Logger.Debugf("unmounting %q: %v", intermediateMount, err)
 | |
| 				}
 | |
| 				if err := os.Remove(intermediateMount); err != nil {
 | |
| 					b.Logger.Debugf("removing should-be-empty directory %q: %v", intermediateMount, err)
 | |
| 				}
 | |
| 			}
 | |
| 			if image != "" {
 | |
| 				if _, err := b.store.UnmountImage(image, false); err != nil {
 | |
| 					b.Logger.Debugf("unmounting image %q: %v", image, err)
 | |
| 				}
 | |
| 			}
 | |
| 		}
 | |
| 	}()
 | |
| 	optionMounts = append(optionMounts, optionMount)
 | |
| 	volumes, overlayDirs, err := b.runSetupVolumeMounts(b.MountLabel, nil, optionMounts, idMaps)
 | |
| 	if err != nil {
 | |
| 		return nil, "", "", "", err
 | |
| 	}
 | |
| 	if len(overlayDirs) != 0 {
 | |
| 		return nil, "", "", "", errors.New("internal error: did not expect a resolved bind mount to use the O flag")
 | |
| 	}
 | |
| 	succeeded = true
 | |
| 	return &volumes[0], image, intermediateMount, overlayMount, nil
 | |
| }
 | |
| 
 | |
| func (b *Builder) getTmpfsMount(tokens []string, idMaps IDMaps, workDir string) (*specs.Mount, error) {
 | |
| 	var optionMounts []specs.Mount
 | |
| 	mount, err := volumes.GetTmpfsMount(tokens, workDir)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 	optionMounts = append(optionMounts, mount)
 | |
| 	volumes, overlayDirs, err := b.runSetupVolumeMounts(b.MountLabel, nil, optionMounts, idMaps)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 	if len(overlayDirs) != 0 {
 | |
| 		return nil, errors.New("internal error: did not expect a resolved tmpfs mount to use the O flag")
 | |
| 	}
 | |
| 	return &volumes[0], nil
 | |
| }
 | |
| 
 | |
| func (b *Builder) getSecretMount(tokens []string, secrets map[string]define.Secret, idMaps IDMaps, workdir string) (_ *specs.Mount, _ string, retErr error) {
 | |
| 	errInvalidSyntax := errors.New("secret should have syntax id=id[,target=path,required=bool,mode=uint,uid=uint,gid=uint")
 | |
| 	if len(tokens) == 0 {
 | |
| 		return nil, "", errInvalidSyntax
 | |
| 	}
 | |
| 	var err error
 | |
| 	var id, target string
 | |
| 	var required bool
 | |
| 	var uid, gid uint32
 | |
| 	var mode uint32 = 0o400
 | |
| 	for _, val := range tokens {
 | |
| 		kv := strings.SplitN(val, "=", 2)
 | |
| 		switch kv[0] {
 | |
| 		case "type":
 | |
| 			// This is already processed
 | |
| 			continue
 | |
| 		case "id":
 | |
| 			id = kv[1]
 | |
| 		case "target", "dst", "destination":
 | |
| 			target = kv[1]
 | |
| 			if !filepath.IsAbs(target) {
 | |
| 				target = filepath.Join(workdir, target)
 | |
| 			}
 | |
| 		case "required":
 | |
| 			required = true
 | |
| 			if len(kv) > 1 {
 | |
| 				required, err = strconv.ParseBool(kv[1])
 | |
| 				if err != nil {
 | |
| 					return nil, "", errInvalidSyntax
 | |
| 				}
 | |
| 			}
 | |
| 		case "mode":
 | |
| 			mode64, err := strconv.ParseUint(kv[1], 8, 32)
 | |
| 			if err != nil {
 | |
| 				return nil, "", errInvalidSyntax
 | |
| 			}
 | |
| 			mode = uint32(mode64)
 | |
| 		case "uid":
 | |
| 			uid64, err := strconv.ParseUint(kv[1], 10, 32)
 | |
| 			if err != nil {
 | |
| 				return nil, "", errInvalidSyntax
 | |
| 			}
 | |
| 			uid = uint32(uid64)
 | |
| 		case "gid":
 | |
| 			gid64, err := strconv.ParseUint(kv[1], 10, 32)
 | |
| 			if err != nil {
 | |
| 				return nil, "", errInvalidSyntax
 | |
| 			}
 | |
| 			gid = uint32(gid64)
 | |
| 		default:
 | |
| 			return nil, "", errInvalidSyntax
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if id == "" {
 | |
| 		return nil, "", errInvalidSyntax
 | |
| 	}
 | |
| 	// Default location for secrets is /run/secrets/id
 | |
| 	if target == "" {
 | |
| 		target = "/run/secrets/" + id
 | |
| 	}
 | |
| 
 | |
| 	secr, ok := secrets[id]
 | |
| 	if !ok {
 | |
| 		if required {
 | |
| 			return nil, "", fmt.Errorf("secret required but no secret with id %q found", id)
 | |
| 		}
 | |
| 		return nil, "", nil
 | |
| 	}
 | |
| 	var data []byte
 | |
| 	var envFile string
 | |
| 	var ctrFileOnHost string
 | |
| 
 | |
| 	switch secr.SourceType {
 | |
| 	case "env":
 | |
| 		data = []byte(os.Getenv(secr.Source))
 | |
| 		tmpFile, err := os.CreateTemp(tmpdir.GetTempDir(), "buildah*")
 | |
| 		if err != nil {
 | |
| 			return nil, "", err
 | |
| 		}
 | |
| 		defer func() {
 | |
| 			if retErr != nil {
 | |
| 				os.Remove(tmpFile.Name())
 | |
| 			}
 | |
| 		}()
 | |
| 		envFile = tmpFile.Name()
 | |
| 		ctrFileOnHost = tmpFile.Name()
 | |
| 	case "file":
 | |
| 		containerWorkingDir, err := b.store.ContainerDirectory(b.ContainerID)
 | |
| 		if err != nil {
 | |
| 			return nil, "", err
 | |
| 		}
 | |
| 		data, err = os.ReadFile(secr.Source)
 | |
| 		if err != nil {
 | |
| 			return nil, "", err
 | |
| 		}
 | |
| 		ctrFileOnHost = filepath.Join(containerWorkingDir, "secrets", digest.FromString(id).Encoded()[:16])
 | |
| 	default:
 | |
| 		return nil, "", errors.New("invalid source secret type")
 | |
| 	}
 | |
| 
 | |
| 	// Copy secrets to container working dir (or tmp dir if it's an env), since we need to chmod,
 | |
| 	// chown and relabel it for the container user and we don't want to mess with the original file
 | |
| 	if err := os.MkdirAll(filepath.Dir(ctrFileOnHost), 0o755); err != nil {
 | |
| 		return nil, "", err
 | |
| 	}
 | |
| 	if err := os.WriteFile(ctrFileOnHost, data, 0o644); err != nil {
 | |
| 		return nil, "", err
 | |
| 	}
 | |
| 
 | |
| 	if err := relabel(ctrFileOnHost, b.MountLabel, false); err != nil {
 | |
| 		return nil, "", err
 | |
| 	}
 | |
| 	hostUID, hostGID, err := util.GetHostIDs(idMaps.uidmap, idMaps.gidmap, uid, gid)
 | |
| 	if err != nil {
 | |
| 		return nil, "", err
 | |
| 	}
 | |
| 	if err := os.Lchown(ctrFileOnHost, int(hostUID), int(hostGID)); err != nil {
 | |
| 		return nil, "", err
 | |
| 	}
 | |
| 	if err := os.Chmod(ctrFileOnHost, os.FileMode(mode)); err != nil {
 | |
| 		return nil, "", err
 | |
| 	}
 | |
| 	newMount := specs.Mount{
 | |
| 		Destination: target,
 | |
| 		Type:        define.TypeBind,
 | |
| 		Source:      ctrFileOnHost,
 | |
| 		Options:     append(define.BindOptions, "rprivate", "ro"),
 | |
| 	}
 | |
| 	return &newMount, envFile, nil
 | |
| }
 | |
| 
 | |
| // getSSHMount parses the --mount type=ssh flag in the Containerfile, checks if there's an ssh source provided, and creates and starts an ssh-agent to be forwarded into the container
 | |
| func (b *Builder) getSSHMount(tokens []string, count int, sshsources map[string]*sshagent.Source, idMaps IDMaps) (*specs.Mount, *sshagent.AgentServer, error) {
 | |
| 	errInvalidSyntax := errors.New("ssh should have syntax id=id[,target=path,required=bool,mode=uint,uid=uint,gid=uint")
 | |
| 
 | |
| 	var err error
 | |
| 	var id, target string
 | |
| 	var required bool
 | |
| 	var uid, gid uint32
 | |
| 	var mode uint32 = 0o600
 | |
| 	for _, val := range tokens {
 | |
| 		kv := strings.SplitN(val, "=", 2)
 | |
| 		if len(kv) < 2 {
 | |
| 			return nil, nil, errInvalidSyntax
 | |
| 		}
 | |
| 		switch kv[0] {
 | |
| 		case "type":
 | |
| 			// This is already processed
 | |
| 			continue
 | |
| 		case "id":
 | |
| 			id = kv[1]
 | |
| 		case "target", "dst", "destination":
 | |
| 			target = kv[1]
 | |
| 		case "required":
 | |
| 			required, err = strconv.ParseBool(kv[1])
 | |
| 			if err != nil {
 | |
| 				return nil, nil, errInvalidSyntax
 | |
| 			}
 | |
| 		case "mode":
 | |
| 			mode64, err := strconv.ParseUint(kv[1], 8, 32)
 | |
| 			if err != nil {
 | |
| 				return nil, nil, errInvalidSyntax
 | |
| 			}
 | |
| 			mode = uint32(mode64)
 | |
| 		case "uid":
 | |
| 			uid64, err := strconv.ParseUint(kv[1], 10, 32)
 | |
| 			if err != nil {
 | |
| 				return nil, nil, errInvalidSyntax
 | |
| 			}
 | |
| 			uid = uint32(uid64)
 | |
| 		case "gid":
 | |
| 			gid64, err := strconv.ParseUint(kv[1], 10, 32)
 | |
| 			if err != nil {
 | |
| 				return nil, nil, errInvalidSyntax
 | |
| 			}
 | |
| 			gid = uint32(gid64)
 | |
| 		default:
 | |
| 			return nil, nil, errInvalidSyntax
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if id == "" {
 | |
| 		id = "default"
 | |
| 	}
 | |
| 	// Default location for secrets is /run/buildkit/ssh_agent.{i}
 | |
| 	if target == "" {
 | |
| 		target = fmt.Sprintf("/run/buildkit/ssh_agent.%d", count)
 | |
| 	}
 | |
| 
 | |
| 	sshsource, ok := sshsources[id]
 | |
| 	if !ok {
 | |
| 		if required {
 | |
| 			return nil, nil, fmt.Errorf("ssh required but no ssh with id %s found", id)
 | |
| 		}
 | |
| 		return nil, nil, nil
 | |
| 	}
 | |
| 	// Create new agent from keys or socket
 | |
| 	fwdAgent, err := sshagent.NewAgentServer(sshsource)
 | |
| 	if err != nil {
 | |
| 		return nil, nil, err
 | |
| 	}
 | |
| 	// Start ssh server, and get the host sock we're mounting in the container
 | |
| 	hostSock, err := fwdAgent.Serve(b.ProcessLabel)
 | |
| 	if err != nil {
 | |
| 		return nil, nil, err
 | |
| 	}
 | |
| 
 | |
| 	if err := relabel(filepath.Dir(hostSock), b.MountLabel, false); err != nil {
 | |
| 		if shutdownErr := fwdAgent.Shutdown(); shutdownErr != nil {
 | |
| 			b.Logger.Errorf("error shutting down agent: %v", shutdownErr)
 | |
| 		}
 | |
| 		return nil, nil, err
 | |
| 	}
 | |
| 	if err := relabel(hostSock, b.MountLabel, false); err != nil {
 | |
| 		if shutdownErr := fwdAgent.Shutdown(); shutdownErr != nil {
 | |
| 			b.Logger.Errorf("error shutting down agent: %v", shutdownErr)
 | |
| 		}
 | |
| 		return nil, nil, err
 | |
| 	}
 | |
| 	hostUID, hostGID, err := util.GetHostIDs(idMaps.uidmap, idMaps.gidmap, uid, gid)
 | |
| 	if err != nil {
 | |
| 		if shutdownErr := fwdAgent.Shutdown(); shutdownErr != nil {
 | |
| 			b.Logger.Errorf("error shutting down agent: %v", shutdownErr)
 | |
| 		}
 | |
| 		return nil, nil, err
 | |
| 	}
 | |
| 	if err := os.Lchown(hostSock, int(hostUID), int(hostGID)); err != nil {
 | |
| 		if shutdownErr := fwdAgent.Shutdown(); shutdownErr != nil {
 | |
| 			b.Logger.Errorf("error shutting down agent: %v", shutdownErr)
 | |
| 		}
 | |
| 		return nil, nil, err
 | |
| 	}
 | |
| 	if err := os.Chmod(hostSock, os.FileMode(mode)); err != nil {
 | |
| 		if shutdownErr := fwdAgent.Shutdown(); shutdownErr != nil {
 | |
| 			b.Logger.Errorf("error shutting down agent: %v", shutdownErr)
 | |
| 		}
 | |
| 		return nil, nil, err
 | |
| 	}
 | |
| 	newMount := specs.Mount{
 | |
| 		Destination: target,
 | |
| 		Type:        define.TypeBind,
 | |
| 		Source:      hostSock,
 | |
| 		Options:     append(define.BindOptions, "rprivate", "ro"),
 | |
| 	}
 | |
| 	return &newMount, fwdAgent, nil
 | |
| }
 | |
| 
 | |
| // cleanupRunMounts cleans up run mounts so they only appear in this run.
 | |
| func (b *Builder) cleanupRunMounts(artifacts *runMountArtifacts) error {
 | |
| 	for _, agent := range artifacts.Agents {
 | |
| 		servePath := agent.ServePath()
 | |
| 		if err := agent.Shutdown(); err != nil {
 | |
| 			return fmt.Errorf("shutting down SSH agent at %q: %v", servePath, err)
 | |
| 		}
 | |
| 	}
 | |
| 	// clean up any overlays we mounted
 | |
| 	for _, overlayDirectory := range artifacts.RunOverlayDirs {
 | |
| 		if err := overlay.RemoveTemp(overlayDirectory); err != nil {
 | |
| 			return err
 | |
| 		}
 | |
| 	}
 | |
| 	// unmount anything that needs unmounting
 | |
| 	for _, intermediateMount := range artifacts.IntermediateMounts {
 | |
| 		if err := mount.Unmount(intermediateMount); err != nil && !errors.Is(err, os.ErrNotExist) {
 | |
| 			return fmt.Errorf("unmounting %q: %w", intermediateMount, err)
 | |
| 		}
 | |
| 		if err := os.Remove(intermediateMount); err != nil && !errors.Is(err, os.ErrNotExist) {
 | |
| 			return fmt.Errorf("removing should-be-empty directory %q: %w", intermediateMount, err)
 | |
| 		}
 | |
| 	}
 | |
| 	// unmount any images we mounted for this run
 | |
| 	for _, image := range artifacts.MountedImages {
 | |
| 		if _, err := b.store.UnmountImage(image, false); err != nil {
 | |
| 			logrus.Debugf("umounting image %q: %v", image, err)
 | |
| 		}
 | |
| 	}
 | |
| 	// unlock locks we took, most likely for cache mounts
 | |
| 	volumes.UnlockLockArray(artifacts.TargetLocks)
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // setPdeathsig sets a parent-death signal for the process
 | |
| // the goroutine that starts the child process should lock itself to
 | |
| // a native thread using runtime.LockOSThread() until the child exits
 | |
| func setPdeathsig(cmd *exec.Cmd) {
 | |
| 	if cmd.SysProcAttr == nil {
 | |
| 		cmd.SysProcAttr = &syscall.SysProcAttr{}
 | |
| 	}
 | |
| 	cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL
 | |
| }
 | |
| 
 | |
| func relabel(path, mountLabel string, shared bool) error {
 | |
| 	if err := label.Relabel(path, mountLabel, shared); err != nil {
 | |
| 		if !errors.Is(err, syscall.ENOTSUP) {
 | |
| 			return err
 | |
| 		}
 | |
| 		logrus.Debugf("Labeling not supported on %q", path)
 | |
| 	}
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // mapContainerNameToHostname returns the passed-in string with characters that
 | |
| // don't match validHostnames (defined above) stripped out.
 | |
| func mapContainerNameToHostname(containerName string) string {
 | |
| 	match := validHostnames.FindStringIndex(containerName)
 | |
| 	if match == nil {
 | |
| 		return ""
 | |
| 	}
 | |
| 	trimmed := containerName[match[0]:]
 | |
| 	match[1] -= match[0]
 | |
| 	match[0] = 0
 | |
| 	for match[1] != len(trimmed) && match[1] < match[0]+maxHostnameLen {
 | |
| 		trimmed = trimmed[:match[1]] + trimmed[match[1]+1:]
 | |
| 		match = validHostnames.FindStringIndex(trimmed)
 | |
| 		match[1] = min(match[1], maxHostnameLen)
 | |
| 	}
 | |
| 	return trimmed[:match[1]]
 | |
| }
 | |
| 
 | |
| // createMountTargets creates empty files or directories that are used as
 | |
| // targets for mounts in the spec, and makes a note of what it created.
 | |
| func (b *Builder) createMountTargets(spec *specs.Spec) ([]copier.ConditionalRemovePath, error) {
 | |
| 	// Avoid anything weird happening, just in case.
 | |
| 	if spec == nil || spec.Root == nil {
 | |
| 		return nil, nil
 | |
| 	}
 | |
| 	rootfsPath := spec.Root.Path
 | |
| 	then := time.Unix(0, 0)
 | |
| 	exemptFromTimesPreservation := map[string]struct{}{
 | |
| 		"dev":  {},
 | |
| 		"proc": {},
 | |
| 		"sys":  {},
 | |
| 	}
 | |
| 	exemptFromRemoval := map[string]struct{}{
 | |
| 		"dev":  {},
 | |
| 		"proc": {},
 | |
| 		"sys":  {},
 | |
| 	}
 | |
| 	overridePermissions := map[string]os.FileMode{
 | |
| 		"dev":  0o755,
 | |
| 		"proc": 0o755,
 | |
| 		"sys":  0o755,
 | |
| 	}
 | |
| 	uidmap, gidmap := convertRuntimeIDMaps(b.IDMappingOptions.UIDMap, b.IDMappingOptions.GIDMap)
 | |
| 	targets := copier.EnsureOptions{
 | |
| 		UIDMap: uidmap,
 | |
| 		GIDMap: gidmap,
 | |
| 	}
 | |
| 	for _, mnt := range spec.Mounts {
 | |
| 		typeFlag := byte(tar.TypeDir)
 | |
| 		// If the mount is a "bind" or "rbind" mount, then it's a bind
 | |
| 		// mount, which means the target _could_ be a non-directory.
 | |
| 		// Check the source and make a note.
 | |
| 		if mnt.Type == define.TypeBind || slices.Contains(mnt.Options, "bind") || slices.Contains(mnt.Options, "rbind") {
 | |
| 			if st, err := os.Stat(mnt.Source); err == nil {
 | |
| 				if !st.IsDir() {
 | |
| 					typeFlag = tar.TypeReg
 | |
| 				}
 | |
| 			}
 | |
| 		}
 | |
| 		// Walk the path components from the root all the way down to
 | |
| 		// the target mountpoint and build a list of pathnames that we
 | |
| 		// need to ensure exist.  If we might need to remove them, give
 | |
| 		// them a conspicuous mtime, so that we can detect if they were
 | |
| 		// unmounted and then modified, in which case we'll want to
 | |
| 		// preserve those changes.
 | |
| 		destination := mnt.Destination
 | |
| 		for destination != "" {
 | |
| 			cleanedDestination := strings.Trim(path.Clean(filepath.ToSlash(destination)), "/")
 | |
| 			modTime := &then
 | |
| 			if _, ok := exemptFromTimesPreservation[cleanedDestination]; ok {
 | |
| 				// don't force a timestamp for this path
 | |
| 				modTime = nil
 | |
| 			}
 | |
| 			var mode *os.FileMode
 | |
| 			if _, ok := exemptFromRemoval[cleanedDestination]; ok {
 | |
| 				// we're not going to filter this out later,
 | |
| 				// so don't make it look weird
 | |
| 				perms := os.FileMode(0o755)
 | |
| 				if typeFlag == tar.TypeReg {
 | |
| 					perms = 0o644
 | |
| 				}
 | |
| 				mode = &perms
 | |
| 				modTime = nil
 | |
| 			}
 | |
| 			if perms, ok := overridePermissions[cleanedDestination]; ok {
 | |
| 				// forced permissions
 | |
| 				mode = &perms
 | |
| 			}
 | |
| 			targets.Paths = append(targets.Paths, copier.EnsurePath{
 | |
| 				Path:     destination,
 | |
| 				Typeflag: typeFlag,
 | |
| 				ModTime:  modTime,
 | |
| 				Chmod:    mode,
 | |
| 			})
 | |
| 			typeFlag = tar.TypeDir
 | |
| 			dir, _ := filepath.Split(destination)
 | |
| 			if destination == dir {
 | |
| 				break
 | |
| 			}
 | |
| 			destination = dir
 | |
| 		}
 | |
| 	}
 | |
| 	if len(targets.Paths) == 0 {
 | |
| 		return nil, nil
 | |
| 	}
 | |
| 	created, _, err := copier.Ensure(rootfsPath, rootfsPath, targets)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 	logrus.Debugf("created mount targets at %v", created)
 | |
| 	var remove []copier.ConditionalRemovePath
 | |
| 	for _, target := range created {
 | |
| 		cleanedTarget := strings.Trim(path.Clean(filepath.ToSlash(target)), "/")
 | |
| 		if _, ok := exemptFromRemoval[cleanedTarget]; ok {
 | |
| 			continue
 | |
| 		}
 | |
| 		modTime := &then
 | |
| 		if _, ok := exemptFromTimesPreservation[cleanedTarget]; ok {
 | |
| 			modTime = nil
 | |
| 		}
 | |
| 		condition := copier.ConditionalRemovePath{
 | |
| 			Path:    cleanedTarget,
 | |
| 			ModTime: modTime,
 | |
| 			Owner:   &idtools.IDPair{UID: 0, GID: 0},
 | |
| 		}
 | |
| 		remove = append(remove, condition)
 | |
| 	}
 | |
| 	if len(remove) == 0 {
 | |
| 		return nil, nil
 | |
| 	}
 | |
| 	// encode the set of paths we might need to filter out at commit-time
 | |
| 	// in a way that hopefully doesn't break long-running concurrent Run()
 | |
| 	// calls, that lets us also not have to manage any locking for them
 | |
| 	cdir, err := b.store.ContainerDirectory(b.Container)
 | |
| 	if err != nil {
 | |
| 		return nil, fmt.Errorf("finding working container bookkeeping directory: %w", err)
 | |
| 	}
 | |
| 	if err := os.Mkdir(filepath.Join(cdir, containerExcludesDir), 0o700); err != nil && !errors.Is(err, os.ErrExist) {
 | |
| 		return nil, fmt.Errorf("creating exclusions directory: %w", err)
 | |
| 	}
 | |
| 	encoded, err := json.Marshal(remove)
 | |
| 	if err != nil {
 | |
| 		return nil, fmt.Errorf("encoding list of items to exclude at commit-time: %w", err)
 | |
| 	}
 | |
| 	f, err := os.CreateTemp(filepath.Join(cdir, containerExcludesDir), "filter*"+containerExcludesSubstring)
 | |
| 	if err != nil {
 | |
| 		return nil, fmt.Errorf("creating exclusions file: %w", err)
 | |
| 	}
 | |
| 	defer os.Remove(f.Name())
 | |
| 	defer f.Close()
 | |
| 	if err := ioutils.AtomicWriteFile(strings.TrimSuffix(f.Name(), containerExcludesSubstring), encoded, 0o600); err != nil {
 | |
| 		return nil, fmt.Errorf("writing exclusions file: %w", err)
 | |
| 	}
 | |
| 	// return that set of paths directly, in case the caller would prefer
 | |
| 	// to clear them out before commit-time
 | |
| 	return remove, nil
 | |
| }
 |