Bug#988802: unblock: runc/1.0.0~rc93+ds1-4
Package: release.debian.org
Severity: normal
User: release.debian.org@packages.debian.org
Usertags: unblock
X-Debbugs-Cc: zhsj@debian.org
Please unblock package runc
[ Reason ]
Fix CVE-2021-30465
https://github.com/opencontainers/runc/security/advisories/GHSA-c3xm-pvg7-gh7r
[ Impact ]
The package can migrate itself(have autopkgtest and not key package),
but I'd like to reduce the age.
[ Tests ]
I have done some basic tests. But I'm not sure how to trigger the security
issue that I can't verify if it's really fixed.
[ Risks ]
The patch provided by upstream can't be applied clearly to the version we have
in sid. So I look the changes and backport another two PR, which makes the diff
a bit large.
[ Checklist ]
[x] all changes are documented in the d/changelog
[x] I reviewed all changes and I approve them
[*] attach debdiff against the package in testing
Since only patches are added in debian/patches dir, I just attached the 3
new patches.
[ Other info ]
unblock runc/1.0.0~rc93+ds1-4
debian/changelog:
runc (1.0.0~rc93+ds1-4) unstable; urgency=high
.
* Team upload.
* Backport patches for CVE-2021-30465 (Closes: #988768)
To apply CVE-2021-30465 patch clearly, following PR are backported as
well:
+ https://github.com/opencontainers/runc/pull/2798
+ https://github.com/opencontainers/runc/pull/2818
$ cat debian/patches/00{11,12,13}*|filterdiff -x '*.bats' -x '*_test.go' |diffstat
b/libcontainer/container_linux.go | 10 +
b/libcontainer/init_linux.go | 1
b/libcontainer/rootfs_linux.go | 64 ++++++---
b/libcontainer/specconv/example.go | 18 +-
b/libcontainer/utils/utils.go | 54 +++++++
libcontainer/container_linux.go | 52 ++++++-
libcontainer/rootfs_linux.go | 251 ++++++++++++++++++-------------------
7 files changed, 283 insertions(+), 167 deletions(-)
$ cat debian/patches/00{11,12,13}*|filterdiff -x '*.bats' -x '*_test.go'
From: Kir Kolyshkin <kolyshkin@gmail.com>
Date: Tue, 23 Feb 2021 17:58:07 -0800
Subject: PR2818 Fix cgroup2 mount for rootless case
Backport this PR so we can apply the patch for CVE-2021-30465
1. libct/newInitConfig: nit
2. libct/rootfs: introduce and use mountConfig
3 .libct/rootfs/mountCgroupV2: minor refactor
4. Fix cgroup2 mount for rootless case
5. tests/int: use bfq test with rootless
6. tests/int: add a case for cgroupv2 mount
Origin: backport, https://github.com/opencontainers/runc/pull/2818
---
libcontainer/container_linux.go | 10 +++++--
libcontainer/init_linux.go | 1 +
libcontainer/rootfs_linux.go | 64 ++++++++++++++++++++++++++++------------
libcontainer/specconv/example.go | 18 +++++------
tests/integration/cgroups.bats | 34 +++++++++++++++++++++
5 files changed, 96 insertions(+), 31 deletions(-)
diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go
index 3dca29e..1cbc734 100644
--- a/libcontainer/container_linux.go
+++ b/libcontainer/container_linux.go
@@ -594,6 +594,9 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
AppArmorProfile: c.config.AppArmorProfile,
ProcessLabel: c.config.ProcessLabel,
Rlimits: c.config.Rlimits,
+ CreateConsole: process.ConsoleSocket != nil,
+ ConsoleWidth: process.ConsoleWidth,
+ ConsoleHeight: process.ConsoleHeight,
}
if process.NoNewPrivileges != nil {
cfg.NoNewPrivileges = *process.NoNewPrivileges
@@ -607,9 +610,10 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
if len(process.Rlimits) > 0 {
cfg.Rlimits = process.Rlimits
}
- cfg.CreateConsole = process.ConsoleSocket != nil
- cfg.ConsoleWidth = process.ConsoleWidth
- cfg.ConsoleHeight = process.ConsoleHeight
+ if cgroups.IsCgroup2UnifiedMode() {
+ cfg.Cgroup2Path = c.cgroupManager.Path("")
+ }
+
return cfg
}
diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go
index c57af0e..6817970 100644
--- a/libcontainer/init_linux.go
+++ b/libcontainer/init_linux.go
@@ -70,6 +70,7 @@ type initConfig struct {
RootlessEUID bool `json:"rootless_euid,omitempty"`
RootlessCgroups bool `json:"rootless_cgroups,omitempty"`
SpecState *specs.State `json:"spec_state,omitempty"`
+ Cgroup2Path string `json:"cgroup2_path,omitempty"`
}
type initer interface {
diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go
index 411496a..5d2d74c 100644
--- a/libcontainer/rootfs_linux.go
+++ b/libcontainer/rootfs_linux.go
@@ -17,6 +17,7 @@ import (
"github.com/moby/sys/mountinfo"
"github.com/mrunalp/fileutils"
"github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/cgroups/fs2"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/devices"
"github.com/opencontainers/runc/libcontainer/system"
@@ -29,6 +30,14 @@ import (
const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
+type mountConfig struct {
+ root string
+ label string
+ cgroup2Path string
+ rootlessCgroups bool
+ cgroupns bool
+}
+
// needsSetupDev returns true if /dev needs to be set up.
func needsSetupDev(config *configs.Config) bool {
for _, m := range config.Mounts {
@@ -48,7 +57,13 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
return newSystemErrorWithCause(err, "preparing rootfs")
}
- hasCgroupns := config.Namespaces.Contains(configs.NEWCGROUP)
+ mountConfig := &mountConfig{
+ root: config.Rootfs,
+ label: config.MountLabel,
+ cgroup2Path: iConfig.Cgroup2Path,
+ rootlessCgroups: iConfig.RootlessCgroups,
+ cgroupns: config.Namespaces.Contains(configs.NEWCGROUP),
+ }
setupDev := needsSetupDev(config)
for _, m := range config.Mounts {
for _, precmd := range m.PremountCmds {
@@ -56,7 +71,7 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
return newSystemErrorWithCause(err, "running premount command")
}
}
- if err := mountToRootfs(m, config.Rootfs, config.MountLabel, hasCgroupns); err != nil {
+ if err := mountToRootfs(m, mountConfig); err != nil {
return newSystemErrorWithCausef(err, "mounting %q to rootfs at %q", m.Source, m.Destination)
}
@@ -222,7 +237,7 @@ func prepareBindMount(m *configs.Mount, rootfs string) error {
return nil
}
-func mountCgroupV1(m *configs.Mount, rootfs, mountLabel string, enableCgroupns bool) error {
+func mountCgroupV1(m *configs.Mount, c *mountConfig) error {
binds, err := getCgroupMounts(m)
if err != nil {
return err
@@ -242,12 +257,12 @@ func mountCgroupV1(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b
Data: "mode=755",
PropagationFlags: m.PropagationFlags,
}
- if err := mountToRootfs(tmpfs, rootfs, mountLabel, enableCgroupns); err != nil {
+ if err := mountToRootfs(tmpfs, c); err != nil {
return err
}
for _, b := range binds {
- if enableCgroupns {
- subsystemPath := filepath.Join(rootfs, b.Destination)
+ if c.cgroupns {
+ subsystemPath := filepath.Join(c.root, b.Destination)
if err := os.MkdirAll(subsystemPath, 0755); err != nil {
return err
}
@@ -266,7 +281,7 @@ func mountCgroupV1(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b
return err
}
} else {
- if err := mountToRootfs(b, rootfs, mountLabel, enableCgroupns); err != nil {
+ if err := mountToRootfs(b, c); err != nil {
return err
}
}
@@ -276,7 +291,7 @@ func mountCgroupV1(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b
// symlink(2) is very dumb, it will just shove the path into
// the link and doesn't do any checks or relative path
// conversion. Also, don't error out if the cgroup already exists.
- if err := os.Symlink(mc, filepath.Join(rootfs, m.Destination, ss)); err != nil && !os.IsExist(err) {
+ if err := os.Symlink(mc, filepath.Join(c.root, m.Destination, ss)); err != nil && !os.IsExist(err) {
return err
}
}
@@ -284,28 +299,39 @@ func mountCgroupV1(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b
return nil
}
-func mountCgroupV2(m *configs.Mount, rootfs, mountLabel string, enableCgroupns bool) error {
- cgroupPath, err := securejoin.SecureJoin(rootfs, m.Destination)
+func mountCgroupV2(m *configs.Mount, c *mountConfig) error {
+ dest, err := securejoin.SecureJoin(c.root, m.Destination)
if err != nil {
return err
}
- if err := os.MkdirAll(cgroupPath, 0755); err != nil {
+ if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
- if err := unix.Mount(m.Source, cgroupPath, "cgroup2", uintptr(m.Flags), m.Data); err != nil {
+ if err := unix.Mount(m.Source, dest, "cgroup2", uintptr(m.Flags), m.Data); err != nil {
// when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158)
if err == unix.EPERM || err == unix.EBUSY {
- return unix.Mount("/sys/fs/cgroup", cgroupPath, "", uintptr(m.Flags)|unix.MS_BIND, "")
+ src := fs2.UnifiedMountpoint
+ if c.cgroupns && c.cgroup2Path != "" {
+ // Emulate cgroupns by bind-mounting
+ // the container cgroup path rather than
+ // the whole /sys/fs/cgroup.
+ src = c.cgroup2Path
+ }
+ err = unix.Mount(src, dest, "", uintptr(m.Flags)|unix.MS_BIND, "")
+ if err == unix.ENOENT && c.rootlessCgroups {
+ err = nil
+ }
+ return err
}
return err
}
return nil
}
-func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns bool) error {
- var (
- dest = m.Destination
- )
+func mountToRootfs(m *configs.Mount, c *mountConfig) error {
+ rootfs := c.root
+ mountLabel := c.label
+ dest := m.Destination
if !strings.HasPrefix(dest, rootfs) {
dest = filepath.Join(rootfs, dest)
}
@@ -424,9 +450,9 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b
}
case "cgroup":
if cgroups.IsCgroup2UnifiedMode() {
- return mountCgroupV2(m, rootfs, mountLabel, enableCgroupns)
+ return mountCgroupV2(m, c)
}
- return mountCgroupV1(m, rootfs, mountLabel, enableCgroupns)
+ return mountCgroupV1(m, c)
default:
// ensure that the destination of the mount is resolved of symlinks at mount time because
// any previous mounts can invalidate the next mount's destination.
diff --git a/libcontainer/specconv/example.go b/libcontainer/specconv/example.go
index 8a201bc..56bab3b 100644
--- a/libcontainer/specconv/example.go
+++ b/libcontainer/specconv/example.go
@@ -2,6 +2,7 @@ package specconv
import (
"os"
+ "path/filepath"
"strings"
"github.com/opencontainers/runc/libcontainer/cgroups"
@@ -200,8 +201,14 @@ func ToRootless(spec *specs.Spec) {
// Fix up mounts.
var mounts []specs.Mount
for _, mount := range spec.Mounts {
- // Ignore all mounts that are under /sys.
- if strings.HasPrefix(mount.Destination, "/sys") {
+ // Replace the /sys mount with an rbind.
+ if filepath.Clean(mount.Destination) == "/sys" {
+ mounts = append(mounts, specs.Mount{
+ Source: "/sys",
+ Destination: "/sys",
+ Type: "none",
+ Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"},
+ })
continue
}
@@ -216,13 +223,6 @@ func ToRootless(spec *specs.Spec) {
mount.Options = options
mounts = append(mounts, mount)
}
- // Add the sysfs mount as an rbind.
- mounts = append(mounts, specs.Mount{
- Source: "/sys",
- Destination: "/sys",
- Type: "none",
- Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"},
- })
spec.Mounts = mounts
// Remove cgroup settings.
From: Adrian Reber <areber@redhat.com>
Date: Mon, 8 Feb 2021 13:05:54 +0000
Subject: PR2798 Correctly restore containers with nested bind mounts
Backport this PR so we can apply the patch for CVE-2021-30465
1. Re-create mountpoints during restore
2. tests: test nested bind mount restore
Origin: backport, https://github.com/opencontainers/runc/pull/2798
---
libcontainer/container_linux.go | 27 +++++++++++++++++++++++++
tests/integration/checkpoint.bats | 42 +++++++++++++++++++++++++++++++++++++++
2 files changed, 69 insertions(+)
diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go
index 1cbc734..76a69af 100644
--- a/libcontainer/container_linux.go
+++ b/libcontainer/container_linux.go
@@ -1239,11 +1239,38 @@ func (c *linuxContainer) prepareCriuRestoreMounts(mounts []*configs.Mount) error
// Now go through all mounts and create the mountpoints
// if the mountpoints are not on a tmpfs, as CRIU will
// restore the complete tmpfs content from its checkpoint.
+ umounts := []string{}
+ defer func() {
+ for _, u := range umounts {
+ if e := unix.Unmount(u, unix.MNT_DETACH); e != nil {
+ if e != unix.EINVAL {
+ // Ignore EINVAL as it means 'target is not a mount point.'
+ // It probably has already been unmounted.
+ logrus.Warnf("Error during cleanup unmounting of %q (%v)", u, e)
+ }
+ }
+ }
+ }()
for _, m := range mounts {
if !isPathInPrefixList(m.Destination, tmpfs) {
if err := c.makeCriuRestoreMountpoints(m); err != nil {
return err
}
+ // If the mount point is a bind mount, we need to mount
+ // it now so that runc can create the necessary mount
+ // points for mounts in bind mounts.
+ // This also happens during initial container creation.
+ // Without this CRIU restore will fail
+ // See: https://github.com/opencontainers/runc/issues/2748
+ // It is also not necessary to order the mount points
+ // because during initial container creation mounts are
+ // set up in the order they are configured.
+ if m.Device == "bind" {
+ if err := unix.Mount(m.Source, m.Destination, "", unix.MS_BIND|unix.MS_REC, ""); err != nil {
+ return errorsf.Wrapf(err, "unable to bind mount %q to %q", m.Source, m.Destination)
+ }
+ umounts = append(umounts, m.Destination)
+ }
}
}
return nil
From: Aleksa Sarai <cyphar@cyphar.com>
Date: Thu, 1 Apr 2021 12:00:31 -0700
Subject: CVE-2021-30465
Origin: backport, https://github.com/opencontainers/runc/commit/0ca91f44f1664da834bc61115a849b56d22f595f
---
libcontainer/container_linux.go | 25 ++--
libcontainer/rootfs_linux.go | 251 +++++++++++++++++++--------------------
libcontainer/utils/utils.go | 54 +++++++++
libcontainer/utils/utils_test.go | 35 ++++++
4 files changed, 229 insertions(+), 136 deletions(-)
diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go
index 76a69af..309b02a 100644
--- a/libcontainer/container_linux.go
+++ b/libcontainer/container_linux.go
@@ -1202,7 +1202,6 @@ func (c *linuxContainer) makeCriuRestoreMountpoints(m *configs.Mount) error {
if err := checkProcMount(c.config.Rootfs, dest, ""); err != nil {
return err
}
- m.Destination = dest
if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
@@ -1242,13 +1241,16 @@ func (c *linuxContainer) prepareCriuRestoreMounts(mounts []*configs.Mount) error
umounts := []string{}
defer func() {
for _, u := range umounts {
- if e := unix.Unmount(u, unix.MNT_DETACH); e != nil {
- if e != unix.EINVAL {
- // Ignore EINVAL as it means 'target is not a mount point.'
- // It probably has already been unmounted.
- logrus.Warnf("Error during cleanup unmounting of %q (%v)", u, e)
+ _ = utils.WithProcfd(c.config.Rootfs, u, func(procfd string) error {
+ if e := unix.Unmount(procfd, unix.MNT_DETACH); e != nil {
+ if e != unix.EINVAL {
+ // Ignore EINVAL as it means 'target is not a mount point.'
+ // It probably has already been unmounted.
+ logrus.Warnf("Error during cleanup unmounting of %s (%s): %v", procfd, u, e)
+ }
}
- }
+ return nil
+ })
}
}()
for _, m := range mounts {
@@ -1266,8 +1268,13 @@ func (c *linuxContainer) prepareCriuRestoreMounts(mounts []*configs.Mount) error
// because during initial container creation mounts are
// set up in the order they are configured.
if m.Device == "bind" {
- if err := unix.Mount(m.Source, m.Destination, "", unix.MS_BIND|unix.MS_REC, ""); err != nil {
- return errorsf.Wrapf(err, "unable to bind mount %q to %q", m.Source, m.Destination)
+ if err := utils.WithProcfd(c.config.Rootfs, m.Destination, func(procfd string) error {
+ if err := unix.Mount(m.Source, procfd, "", unix.MS_BIND|unix.MS_REC, ""); err != nil {
+ return errorsf.Wrapf(err, "unable to bind mount %q to %q (through %q)", m.Source, m.Destination, procfd)
+ }
+ return nil
+ }); err != nil {
+ return err
}
umounts = append(umounts, m.Destination)
}
diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go
index 5d2d74c..96be669 100644
--- a/libcontainer/rootfs_linux.go
+++ b/libcontainer/rootfs_linux.go
@@ -25,6 +25,7 @@ import (
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/selinux/go-selinux/label"
+ "github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
@@ -228,8 +229,6 @@ func prepareBindMount(m *configs.Mount, rootfs string) error {
if err := checkProcMount(rootfs, dest, m.Source); err != nil {
return err
}
- // update the mount with the correct dest after symlinks are resolved.
- m.Destination = dest
if err := createIfNotExists(dest, stat.IsDir()); err != nil {
return err
}
@@ -266,18 +265,21 @@ func mountCgroupV1(m *configs.Mount, c *mountConfig) error {
if err := os.MkdirAll(subsystemPath, 0755); err != nil {
return err
}
- flags := defaultMountFlags
- if m.Flags&unix.MS_RDONLY != 0 {
- flags = flags | unix.MS_RDONLY
- }
- cgroupmount := &configs.Mount{
- Source: "cgroup",
- Device: "cgroup", // this is actually fstype
- Destination: subsystemPath,
- Flags: flags,
- Data: filepath.Base(subsystemPath),
- }
- if err := mountNewCgroup(cgroupmount); err != nil {
+ if err := utils.WithProcfd(c.root, b.Destination, func(procfd string) error {
+ flags := defaultMountFlags
+ if m.Flags&unix.MS_RDONLY != 0 {
+ flags = flags | unix.MS_RDONLY
+ }
+ var (
+ source = "cgroup"
+ data = filepath.Base(subsystemPath)
+ )
+ if data == "systemd" {
+ data = cgroups.CgroupNamePrefix + data
+ source = "systemd"
+ }
+ return unix.Mount(source, procfd, "cgroup", uintptr(flags), data)
+ }); err != nil {
return err
}
} else {
@@ -307,33 +309,79 @@ func mountCgroupV2(m *configs.Mount, c *mountConfig) error {
if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
- if err := unix.Mount(m.Source, dest, "cgroup2", uintptr(m.Flags), m.Data); err != nil {
- // when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158)
- if err == unix.EPERM || err == unix.EBUSY {
- src := fs2.UnifiedMountpoint
- if c.cgroupns && c.cgroup2Path != "" {
- // Emulate cgroupns by bind-mounting
- // the container cgroup path rather than
- // the whole /sys/fs/cgroup.
- src = c.cgroup2Path
- }
- err = unix.Mount(src, dest, "", uintptr(m.Flags)|unix.MS_BIND, "")
- if err == unix.ENOENT && c.rootlessCgroups {
- err = nil
+ return utils.WithProcfd(c.root, m.Destination, func(procfd string) error {
+ if err := unix.Mount(m.Source, procfd, "cgroup2", uintptr(m.Flags), m.Data); err != nil {
+ // when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158)
+ if err == unix.EPERM || err == unix.EBUSY {
+ src := fs2.UnifiedMountpoint
+ if c.cgroupns && c.cgroup2Path != "" {
+ // Emulate cgroupns by bind-mounting
+ // the container cgroup path rather than
+ // the whole /sys/fs/cgroup.
+ src = c.cgroup2Path
+ }
+ err = unix.Mount(src, procfd, "", uintptr(m.Flags)|unix.MS_BIND, "")
+ if err == unix.ENOENT && c.rootlessCgroups {
+ err = nil
+ }
}
return err
}
+ return nil
+ })
+}
+
+func doTmpfsCopyUp(m *configs.Mount, rootfs, mountLabel string) (Err error) {
+ // Set up a scratch dir for the tmpfs on the host.
+ tmpdir, err := prepareTmp("/tmp")
+ if err != nil {
+ return newSystemErrorWithCause(err, "tmpcopyup: failed to setup tmpdir")
+ }
+ defer cleanupTmp(tmpdir)
+ tmpDir, err := ioutil.TempDir(tmpdir, "runctmpdir")
+ if err != nil {
+ return newSystemErrorWithCause(err, "tmpcopyup: failed to create tmpdir")
+ }
+ defer os.RemoveAll(tmpDir)
+
+ // Configure the *host* tmpdir as if it's the container mount. We change
+ // m.Destination since we are going to mount *on the host*.
+ oldDest := m.Destination
+ m.Destination = tmpDir
+ err = mountPropagate(m, "/", mountLabel)
+ m.Destination = oldDest
+ if err != nil {
return err
}
- return nil
+ defer func() {
+ if Err != nil {
+ if err := unix.Unmount(tmpDir, unix.MNT_DETACH); err != nil {
+ logrus.Warnf("tmpcopyup: failed to unmount tmpdir on error: %v", err)
+ }
+ }
+ }()
+
+ return utils.WithProcfd(rootfs, m.Destination, func(procfd string) (Err error) {
+ // Copy the container data to the host tmpdir. We append "/" to force
+ // CopyDirectory to resolve the symlink rather than trying to copy the
+ // symlink itself.
+ if err := fileutils.CopyDirectory(procfd+"/", tmpDir); err != nil {
+ return fmt.Errorf("tmpcopyup: failed to copy %s to %s (%s): %w", m.Destination, procfd, tmpDir, err)
+ }
+ // Now move the mount into the container.
+ if err := unix.Mount(tmpDir, procfd, "", unix.MS_MOVE, ""); err != nil {
+ return fmt.Errorf("tmpcopyup: failed to move mount %s to %s (%s): %w", tmpDir, procfd, m.Destination, err)
+ }
+ return nil
+ })
}
func mountToRootfs(m *configs.Mount, c *mountConfig) error {
rootfs := c.root
mountLabel := c.label
- dest := m.Destination
- if !strings.HasPrefix(dest, rootfs) {
- dest = filepath.Join(rootfs, dest)
+ dest, err := securejoin.SecureJoin(rootfs, m.Destination)
+ if err != nil {
+ return err
}
switch m.Device {
@@ -364,53 +412,21 @@ func mountToRootfs(m *configs.Mount, c *mountConfig) error {
}
return label.SetFileLabel(dest, mountLabel)
case "tmpfs":
- copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
- tmpDir := ""
- // dest might be an absolute symlink, so it needs
- // to be resolved under rootfs.
- dest, err := securejoin.SecureJoin(rootfs, m.Destination)
- if err != nil {
- return err
- }
- m.Destination = dest
stat, err := os.Stat(dest)
if err != nil {
if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
}
- if copyUp {
- tmpdir, err := prepareTmp("/tmp")
- if err != nil {
- return newSystemErrorWithCause(err, "tmpcopyup: failed to setup tmpdir")
- }
- defer cleanupTmp(tmpdir)
- tmpDir, err = ioutil.TempDir(tmpdir, "runctmpdir")
- if err != nil {
- return newSystemErrorWithCause(err, "tmpcopyup: failed to create tmpdir")
- }
- defer os.RemoveAll(tmpDir)
- m.Destination = tmpDir
+
+ if m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP {
+ err = doTmpfsCopyUp(m, rootfs, mountLabel)
+ } else {
+ err = mountPropagate(m, rootfs, mountLabel)
}
- if err := mountPropagate(m, rootfs, mountLabel); err != nil {
+ if err != nil {
return err
}
- if copyUp {
- if err := fileutils.CopyDirectory(dest, tmpDir); err != nil {
- errMsg := fmt.Errorf("tmpcopyup: failed to copy %s to %s: %v", dest, tmpDir, err)
- if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil {
- return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg)
- }
- return errMsg
- }
- if err := unix.Mount(tmpDir, dest, "", unix.MS_MOVE, ""); err != nil {
- errMsg := fmt.Errorf("tmpcopyup: failed to move mount %s to %s: %v", tmpDir, dest, err)
- if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil {
- return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg)
- }
- return errMsg
- }
- }
if stat != nil {
if err = os.Chmod(dest, stat.Mode()); err != nil {
return err
@@ -454,19 +470,9 @@ func mountToRootfs(m *configs.Mount, c *mountConfig) error {
}
return mountCgroupV1(m, c)
default:
- // ensure that the destination of the mount is resolved of symlinks at mount time because
- // any previous mounts can invalidate the next mount's destination.
- // this can happen when a user specifies mounts within other mounts to cause breakouts or other
- // evil stuff to try to escape the container's rootfs.
- var err error
- if dest, err = securejoin.SecureJoin(rootfs, m.Destination); err != nil {
- return err
- }
if err := checkProcMount(rootfs, dest, m.Source); err != nil {
return err
}
- // update the mount with the correct dest after symlinks are resolved.
- m.Destination = dest
if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
@@ -649,7 +655,7 @@ func createDevices(config *configs.Config) error {
return nil
}
-func bindMountDeviceNode(dest string, node *devices.Device) error {
+func bindMountDeviceNode(rootfs, dest string, node *devices.Device) error {
f, err := os.Create(dest)
if err != nil && !os.IsExist(err) {
return err
@@ -657,7 +663,9 @@ func bindMountDeviceNode(dest string, node *devices.Device) error {
if f != nil {
f.Close()
}
- return unix.Mount(node.Path, dest, "bind", unix.MS_BIND, "")
+ return utils.WithProcfd(rootfs, dest, func(procfd string) error {
+ return unix.Mount(node.Path, procfd, "bind", unix.MS_BIND, "")
+ })
}
// Creates the device node in the rootfs of the container.
@@ -666,18 +674,21 @@ func createDeviceNode(rootfs string, node *devices.Device, bind bool) error {
// The node only exists for cgroup reasons, ignore it here.
return nil
}
- dest := filepath.Join(rootfs, node.Path)
+ dest, err := securejoin.SecureJoin(rootfs, node.Path)
+ if err != nil {
+ return err
+ }
if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil {
return err
}
if bind {
- return bindMountDeviceNode(dest, node)
+ return bindMountDeviceNode(rootfs, dest, node)
}
if err := mknodDevice(dest, node); err != nil {
if os.IsExist(err) {
return nil
} else if os.IsPermission(err) {
- return bindMountDeviceNode(dest, node)
+ return bindMountDeviceNode(rootfs, dest, node)
}
return err
}
@@ -1013,61 +1024,47 @@ func writeSystemProperty(key, value string) error {
}
func remount(m *configs.Mount, rootfs string) error {
- var (
- dest = m.Destination
- )
- if !strings.HasPrefix(dest, rootfs) {
- dest = filepath.Join(rootfs, dest)
- }
- return unix.Mount(m.Source, dest, m.Device, uintptr(m.Flags|unix.MS_REMOUNT), "")
+ return utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
+ return unix.Mount(m.Source, procfd, m.Device, uintptr(m.Flags|unix.MS_REMOUNT), "")
+ })
}
// Do the mount operation followed by additional mounts required to take care
-// of propagation flags.
+// of propagation flags. This will always be scoped inside the container rootfs.
func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
var (
- dest = m.Destination
data = label.FormatMountLabel(m.Data, mountLabel)
flags = m.Flags
)
- if libcontainerUtils.CleanPath(dest) == "/dev" {
- flags &= ^unix.MS_RDONLY
- }
-
- // Mount it rw to allow chmod operation. A remount will be performed
- // later to make it ro if set.
- if m.Device == "tmpfs" {
+ // Delay mounting the filesystem read-only if we need to do further
+ // operations on it. We need to set up files in "/dev" and tmpfs mounts may
+ // need to be chmod-ed after mounting. The mount will be remounted ro later
+ // in finalizeRootfs() if necessary.
+ if libcontainerUtils.CleanPath(m.Destination) == "/dev" || m.Device == "tmpfs" {
flags &= ^unix.MS_RDONLY
}
- copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
- if !(copyUp || strings.HasPrefix(dest, rootfs)) {
- dest = filepath.Join(rootfs, dest)
- }
-
- if err := unix.Mount(m.Source, dest, m.Device, uintptr(flags), data); err != nil {
- return err
- }
-
- for _, pflag := range m.PropagationFlags {
- if err := unix.Mount("", dest, "", uintptr(pflag), ""); err != nil {
- return err
+ // Because the destination is inside a container path which might be
+ // mutating underneath us, we verify that we are actually going to mount
+ // inside the container with WithProcfd() -- mounting through a procfd
+ // mounts on the target.
+ if err := utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
+ return unix.Mount(m.Source, procfd, m.Device, uintptr(flags), data)
+ }); err != nil {
+ return fmt.Errorf("mount through procfd: %w", err)
+ }
+ // We have to apply mount propagation flags in a separate WithProcfd() call
+ // because the previous call invalidates the passed procfd -- the mount
+ // target needs to be re-opened.
+ if err := utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
+ for _, pflag := range m.PropagationFlags {
+ if err := unix.Mount("", procfd, "", uintptr(pflag), ""); err != nil {
+ return err
+ }
}
- }
- return nil
-}
-
-func mountNewCgroup(m *configs.Mount) error {
- var (
- data = m.Data
- source = m.Source
- )
- if data == "systemd" {
- data = cgroups.CgroupNamePrefix + data
- source = "systemd"
- }
- if err := unix.Mount(source, m.Destination, m.Device, uintptr(m.Flags), data); err != nil {
- return err
+ return nil
+ }); err != nil {
+ return fmt.Errorf("change mount propagation through procfd: %w", err)
}
return nil
}
diff --git a/libcontainer/utils/utils.go b/libcontainer/utils/utils.go
index 1b72b7a..cd78f23 100644
--- a/libcontainer/utils/utils.go
+++ b/libcontainer/utils/utils.go
@@ -3,12 +3,15 @@ package utils
import (
"encoding/binary"
"encoding/json"
+ "fmt"
"io"
"os"
"path/filepath"
+ "strconv"
"strings"
"unsafe"
+ "github.com/cyphar/filepath-securejoin"
"golang.org/x/sys/unix"
)
@@ -88,6 +91,57 @@ func CleanPath(path string) string {
return filepath.Clean(path)
}
+// stripRoot returns the passed path, stripping the root path if it was
+// (lexicially) inside it. Note that both passed paths will always be treated
+// as absolute, and the returned path will also always be absolute. In
+// addition, the paths are cleaned before stripping the root.
+func stripRoot(root, path string) string {
+ // Make the paths clean and absolute.
+ root, path = CleanPath("/"+root), CleanPath("/"+path)
+ switch {
+ case path == root:
+ path = "/"
+ case root == "/":
+ // do nothing
+ case strings.HasPrefix(path, root+"/"):
+ path = strings.TrimPrefix(path, root+"/")
+ }
+ return CleanPath("/" + path)
+}
+
+// WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...)
+// corresponding to the unsafePath resolved within the root. Before passing the
+// fd, this path is verified to have been inside the root -- so operating on it
+// through the passed fdpath should be safe. Do not access this path through
+// the original path strings, and do not attempt to use the pathname outside of
+// the passed closure (the file handle will be freed once the closure returns).
+func WithProcfd(root, unsafePath string, fn func(procfd string) error) error {
+ // Remove the root then forcefully resolve inside the root.
+ unsafePath = stripRoot(root, unsafePath)
+ path, err := securejoin.SecureJoin(root, unsafePath)
+ if err != nil {
+ return fmt.Errorf("resolving path inside rootfs failed: %v", err)
+ }
+
+ // Open the target path.
+ fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0)
+ if err != nil {
+ return fmt.Errorf("open o_path procfd: %w", err)
+ }
+ defer fh.Close()
+
+ // Double-check the path is the one we expected.
+ procfd := "/proc/self/fd/" + strconv.Itoa(int(fh.Fd()))
+ if realpath, err := os.Readlink(procfd); err != nil {
+ return fmt.Errorf("procfd verification failed: %w", err)
+ } else if realpath != path {
+ return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath)
+ }
+
+ // Run the closure.
+ return fn(procfd)
+}
+
// SearchLabels searches a list of key-value pairs for the provided key and
// returns the corresponding value. The pairs must be separated with '='.
func SearchLabels(labels []string, query string) string {
Reply to: