From 639921e66983ee1c34d9f31e22cc3ee558052bc7 Mon Sep 17 00:00:00 2001 From: sidneychang <2190206983@qq.com> Date: Mon, 18 May 2026 22:08:55 -0400 Subject: [PATCH 1/2] feat(shim): inject missing urunc annotations from image metadata (#565) Merge com.urunc.unikernel.* keys from the container image into bundle config.json when the OCI spec is missing them, using a containerd session before task create. Signed-off-by: sidneychang <2190206983@qq.com> --- go.mod | 4 +- .../containerd/inject_missing_annotations.go | 153 ++++++++++++++++++ .../inject_missing_annotations.go | 81 ++++++++++ pkg/containerd-shim/task_service.go | 16 ++ 4 files changed, 252 insertions(+), 2 deletions(-) create mode 100644 pkg/containerd-shim/containerd/inject_missing_annotations.go create mode 100644 pkg/containerd-shim/inject_missing_annotations.go diff --git a/go.mod b/go.mod index 82252800..63d54ffe 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,7 @@ require ( github.com/cavaliergopher/cpio v1.0.1 github.com/containerd/containerd v1.7.30 github.com/containerd/containerd/api v1.10.0 + github.com/containerd/platforms v0.2.1 github.com/containerd/ttrpc v1.2.7 github.com/creack/pty v1.1.24 github.com/elastic/go-seccomp-bpf v1.6.0 @@ -18,6 +19,7 @@ require ( github.com/nubificus/hedge_cli v0.0.3 github.com/onsi/ginkgo/v2 v2.28.1 github.com/onsi/gomega v1.39.1 + github.com/opencontainers/image-spec v1.1.1 github.com/opencontainers/runc v1.3.4 github.com/opencontainers/runtime-spec v1.2.1 github.com/prometheus-community/pro-bing v0.8.0 @@ -45,7 +47,6 @@ require ( github.com/containerd/fifo v1.1.0 // indirect github.com/containerd/go-runc v1.0.0 // indirect github.com/containerd/log v0.1.0 // indirect - github.com/containerd/platforms v0.2.1 // indirect github.com/containerd/typeurl/v2 v2.2.3 // indirect github.com/coreos/go-systemd/v22 v22.7.0 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect @@ -67,7 +68,6 @@ require ( github.com/moby/sys/sequential v0.6.0 // indirect github.com/moby/sys/user v0.4.0 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect - github.com/opencontainers/image-spec v1.1.1 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/stretchr/objx v0.5.3 // indirect diff --git a/pkg/containerd-shim/containerd/inject_missing_annotations.go b/pkg/containerd-shim/containerd/inject_missing_annotations.go new file mode 100644 index 00000000..803f8684 --- /dev/null +++ b/pkg/containerd-shim/containerd/inject_missing_annotations.go @@ -0,0 +1,153 @@ +// Copyright (c) 2023-2026, Nubificus LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Temporary containerd helpers for urunc #565 (read urunc keys from image metadata). +package containerd + +import ( + "context" + "encoding/json" + "fmt" + "io" + "strings" + + contentapi "github.com/containerd/containerd/api/services/content/v1" + imagesapi "github.com/containerd/containerd/api/services/images/v1" + "github.com/containerd/containerd/api/types" + "github.com/containerd/containerd/images" + "github.com/containerd/platforms" + imageSpec "github.com/opencontainers/image-spec/specs-go/v1" +) + +// ImageAnnotations returns image config labels and manifest annotations with +// the provided prefix. Manifest annotations take precedence over config labels. +func (s *Session) ImageAnnotations(ctx context.Context, prefix string) (map[string]string, error) { + imageRef := s.container.GetImage() + if imageRef == "" { + return nil, fmt.Errorf("container %q has empty image ref", s.containerID) + } + + imageResp, err := s.imagesClient().Get(withNamespace(ctx, s.namespace), &imagesapi.GetImageRequest{Name: imageRef}) + if err != nil { + return nil, fmt.Errorf("get image %s: %w", imageRef, containerdErr(err)) + } + + return s.imageAnnotations(ctx, imageResp.Image.Target, prefix) +} + +func (s *Session) imageAnnotations(ctx context.Context, target *types.Descriptor, prefix string) (map[string]string, error) { + ctx = withNamespace(ctx, s.namespace) + contentClient := s.contentClient() + + manifestDesc, err := manifestDescriptor(ctx, contentClient, target) + if err != nil { + return nil, err + } + + manifestRaw, err := readBlob(ctx, contentClient, manifestDesc.Digest, manifestDesc.Size) + if err != nil { + return nil, fmt.Errorf("read manifest blob: %w", err) + } + var manifest imageSpec.Manifest + if err := json.Unmarshal(manifestRaw, &manifest); err != nil { + return nil, fmt.Errorf("unmarshal manifest: %w", err) + } + + configRaw, err := readBlob(ctx, contentClient, manifest.Config.Digest.String(), manifest.Config.Size) + if err != nil { + return nil, fmt.Errorf("read image config blob: %w", err) + } + var imageConfig imageSpec.Image + if err := json.Unmarshal(configRaw, &imageConfig); err != nil { + return nil, fmt.Errorf("unmarshal image config: %w", err) + } + + annotations := make(map[string]string) + for key, value := range imageConfig.Config.Labels { + if strings.HasPrefix(key, prefix) { + annotations[key] = value + } + } + for key, value := range manifest.Annotations { + if strings.HasPrefix(key, prefix) { + annotations[key] = value + } + } + + return annotations, nil +} + +func manifestDescriptor( + ctx context.Context, + contentClient contentapi.ContentClient, + target *types.Descriptor, +) (*types.Descriptor, error) { + if images.IsManifestType(target.MediaType) { + return target, nil + } + + if !images.IsIndexType(target.MediaType) { + return nil, fmt.Errorf("unsupported image target media type: %s", target.MediaType) + } + + indexRaw, err := readBlob(ctx, contentClient, target.Digest, target.Size) + if err != nil { + return nil, fmt.Errorf("read image index blob: %w", err) + } + + var index imageSpec.Index + if err := json.Unmarshal(indexRaw, &index); err != nil { + return nil, fmt.Errorf("unmarshal image index: %w", err) + } + + matcher := platforms.DefaultStrict() + for _, manifest := range index.Manifests { + if manifest.Platform == nil { + continue + } + if matcher.Match(*manifest.Platform) { + return &types.Descriptor{ + MediaType: manifest.MediaType, + Digest: manifest.Digest.String(), + Size: manifest.Size, + }, nil + } + } + + return nil, fmt.Errorf("no matching manifest found in image index for platform %s", platforms.Format(platforms.DefaultSpec())) +} + +func readBlob(ctx context.Context, contentClient contentapi.ContentClient, digest string, size int64) ([]byte, error) { + stream, err := contentClient.Read(ctx, &contentapi.ReadContentRequest{ + Digest: digest, + Size: size, + }) + if err != nil { + return nil, containerdErr(err) + } + + var raw []byte + for { + resp, err := stream.Recv() + if err == io.EOF { + break + } + if err != nil { + return nil, containerdErr(err) + } + raw = append(raw, resp.Data...) + } + + return raw, nil +} diff --git a/pkg/containerd-shim/inject_missing_annotations.go b/pkg/containerd-shim/inject_missing_annotations.go new file mode 100644 index 00000000..77d9dc9b --- /dev/null +++ b/pkg/containerd-shim/inject_missing_annotations.go @@ -0,0 +1,81 @@ +// Copyright (c) 2023-2026, Nubificus LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Temporary shim-side copy of urunc #565 (inject missing image annotations into +// bundle config.json). Keep this file self-contained so it can be dropped or +// reconciled when #565 merges. +package containerdshim + +import ( + "context" + "encoding/base64" + "encoding/json" + "fmt" + "os" + "path/filepath" + + taskAPI "github.com/containerd/containerd/api/runtime/task/v2" + specs "github.com/opencontainers/runtime-spec/specs-go" + shimcontainerd "github.com/urunc-dev/urunc/pkg/containerd-shim/containerd" +) + +const uruncAnnotationPrefix = "com.urunc.unikernel." + +func (s *taskService) injectMissingAnnotations(ctx context.Context, r *taskAPI.CreateTaskRequest, session *shimcontainerd.Session) error { + configPath := filepath.Join(r.Bundle, "config.json") + info, err := os.Stat(configPath) + if err != nil { + return fmt.Errorf("stat config.json: %w", err) + } + + data, err := os.ReadFile(configPath) + if err != nil { + return fmt.Errorf("read config.json: %w", err) + } + + var spec specs.Spec + if err := json.Unmarshal(data, &spec); err != nil { + return fmt.Errorf("unmarshal config.json: %w", err) + } + if spec.Annotations == nil { + spec.Annotations = make(map[string]string) + } + + imageAnnots, err := session.ImageAnnotations(ctx, uruncAnnotationPrefix) + if err != nil { + return err + } + + changed := false + for key, value := range imageAnnots { + if _, ok := spec.Annotations[key]; ok { + continue + } + spec.Annotations[key] = base64.StdEncoding.EncodeToString([]byte(value)) + changed = true + } + if !changed { + return nil + } + + out, err := json.MarshalIndent(&spec, "", " ") + if err != nil { + return fmt.Errorf("marshal config.json: %w", err) + } + if err := os.WriteFile(configPath, out, info.Mode()); err != nil { + return fmt.Errorf("write config.json: %w", err) + } + + return nil +} diff --git a/pkg/containerd-shim/task_service.go b/pkg/containerd-shim/task_service.go index 70fb44a7..a79f9235 100644 --- a/pkg/containerd-shim/task_service.go +++ b/pkg/containerd-shim/task_service.go @@ -19,6 +19,8 @@ import ( taskAPI "github.com/containerd/containerd/api/runtime/task/v2" "github.com/containerd/ttrpc" + "github.com/sirupsen/logrus" + shimcontainerd "github.com/urunc-dev/urunc/pkg/containerd-shim/containerd" ) // taskService is urunc's shim-side wrapper around containerd's runc task @@ -31,6 +33,20 @@ type taskService struct { } func (s *taskService) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (*taskAPI.CreateTaskResponse, error) { + session, err := shimcontainerd.OpenSession(ctx, s.containerdAddress, r.ID) + if err != nil { + logrus.WithError(err).WithField("container_id", r.ID).Warn("urunc shim: failed to open containerd session") + } else { + defer session.Close() + } + + // #565: merge image metadata into bundle config.json when spec lacks urunc keys. + if session != nil { + if err := s.injectMissingAnnotations(ctx, r, session); err != nil { + logrus.WithError(err).WithField("container_id", r.ID).Warn("urunc shim: failed to inject missing annotations") + } + } + return s.TaskService.Create(ctx, r) } From abb7d84694c06dea33113c3fb1b94bc44d379f7c Mon Sep 17 00:00:00 2001 From: sidneychang <2190206983@qq.com> Date: Mon, 18 May 2026 22:09:03 -0400 Subject: [PATCH 2/2] feat(shim): precompute guest rootfs before task create Select guest rootfs in the shim from CreateTask rootfs mounts and bundle annotations, persist the result in config.json, and consume it at runtime Exec with a podman fallback when the internal annotation is absent. Signed-off-by: sidneychang <2190206983@qq.com> --- pkg/containerd-shim/guest_rootfs.go | 127 +++++++++++++++++ pkg/containerd-shim/task_service.go | 10 ++ pkg/unikontainers/config.go | 15 ++ pkg/unikontainers/rootfs.go | 209 +++++++++++++++++++++++++--- pkg/unikontainers/unikontainers.go | 106 +++++--------- 5 files changed, 381 insertions(+), 86 deletions(-) create mode 100644 pkg/containerd-shim/guest_rootfs.go diff --git a/pkg/containerd-shim/guest_rootfs.go b/pkg/containerd-shim/guest_rootfs.go new file mode 100644 index 00000000..0711e48f --- /dev/null +++ b/pkg/containerd-shim/guest_rootfs.go @@ -0,0 +1,127 @@ +// Copyright (c) 2023-2026, Nubificus LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package containerdshim + +import ( + "encoding/json" + "errors" + "fmt" + "os" + "path/filepath" + + taskAPI "github.com/containerd/containerd/api/runtime/task/v2" + containerdTypes "github.com/containerd/containerd/api/types" + specs "github.com/opencontainers/runtime-spec/specs-go" + "github.com/sirupsen/logrus" + "github.com/urunc-dev/urunc/pkg/unikontainers" +) + +var errGuestRootfsChoiceSkipped = errors.New("guest rootfs choice skipped") + +// chooseGuestRootfs selects guest rootfs parameters before inner task Create and +// persists them in the bundle OCI spec for runtime Exec to consume. +func chooseGuestRootfs(r *taskAPI.CreateTaskRequest) error { + spec, mode, err := loadSpec(r.Bundle) + if err != nil { + return err + } + log := logrus.WithFields(logrus.Fields{ + "container_id": r.ID, + "bundle": filepath.Clean(r.Bundle), + }) + + config, err := unikontainers.GetUnikernelConfigFromSpecAnnotations(spec) + if err != nil { + return errGuestRootfsChoiceSkipped + } + + annotations := config.Map() + uruncCfg, cfgErr := unikontainers.LoadUruncConfig(unikontainers.UruncConfigPath) + if cfgErr != nil { + log.WithError(cfgErr).Warn("urunc shim: failed to load urunc config; using defaults for guest rootfs choice") + } + + rootfsParams, err := unikontainers.ChooseRootfs(filepath.Clean(r.Bundle), spec.Root.Path, annotations, uruncCfg, rootfsMountsFromCreateTask(r.Rootfs)) + if err != nil { + return err + } + + encoded, err := unikontainers.EncodeRootfsParams(rootfsParams) + if err != nil { + return err + } + if spec.Annotations == nil { + spec.Annotations = make(map[string]string) + } + spec.Annotations[unikontainers.RootfsParamsAnnotation()] = encoded + log.WithFields(logrus.Fields{ + "rootfs_type": rootfsParams.Type, + "rootfs_path": rootfsParams.Path, + "mon_rootfs": rootfsParams.MonRootfs, + }).Info("urunc shim: wrote guest rootfs choice to bundle") + + return saveSpec(r.Bundle, spec, mode) +} + +func rootfsMountsFromCreateTask(rootfs []*containerdTypes.Mount) []unikontainers.RootfsMount { + mounts := make([]unikontainers.RootfsMount, 0, len(rootfs)) + for _, m := range rootfs { + if m == nil { + continue + } + mounts = append(mounts, unikontainers.RootfsMount{ + Type: m.Type, + Source: m.Source, + }) + } + return mounts +} + +// loadSpec reads the OCI runtime spec (config.json) from the task bundle at CreateTask time. +// Callers need the full spec on disk (root path, annotations read/write); the CreateTask RPC does +// not include the OCI document. injectMissingAnnotations runs before chooseGuestRootfs +// in taskService.Create. +func loadSpec(bundle string) (*specs.Spec, os.FileMode, error) { + configPath := filepath.Join(bundle, "config.json") + info, err := os.Stat(configPath) + if err != nil { + return nil, 0, fmt.Errorf("stat config.json: %w", err) + } + + data, err := os.ReadFile(configPath) + if err != nil { + return nil, 0, fmt.Errorf("read config.json: %w", err) + } + + var spec specs.Spec + if err := json.Unmarshal(data, &spec); err != nil { + return nil, 0, fmt.Errorf("unmarshal config.json: %w", err) + } + if spec.Root == nil { + return nil, 0, fmt.Errorf("invalid OCI spec: root section is required") + } + + return &spec, info.Mode(), nil +} + +func saveSpec(bundle string, spec *specs.Spec, mode os.FileMode) error { + data, err := json.MarshalIndent(spec, "", " ") + if err != nil { + return fmt.Errorf("marshal config.json: %w", err) + } + + configPath := filepath.Join(bundle, "config.json") + return os.WriteFile(configPath, data, mode) +} diff --git a/pkg/containerd-shim/task_service.go b/pkg/containerd-shim/task_service.go index a79f9235..690fb6d2 100644 --- a/pkg/containerd-shim/task_service.go +++ b/pkg/containerd-shim/task_service.go @@ -16,6 +16,7 @@ package containerdshim import ( "context" + "errors" taskAPI "github.com/containerd/containerd/api/runtime/task/v2" "github.com/containerd/ttrpc" @@ -47,6 +48,15 @@ func (s *taskService) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) } } + // #684: ChooseRootfs in shim and persist params in bundle before inner task Create. + if err := chooseGuestRootfs(r); err != nil { + if errors.Is(err, errGuestRootfsChoiceSkipped) { + logrus.WithField("container_id", r.ID).Debug("urunc shim: guest rootfs choice skipped") + return s.TaskService.Create(ctx, r) + } + logrus.WithError(err).Warn("urunc shim: failed to choose guest rootfs") + return nil, err + } return s.TaskService.Create(ctx, r) } diff --git a/pkg/unikontainers/config.go b/pkg/unikontainers/config.go index 7751aa27..e2c5403b 100644 --- a/pkg/unikontainers/config.go +++ b/pkg/unikontainers/config.go @@ -116,6 +116,21 @@ func GetUnikernelConfig(bundleDir string, spec *specs.Spec) (*UnikernelConfig, e return jsonConf, nil } +// GetUnikernelConfigFromSpecAnnotations retrieves urunc configuration only +// from OCI spec annotations. Unlike GetUnikernelConfig, it does not fall back +// to files inside the container rootfs, so it is safe to call from the shim +// before containerd mounts the task rootfs. +func GetUnikernelConfigFromSpecAnnotations(spec *specs.Spec) (*UnikernelConfig, error) { + conf := getConfigFromSpec(spec) + if err := conf.validate(); err != nil { + return nil, err + } + if err := conf.decode(); err != nil { + return nil, err + } + return conf, nil +} + // getConfigFromSpec retrieves the urunc specific annotations from the spec and populates the Unikernel config. func getConfigFromSpec(spec *specs.Spec) *UnikernelConfig { unikernelType := spec.Annotations[annotType] diff --git a/pkg/unikontainers/rootfs.go b/pkg/unikontainers/rootfs.go index 0fd7e38d..d51d620e 100644 --- a/pkg/unikontainers/rootfs.go +++ b/pkg/unikontainers/rootfs.go @@ -15,19 +15,27 @@ package unikontainers import ( + "encoding/base64" + "encoding/json" + "errors" "fmt" "os" "path/filepath" "strconv" + "strings" "golang.org/x/sys/unix" + "github.com/urunc-dev/urunc/pkg/unikontainers/hypervisors" "github.com/urunc-dev/urunc/pkg/unikontainers/types" + "github.com/urunc-dev/urunc/pkg/unikontainers/unikernels" ) // TODO: Find and set the correct size for the tmpfs in the host const tmpfsSizeForNoRootfs = "65536k" +const annotInternalRootfsParams = "com.urunc.internal.rootfs.params" + type rootfsBuilder interface { preSetup() error postSetup() error @@ -38,12 +46,84 @@ type rootfsBuilder interface { // rootfsSelector encapsulates the context for rootfs selection type rootfsSelector struct { - bundle string - cntrRootfs string - annot map[string]string - unikernel types.Unikernel - vmm types.VMM - vfsdPath string + cntrRootfs string + annot map[string]string + unikernel types.Unikernel + vmm types.VMM + vfsdPath string + containerRootfsBlock *types.BlockDevParams +} + +// RootfsMount describes a single rootfs mount from a CreateTask request. +type RootfsMount struct { + Type string + Source string +} + +func resolveBundleRootfs(bundle, specRoot string) (string, error) { + return resolveAgainstBase(filepath.Clean(bundle), filepath.Clean(specRoot)) +} + +// BlockFromRootfsMounts returns block device parameters when CreateTask supplies +// a single device-backed rootfs mount supported by the guest unikernel. +func BlockFromRootfsMounts(mounts []RootfsMount, rootfsDir, unikernelType string) *types.BlockDevParams { + if len(mounts) != 1 { + return nil + } + + m := mounts[0] + switch m.Type { + case "", "overlay", "tmpfs", "bind": + return nil + } + if !strings.HasPrefix(m.Source, "/dev/") { + return nil + } + + unikernel, err := unikernels.New(unikernelType) + if err != nil || !unikernel.SupportsFS(m.Type) { + return nil + } + + return &types.BlockDevParams{ + Source: m.Source, + FsType: m.Type, + MountPoint: rootfsDir, + } +} + +// ChooseRootfs selects guest rootfs parameters for a unikernel container from +// bundle layout and annotations. specRoot may be relative to bundle. rootfsMounts +// carries CreateTask mounts from the shim; when nil, block backing may be probed +// from the live container rootfs mount. +func ChooseRootfs(bundle, specRoot string, annot map[string]string, cfg *UruncConfig, rootfsMounts []RootfsMount) (types.RootfsParams, error) { + bundleDir := filepath.Clean(bundle) + rootfsDir, err := resolveBundleRootfs(bundleDir, specRoot) + if err != nil { + uniklog.Errorf("could not resolve rootfs directory %s: %v", rootfsDir, err) + return types.RootfsParams{}, err + } + + var containerRootfsBlock *types.BlockDevParams + if len(rootfsMounts) > 0 { + containerRootfsBlock = BlockFromRootfsMounts(rootfsMounts, rootfsDir, annot[annotType]) + } + if containerRootfsBlock == nil && shouldMountContainerRootfs(annot) { + unikernelPreview, err := unikernels.New(annot[annotType]) + if err != nil { + return types.RootfsParams{}, err + } + if unikernelPreview.SupportsBlock() { + rootFsDevice, err := getMountInfo(rootfsDir) + if err == nil { + containerRootfsBlock = &rootFsDevice + } else if !errors.Is(err, ErrMountpoint) { + uniklog.Errorf("failed to get container's rootfs mount info: %v", err) + } + } + } + + return selectRootfs(bundleDir, rootfsDir, annot, cfg, containerRootfsBlock) } type noRootfs struct { @@ -131,7 +211,11 @@ func (rs *rootfsSelector) tryExplicitBlock() (types.RootfsParams, bool) { // shouldMountContainerRootfs checks if container rootfs should be mounted // based on the respective annotation func (rs *rootfsSelector) shouldMountContainerRootfs() bool { - annotValue := rs.annot[annotMountRootfs] + return shouldMountContainerRootfs(rs.annot) +} + +func shouldMountContainerRootfs(annot map[string]string) bool { + annotValue := annot[annotMountRootfs] if annotValue == "" { return false } @@ -152,12 +236,11 @@ func (rs *rootfsSelector) tryContainerBlockRootfs() (types.RootfsParams, bool) { return types.RootfsParams{}, false } - rootFsDevice, err := getMountInfo(rs.cntrRootfs) - if err != nil { - uniklog.Errorf("failed to get container's rootfs mount info: %v", err) + if rs.containerRootfsBlock == nil { return types.RootfsParams{}, false } + rootFsDevice := *rs.containerRootfsBlock if !rs.unikernel.SupportsFS(rootFsDevice.FsType) { return types.RootfsParams{}, false } @@ -235,15 +318,109 @@ func (rs *rootfsSelector) tryContainerRootfs() (types.RootfsParams, bool) { return types.RootfsParams{}, false } -func switchMonRootfs(res types.RootfsParams, bundle string) (types.RootfsParams, error) { - monRootfs := filepath.Join(bundle, monitorRootfsDirName) - err := os.MkdirAll(monRootfs, 0o755) +// switchMonRootfs records where the monitor process rootfs will live under the +// bundle. It does not create or mount that directory; prepareMonRootfs does that +// later during runtime Exec. +func switchMonRootfs(res types.RootfsParams, bundle string) types.RootfsParams { + res.MonRootfs = filepath.Join(bundle, monitorRootfsDirName) + return res +} + +// selectRootfs determines the guest rootfs without creating, mounting, or copying +// any files. bundle and cntrRootfs must already be resolved absolute paths. +func selectRootfs(bundle, cntrRootfs string, annot map[string]string, cfg *UruncConfig, containerRootfsBlock *types.BlockDevParams) (types.RootfsParams, error) { + if cfg == nil { + return types.RootfsParams{}, fmt.Errorf("urunc config is required for guest rootfs selection") + } + + unikernel, err := unikernels.New(annot[annotType]) + if err != nil { + return types.RootfsParams{}, err + } + + vmm, err := hypervisors.NewVMM(hypervisors.VmmType(annot[annotHypervisor]), cfg.Monitors) + if err != nil { + return types.RootfsParams{}, err + } + + vfsdPath := "" + if bin, ok := cfg.ExtraBins["virtiofsd"]; ok { + vfsdPath = bin.Path + } + + selector := &rootfsSelector{ + cntrRootfs: cntrRootfs, + annot: annot, + unikernel: unikernel, + vmm: vmm, + vfsdPath: vfsdPath, + containerRootfsBlock: containerRootfsBlock, + } + + result, ok := selector.tryInitrd() + if ok { + return result, nil + } + + result, ok = selector.tryExplicitBlock() + if ok { + return result, nil + } + + result, ok = selector.tryContainerRootfs() + if ok { + return switchMonRootfs(result, bundle), nil + } + + if selector.shouldMountContainerRootfs() { + return types.RootfsParams{}, fmt.Errorf("can not use the container rootfs as the sandbox's guest rootfs through block or shared-fs") + } + + uniklog.Info("no rootfs configured for guest") + result.MonRootfs = cntrRootfs + + return result, nil +} + +// RootfsParamsAnnotation returns the internal annotation key used to hand +// shim-selected rootfs parameters to the runtime. +func RootfsParamsAnnotation() string { + return annotInternalRootfsParams +} + +// EncodeRootfsParams serializes rootfs parameters for storage in an OCI +// annotation. +func EncodeRootfsParams(rootfs types.RootfsParams) (string, error) { + data, err := json.Marshal(rootfs) if err != nil { - return types.RootfsParams{}, fmt.Errorf("failed to create monitor rootfs directory %s: %w", monRootfs, err) + return "", err + } + return base64.StdEncoding.EncodeToString(data), nil +} + +// DecodeRootfsParams reads rootfs parameters from OCI annotations. The bool +// return value reports whether the internal annotation was present. +func DecodeRootfsParams(annotations map[string]string) (types.RootfsParams, bool, error) { + encoded := annotations[annotInternalRootfsParams] + if encoded == "" { + return types.RootfsParams{}, false, nil + } + + data, err := base64.StdEncoding.DecodeString(encoded) + if err != nil { + return types.RootfsParams{}, true, err + } + + var rootfs types.RootfsParams + if err := json.Unmarshal(data, &rootfs); err != nil { + return types.RootfsParams{}, true, err + } + + if rootfs.MonRootfs == "" { + return types.RootfsParams{}, true, fmt.Errorf("rootfs params annotation is missing monitor rootfs") } - res.MonRootfs = monRootfs - return res, nil + return rootfs, true, nil } // pivotRootfs changes rootfs with pivot diff --git a/pkg/unikontainers/unikontainers.go b/pkg/unikontainers/unikontainers.go index b7d61def..d14c537d 100644 --- a/pkg/unikontainers/unikontainers.go +++ b/pkg/unikontainers/unikontainers.go @@ -231,73 +231,6 @@ func (u *Unikontainer) SetupNet() (types.NetDevParams, error) { return netArgs, nil } -// chooseRootfs determines the best rootfs configuration based on available options -// Priority order: -// 1. Initrd (if specified) -// 2. Explicit block device annotation (if mounted at /) -// 3. Container rootfs as block device (if MountRootfs=true and supported) -// 4. Container rootfs as shared-fs: virtiofs > 9pfs (if MountRootfs=true and supported) -// 5. No rootfs -func (u *Unikontainer) chooseRootfs() (types.RootfsParams, error) { - bundleDir := filepath.Clean(u.State.Bundle) - rootfsDir := filepath.Clean(u.Spec.Root.Path) - rootfsDir, err := resolveAgainstBase(bundleDir, rootfsDir) - if err != nil { - uniklog.Errorf("could not resolve rootfs directory %s: %v", rootfsDir, err) - return types.RootfsParams{}, err - } - - unikernelType := u.State.Annotations[annotType] - unikernel, err := unikernels.New(unikernelType) - if err != nil { - return types.RootfsParams{}, err - } - - vmmType := u.State.Annotations[annotHypervisor] - vmm, err := hypervisors.NewVMM(hypervisors.VmmType(vmmType), u.UruncCfg.Monitors) - if err != nil { - return types.RootfsParams{}, err - } - - virtiofsdConfig := u.UruncCfg.ExtraBins["virtiofsd"] - - selector := &rootfsSelector{ - bundle: bundleDir, - cntrRootfs: rootfsDir, - annot: u.State.Annotations, - unikernel: unikernel, - vmm: vmm, - vfsdPath: virtiofsdConfig.Path, - } - - // Priority 1: Initrd - result, ok := selector.tryInitrd() - if ok { - return result, nil - } - - // Priority 2: Explicit block annotation - result, ok = selector.tryExplicitBlock() - if ok { - return result, nil - } - - // Priority 3 & 4: Container rootfs (block or shared-fs) - result, ok = selector.tryContainerRootfs() - if ok { - return switchMonRootfs(result, bundleDir) - } - - if selector.shouldMountContainerRootfs() { - return types.RootfsParams{}, fmt.Errorf("can not use the container rootfs as the sandbox's guest rootfs through block or shared-fs") - } - - uniklog.Info("no rootfs configured for guest") - result.MonRootfs = rootfsDir - - return result, nil -} - // nolint:gocyclo func (u *Unikontainer) Exec(metrics m.Writer) error { metrics.Capture(m.TS15) @@ -426,10 +359,36 @@ func (u *Unikontainer) Exec(metrics m.Writer) error { // if the respective annotation is set then, depending on the guest // (supports block or 9pfs), it will use the supported option. In case // both ae supported, then the block option will be used by default. - rootfsParams, err := u.chooseRootfs() + // + // Guest rootfs may already be chosen by the urunc shim and stored in the + // bundle config.json (com.urunc.internal.rootfs.params). When that annotation + // is absent, select here instead (e.g. podman or other runtimes that call urunc + // directly). MonRootfs directory creation still happens below in this Exec path. + // + // Decode from State.Annotations only: the shim writes the internal key to + // Spec (config.json), and saveContainerState copies missing Spec keys into + // state.json during create/InitialSetup before reexec reaches Exec. + rootfsParams, shimPreselected, err := DecodeRootfsParams(u.State.Annotations) if err != nil { - uniklog.Errorf("could not choose guest rootfs: %v", err) - return err + return fmt.Errorf("could not decode shim-provided guest rootfs params: %w", err) + } + if !shimPreselected { + uniklog.Debug("No shim-provided guest rootfs params found; selecting rootfs in runtime") + specRoot := "" + if u.Spec.Root != nil { + specRoot = u.Spec.Root.Path + } + rootfsParams, err = ChooseRootfs(u.State.Bundle, specRoot, u.State.Annotations, u.UruncCfg, nil) + if err != nil { + uniklog.Errorf("could not choose guest rootfs: %v", err) + return err + } + } else { + uniklog.WithFields(logrus.Fields{ + "rootfs_type": rootfsParams.Type, + "rootfs_path": rootfsParams.Path, + "mon_rootfs": rootfsParams.MonRootfs, + }).Info("Using shim-provided guest rootfs params") } // TODO: Add support for using both an existing @@ -479,6 +438,13 @@ func (u *Unikontainer) Exec(metrics m.Writer) error { } } + if rootfsParams.MonRootfs == filepath.Join(bundleDir, monitorRootfsDirName) { + err = os.MkdirAll(rootfsParams.MonRootfs, 0o755) + if err != nil { + return fmt.Errorf("failed to create monitor rootfs directory %s: %w", rootfsParams.MonRootfs, err) + } + } + err = rfsBuilder.preSetup() if err != nil { return fmt.Errorf("pre setup step for rootfs failed: %w", err)