diff --git a/go.mod b/go.mod index 82252800..63d54ffe 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,7 @@ require ( github.com/cavaliergopher/cpio v1.0.1 github.com/containerd/containerd v1.7.30 github.com/containerd/containerd/api v1.10.0 + github.com/containerd/platforms v0.2.1 github.com/containerd/ttrpc v1.2.7 github.com/creack/pty v1.1.24 github.com/elastic/go-seccomp-bpf v1.6.0 @@ -18,6 +19,7 @@ require ( github.com/nubificus/hedge_cli v0.0.3 github.com/onsi/ginkgo/v2 v2.28.1 github.com/onsi/gomega v1.39.1 + github.com/opencontainers/image-spec v1.1.1 github.com/opencontainers/runc v1.3.4 github.com/opencontainers/runtime-spec v1.2.1 github.com/prometheus-community/pro-bing v0.8.0 @@ -45,7 +47,6 @@ require ( github.com/containerd/fifo v1.1.0 // indirect github.com/containerd/go-runc v1.0.0 // indirect github.com/containerd/log v0.1.0 // indirect - github.com/containerd/platforms v0.2.1 // indirect github.com/containerd/typeurl/v2 v2.2.3 // indirect github.com/coreos/go-systemd/v22 v22.7.0 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect @@ -67,7 +68,6 @@ require ( github.com/moby/sys/sequential v0.6.0 // indirect github.com/moby/sys/user v0.4.0 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect - github.com/opencontainers/image-spec v1.1.1 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/stretchr/objx v0.5.3 // indirect diff --git a/pkg/containerd-shim/containerd/inject_missing_annotations.go b/pkg/containerd-shim/containerd/inject_missing_annotations.go new file mode 100644 index 00000000..803f8684 --- /dev/null +++ b/pkg/containerd-shim/containerd/inject_missing_annotations.go @@ -0,0 +1,153 @@ +// Copyright (c) 2023-2026, Nubificus LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Temporary containerd helpers for urunc #565 (read urunc keys from image metadata). +package containerd + +import ( + "context" + "encoding/json" + "fmt" + "io" + "strings" + + contentapi "github.com/containerd/containerd/api/services/content/v1" + imagesapi "github.com/containerd/containerd/api/services/images/v1" + "github.com/containerd/containerd/api/types" + "github.com/containerd/containerd/images" + "github.com/containerd/platforms" + imageSpec "github.com/opencontainers/image-spec/specs-go/v1" +) + +// ImageAnnotations returns image config labels and manifest annotations with +// the provided prefix. Manifest annotations take precedence over config labels. +func (s *Session) ImageAnnotations(ctx context.Context, prefix string) (map[string]string, error) { + imageRef := s.container.GetImage() + if imageRef == "" { + return nil, fmt.Errorf("container %q has empty image ref", s.containerID) + } + + imageResp, err := s.imagesClient().Get(withNamespace(ctx, s.namespace), &imagesapi.GetImageRequest{Name: imageRef}) + if err != nil { + return nil, fmt.Errorf("get image %s: %w", imageRef, containerdErr(err)) + } + + return s.imageAnnotations(ctx, imageResp.Image.Target, prefix) +} + +func (s *Session) imageAnnotations(ctx context.Context, target *types.Descriptor, prefix string) (map[string]string, error) { + ctx = withNamespace(ctx, s.namespace) + contentClient := s.contentClient() + + manifestDesc, err := manifestDescriptor(ctx, contentClient, target) + if err != nil { + return nil, err + } + + manifestRaw, err := readBlob(ctx, contentClient, manifestDesc.Digest, manifestDesc.Size) + if err != nil { + return nil, fmt.Errorf("read manifest blob: %w", err) + } + var manifest imageSpec.Manifest + if err := json.Unmarshal(manifestRaw, &manifest); err != nil { + return nil, fmt.Errorf("unmarshal manifest: %w", err) + } + + configRaw, err := readBlob(ctx, contentClient, manifest.Config.Digest.String(), manifest.Config.Size) + if err != nil { + return nil, fmt.Errorf("read image config blob: %w", err) + } + var imageConfig imageSpec.Image + if err := json.Unmarshal(configRaw, &imageConfig); err != nil { + return nil, fmt.Errorf("unmarshal image config: %w", err) + } + + annotations := make(map[string]string) + for key, value := range imageConfig.Config.Labels { + if strings.HasPrefix(key, prefix) { + annotations[key] = value + } + } + for key, value := range manifest.Annotations { + if strings.HasPrefix(key, prefix) { + annotations[key] = value + } + } + + return annotations, nil +} + +func manifestDescriptor( + ctx context.Context, + contentClient contentapi.ContentClient, + target *types.Descriptor, +) (*types.Descriptor, error) { + if images.IsManifestType(target.MediaType) { + return target, nil + } + + if !images.IsIndexType(target.MediaType) { + return nil, fmt.Errorf("unsupported image target media type: %s", target.MediaType) + } + + indexRaw, err := readBlob(ctx, contentClient, target.Digest, target.Size) + if err != nil { + return nil, fmt.Errorf("read image index blob: %w", err) + } + + var index imageSpec.Index + if err := json.Unmarshal(indexRaw, &index); err != nil { + return nil, fmt.Errorf("unmarshal image index: %w", err) + } + + matcher := platforms.DefaultStrict() + for _, manifest := range index.Manifests { + if manifest.Platform == nil { + continue + } + if matcher.Match(*manifest.Platform) { + return &types.Descriptor{ + MediaType: manifest.MediaType, + Digest: manifest.Digest.String(), + Size: manifest.Size, + }, nil + } + } + + return nil, fmt.Errorf("no matching manifest found in image index for platform %s", platforms.Format(platforms.DefaultSpec())) +} + +func readBlob(ctx context.Context, contentClient contentapi.ContentClient, digest string, size int64) ([]byte, error) { + stream, err := contentClient.Read(ctx, &contentapi.ReadContentRequest{ + Digest: digest, + Size: size, + }) + if err != nil { + return nil, containerdErr(err) + } + + var raw []byte + for { + resp, err := stream.Recv() + if err == io.EOF { + break + } + if err != nil { + return nil, containerdErr(err) + } + raw = append(raw, resp.Data...) + } + + return raw, nil +} diff --git a/pkg/containerd-shim/guest_rootfs.go b/pkg/containerd-shim/guest_rootfs.go new file mode 100644 index 00000000..0711e48f --- /dev/null +++ b/pkg/containerd-shim/guest_rootfs.go @@ -0,0 +1,127 @@ +// Copyright (c) 2023-2026, Nubificus LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package containerdshim + +import ( + "encoding/json" + "errors" + "fmt" + "os" + "path/filepath" + + taskAPI "github.com/containerd/containerd/api/runtime/task/v2" + containerdTypes "github.com/containerd/containerd/api/types" + specs "github.com/opencontainers/runtime-spec/specs-go" + "github.com/sirupsen/logrus" + "github.com/urunc-dev/urunc/pkg/unikontainers" +) + +var errGuestRootfsChoiceSkipped = errors.New("guest rootfs choice skipped") + +// chooseGuestRootfs selects guest rootfs parameters before inner task Create and +// persists them in the bundle OCI spec for runtime Exec to consume. +func chooseGuestRootfs(r *taskAPI.CreateTaskRequest) error { + spec, mode, err := loadSpec(r.Bundle) + if err != nil { + return err + } + log := logrus.WithFields(logrus.Fields{ + "container_id": r.ID, + "bundle": filepath.Clean(r.Bundle), + }) + + config, err := unikontainers.GetUnikernelConfigFromSpecAnnotations(spec) + if err != nil { + return errGuestRootfsChoiceSkipped + } + + annotations := config.Map() + uruncCfg, cfgErr := unikontainers.LoadUruncConfig(unikontainers.UruncConfigPath) + if cfgErr != nil { + log.WithError(cfgErr).Warn("urunc shim: failed to load urunc config; using defaults for guest rootfs choice") + } + + rootfsParams, err := unikontainers.ChooseRootfs(filepath.Clean(r.Bundle), spec.Root.Path, annotations, uruncCfg, rootfsMountsFromCreateTask(r.Rootfs)) + if err != nil { + return err + } + + encoded, err := unikontainers.EncodeRootfsParams(rootfsParams) + if err != nil { + return err + } + if spec.Annotations == nil { + spec.Annotations = make(map[string]string) + } + spec.Annotations[unikontainers.RootfsParamsAnnotation()] = encoded + log.WithFields(logrus.Fields{ + "rootfs_type": rootfsParams.Type, + "rootfs_path": rootfsParams.Path, + "mon_rootfs": rootfsParams.MonRootfs, + }).Info("urunc shim: wrote guest rootfs choice to bundle") + + return saveSpec(r.Bundle, spec, mode) +} + +func rootfsMountsFromCreateTask(rootfs []*containerdTypes.Mount) []unikontainers.RootfsMount { + mounts := make([]unikontainers.RootfsMount, 0, len(rootfs)) + for _, m := range rootfs { + if m == nil { + continue + } + mounts = append(mounts, unikontainers.RootfsMount{ + Type: m.Type, + Source: m.Source, + }) + } + return mounts +} + +// loadSpec reads the OCI runtime spec (config.json) from the task bundle at CreateTask time. +// Callers need the full spec on disk (root path, annotations read/write); the CreateTask RPC does +// not include the OCI document. injectMissingAnnotations runs before chooseGuestRootfs +// in taskService.Create. +func loadSpec(bundle string) (*specs.Spec, os.FileMode, error) { + configPath := filepath.Join(bundle, "config.json") + info, err := os.Stat(configPath) + if err != nil { + return nil, 0, fmt.Errorf("stat config.json: %w", err) + } + + data, err := os.ReadFile(configPath) + if err != nil { + return nil, 0, fmt.Errorf("read config.json: %w", err) + } + + var spec specs.Spec + if err := json.Unmarshal(data, &spec); err != nil { + return nil, 0, fmt.Errorf("unmarshal config.json: %w", err) + } + if spec.Root == nil { + return nil, 0, fmt.Errorf("invalid OCI spec: root section is required") + } + + return &spec, info.Mode(), nil +} + +func saveSpec(bundle string, spec *specs.Spec, mode os.FileMode) error { + data, err := json.MarshalIndent(spec, "", " ") + if err != nil { + return fmt.Errorf("marshal config.json: %w", err) + } + + configPath := filepath.Join(bundle, "config.json") + return os.WriteFile(configPath, data, mode) +} diff --git a/pkg/containerd-shim/inject_missing_annotations.go b/pkg/containerd-shim/inject_missing_annotations.go new file mode 100644 index 00000000..77d9dc9b --- /dev/null +++ b/pkg/containerd-shim/inject_missing_annotations.go @@ -0,0 +1,81 @@ +// Copyright (c) 2023-2026, Nubificus LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Temporary shim-side copy of urunc #565 (inject missing image annotations into +// bundle config.json). Keep this file self-contained so it can be dropped or +// reconciled when #565 merges. +package containerdshim + +import ( + "context" + "encoding/base64" + "encoding/json" + "fmt" + "os" + "path/filepath" + + taskAPI "github.com/containerd/containerd/api/runtime/task/v2" + specs "github.com/opencontainers/runtime-spec/specs-go" + shimcontainerd "github.com/urunc-dev/urunc/pkg/containerd-shim/containerd" +) + +const uruncAnnotationPrefix = "com.urunc.unikernel." + +func (s *taskService) injectMissingAnnotations(ctx context.Context, r *taskAPI.CreateTaskRequest, session *shimcontainerd.Session) error { + configPath := filepath.Join(r.Bundle, "config.json") + info, err := os.Stat(configPath) + if err != nil { + return fmt.Errorf("stat config.json: %w", err) + } + + data, err := os.ReadFile(configPath) + if err != nil { + return fmt.Errorf("read config.json: %w", err) + } + + var spec specs.Spec + if err := json.Unmarshal(data, &spec); err != nil { + return fmt.Errorf("unmarshal config.json: %w", err) + } + if spec.Annotations == nil { + spec.Annotations = make(map[string]string) + } + + imageAnnots, err := session.ImageAnnotations(ctx, uruncAnnotationPrefix) + if err != nil { + return err + } + + changed := false + for key, value := range imageAnnots { + if _, ok := spec.Annotations[key]; ok { + continue + } + spec.Annotations[key] = base64.StdEncoding.EncodeToString([]byte(value)) + changed = true + } + if !changed { + return nil + } + + out, err := json.MarshalIndent(&spec, "", " ") + if err != nil { + return fmt.Errorf("marshal config.json: %w", err) + } + if err := os.WriteFile(configPath, out, info.Mode()); err != nil { + return fmt.Errorf("write config.json: %w", err) + } + + return nil +} diff --git a/pkg/containerd-shim/task_service.go b/pkg/containerd-shim/task_service.go index 70fb44a7..690fb6d2 100644 --- a/pkg/containerd-shim/task_service.go +++ b/pkg/containerd-shim/task_service.go @@ -16,9 +16,12 @@ package containerdshim import ( "context" + "errors" taskAPI "github.com/containerd/containerd/api/runtime/task/v2" "github.com/containerd/ttrpc" + "github.com/sirupsen/logrus" + shimcontainerd "github.com/urunc-dev/urunc/pkg/containerd-shim/containerd" ) // taskService is urunc's shim-side wrapper around containerd's runc task @@ -31,6 +34,29 @@ type taskService struct { } func (s *taskService) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (*taskAPI.CreateTaskResponse, error) { + session, err := shimcontainerd.OpenSession(ctx, s.containerdAddress, r.ID) + if err != nil { + logrus.WithError(err).WithField("container_id", r.ID).Warn("urunc shim: failed to open containerd session") + } else { + defer session.Close() + } + + // #565: merge image metadata into bundle config.json when spec lacks urunc keys. + if session != nil { + if err := s.injectMissingAnnotations(ctx, r, session); err != nil { + logrus.WithError(err).WithField("container_id", r.ID).Warn("urunc shim: failed to inject missing annotations") + } + } + + // #684: ChooseRootfs in shim and persist params in bundle before inner task Create. + if err := chooseGuestRootfs(r); err != nil { + if errors.Is(err, errGuestRootfsChoiceSkipped) { + logrus.WithField("container_id", r.ID).Debug("urunc shim: guest rootfs choice skipped") + return s.TaskService.Create(ctx, r) + } + logrus.WithError(err).Warn("urunc shim: failed to choose guest rootfs") + return nil, err + } return s.TaskService.Create(ctx, r) } diff --git a/pkg/unikontainers/config.go b/pkg/unikontainers/config.go index 7751aa27..e2c5403b 100644 --- a/pkg/unikontainers/config.go +++ b/pkg/unikontainers/config.go @@ -116,6 +116,21 @@ func GetUnikernelConfig(bundleDir string, spec *specs.Spec) (*UnikernelConfig, e return jsonConf, nil } +// GetUnikernelConfigFromSpecAnnotations retrieves urunc configuration only +// from OCI spec annotations. Unlike GetUnikernelConfig, it does not fall back +// to files inside the container rootfs, so it is safe to call from the shim +// before containerd mounts the task rootfs. +func GetUnikernelConfigFromSpecAnnotations(spec *specs.Spec) (*UnikernelConfig, error) { + conf := getConfigFromSpec(spec) + if err := conf.validate(); err != nil { + return nil, err + } + if err := conf.decode(); err != nil { + return nil, err + } + return conf, nil +} + // getConfigFromSpec retrieves the urunc specific annotations from the spec and populates the Unikernel config. func getConfigFromSpec(spec *specs.Spec) *UnikernelConfig { unikernelType := spec.Annotations[annotType] diff --git a/pkg/unikontainers/rootfs.go b/pkg/unikontainers/rootfs.go index 0fd7e38d..d51d620e 100644 --- a/pkg/unikontainers/rootfs.go +++ b/pkg/unikontainers/rootfs.go @@ -15,19 +15,27 @@ package unikontainers import ( + "encoding/base64" + "encoding/json" + "errors" "fmt" "os" "path/filepath" "strconv" + "strings" "golang.org/x/sys/unix" + "github.com/urunc-dev/urunc/pkg/unikontainers/hypervisors" "github.com/urunc-dev/urunc/pkg/unikontainers/types" + "github.com/urunc-dev/urunc/pkg/unikontainers/unikernels" ) // TODO: Find and set the correct size for the tmpfs in the host const tmpfsSizeForNoRootfs = "65536k" +const annotInternalRootfsParams = "com.urunc.internal.rootfs.params" + type rootfsBuilder interface { preSetup() error postSetup() error @@ -38,12 +46,84 @@ type rootfsBuilder interface { // rootfsSelector encapsulates the context for rootfs selection type rootfsSelector struct { - bundle string - cntrRootfs string - annot map[string]string - unikernel types.Unikernel - vmm types.VMM - vfsdPath string + cntrRootfs string + annot map[string]string + unikernel types.Unikernel + vmm types.VMM + vfsdPath string + containerRootfsBlock *types.BlockDevParams +} + +// RootfsMount describes a single rootfs mount from a CreateTask request. +type RootfsMount struct { + Type string + Source string +} + +func resolveBundleRootfs(bundle, specRoot string) (string, error) { + return resolveAgainstBase(filepath.Clean(bundle), filepath.Clean(specRoot)) +} + +// BlockFromRootfsMounts returns block device parameters when CreateTask supplies +// a single device-backed rootfs mount supported by the guest unikernel. +func BlockFromRootfsMounts(mounts []RootfsMount, rootfsDir, unikernelType string) *types.BlockDevParams { + if len(mounts) != 1 { + return nil + } + + m := mounts[0] + switch m.Type { + case "", "overlay", "tmpfs", "bind": + return nil + } + if !strings.HasPrefix(m.Source, "/dev/") { + return nil + } + + unikernel, err := unikernels.New(unikernelType) + if err != nil || !unikernel.SupportsFS(m.Type) { + return nil + } + + return &types.BlockDevParams{ + Source: m.Source, + FsType: m.Type, + MountPoint: rootfsDir, + } +} + +// ChooseRootfs selects guest rootfs parameters for a unikernel container from +// bundle layout and annotations. specRoot may be relative to bundle. rootfsMounts +// carries CreateTask mounts from the shim; when nil, block backing may be probed +// from the live container rootfs mount. +func ChooseRootfs(bundle, specRoot string, annot map[string]string, cfg *UruncConfig, rootfsMounts []RootfsMount) (types.RootfsParams, error) { + bundleDir := filepath.Clean(bundle) + rootfsDir, err := resolveBundleRootfs(bundleDir, specRoot) + if err != nil { + uniklog.Errorf("could not resolve rootfs directory %s: %v", rootfsDir, err) + return types.RootfsParams{}, err + } + + var containerRootfsBlock *types.BlockDevParams + if len(rootfsMounts) > 0 { + containerRootfsBlock = BlockFromRootfsMounts(rootfsMounts, rootfsDir, annot[annotType]) + } + if containerRootfsBlock == nil && shouldMountContainerRootfs(annot) { + unikernelPreview, err := unikernels.New(annot[annotType]) + if err != nil { + return types.RootfsParams{}, err + } + if unikernelPreview.SupportsBlock() { + rootFsDevice, err := getMountInfo(rootfsDir) + if err == nil { + containerRootfsBlock = &rootFsDevice + } else if !errors.Is(err, ErrMountpoint) { + uniklog.Errorf("failed to get container's rootfs mount info: %v", err) + } + } + } + + return selectRootfs(bundleDir, rootfsDir, annot, cfg, containerRootfsBlock) } type noRootfs struct { @@ -131,7 +211,11 @@ func (rs *rootfsSelector) tryExplicitBlock() (types.RootfsParams, bool) { // shouldMountContainerRootfs checks if container rootfs should be mounted // based on the respective annotation func (rs *rootfsSelector) shouldMountContainerRootfs() bool { - annotValue := rs.annot[annotMountRootfs] + return shouldMountContainerRootfs(rs.annot) +} + +func shouldMountContainerRootfs(annot map[string]string) bool { + annotValue := annot[annotMountRootfs] if annotValue == "" { return false } @@ -152,12 +236,11 @@ func (rs *rootfsSelector) tryContainerBlockRootfs() (types.RootfsParams, bool) { return types.RootfsParams{}, false } - rootFsDevice, err := getMountInfo(rs.cntrRootfs) - if err != nil { - uniklog.Errorf("failed to get container's rootfs mount info: %v", err) + if rs.containerRootfsBlock == nil { return types.RootfsParams{}, false } + rootFsDevice := *rs.containerRootfsBlock if !rs.unikernel.SupportsFS(rootFsDevice.FsType) { return types.RootfsParams{}, false } @@ -235,15 +318,109 @@ func (rs *rootfsSelector) tryContainerRootfs() (types.RootfsParams, bool) { return types.RootfsParams{}, false } -func switchMonRootfs(res types.RootfsParams, bundle string) (types.RootfsParams, error) { - monRootfs := filepath.Join(bundle, monitorRootfsDirName) - err := os.MkdirAll(monRootfs, 0o755) +// switchMonRootfs records where the monitor process rootfs will live under the +// bundle. It does not create or mount that directory; prepareMonRootfs does that +// later during runtime Exec. +func switchMonRootfs(res types.RootfsParams, bundle string) types.RootfsParams { + res.MonRootfs = filepath.Join(bundle, monitorRootfsDirName) + return res +} + +// selectRootfs determines the guest rootfs without creating, mounting, or copying +// any files. bundle and cntrRootfs must already be resolved absolute paths. +func selectRootfs(bundle, cntrRootfs string, annot map[string]string, cfg *UruncConfig, containerRootfsBlock *types.BlockDevParams) (types.RootfsParams, error) { + if cfg == nil { + return types.RootfsParams{}, fmt.Errorf("urunc config is required for guest rootfs selection") + } + + unikernel, err := unikernels.New(annot[annotType]) + if err != nil { + return types.RootfsParams{}, err + } + + vmm, err := hypervisors.NewVMM(hypervisors.VmmType(annot[annotHypervisor]), cfg.Monitors) + if err != nil { + return types.RootfsParams{}, err + } + + vfsdPath := "" + if bin, ok := cfg.ExtraBins["virtiofsd"]; ok { + vfsdPath = bin.Path + } + + selector := &rootfsSelector{ + cntrRootfs: cntrRootfs, + annot: annot, + unikernel: unikernel, + vmm: vmm, + vfsdPath: vfsdPath, + containerRootfsBlock: containerRootfsBlock, + } + + result, ok := selector.tryInitrd() + if ok { + return result, nil + } + + result, ok = selector.tryExplicitBlock() + if ok { + return result, nil + } + + result, ok = selector.tryContainerRootfs() + if ok { + return switchMonRootfs(result, bundle), nil + } + + if selector.shouldMountContainerRootfs() { + return types.RootfsParams{}, fmt.Errorf("can not use the container rootfs as the sandbox's guest rootfs through block or shared-fs") + } + + uniklog.Info("no rootfs configured for guest") + result.MonRootfs = cntrRootfs + + return result, nil +} + +// RootfsParamsAnnotation returns the internal annotation key used to hand +// shim-selected rootfs parameters to the runtime. +func RootfsParamsAnnotation() string { + return annotInternalRootfsParams +} + +// EncodeRootfsParams serializes rootfs parameters for storage in an OCI +// annotation. +func EncodeRootfsParams(rootfs types.RootfsParams) (string, error) { + data, err := json.Marshal(rootfs) if err != nil { - return types.RootfsParams{}, fmt.Errorf("failed to create monitor rootfs directory %s: %w", monRootfs, err) + return "", err + } + return base64.StdEncoding.EncodeToString(data), nil +} + +// DecodeRootfsParams reads rootfs parameters from OCI annotations. The bool +// return value reports whether the internal annotation was present. +func DecodeRootfsParams(annotations map[string]string) (types.RootfsParams, bool, error) { + encoded := annotations[annotInternalRootfsParams] + if encoded == "" { + return types.RootfsParams{}, false, nil + } + + data, err := base64.StdEncoding.DecodeString(encoded) + if err != nil { + return types.RootfsParams{}, true, err + } + + var rootfs types.RootfsParams + if err := json.Unmarshal(data, &rootfs); err != nil { + return types.RootfsParams{}, true, err + } + + if rootfs.MonRootfs == "" { + return types.RootfsParams{}, true, fmt.Errorf("rootfs params annotation is missing monitor rootfs") } - res.MonRootfs = monRootfs - return res, nil + return rootfs, true, nil } // pivotRootfs changes rootfs with pivot diff --git a/pkg/unikontainers/unikontainers.go b/pkg/unikontainers/unikontainers.go index b7d61def..d14c537d 100644 --- a/pkg/unikontainers/unikontainers.go +++ b/pkg/unikontainers/unikontainers.go @@ -231,73 +231,6 @@ func (u *Unikontainer) SetupNet() (types.NetDevParams, error) { return netArgs, nil } -// chooseRootfs determines the best rootfs configuration based on available options -// Priority order: -// 1. Initrd (if specified) -// 2. Explicit block device annotation (if mounted at /) -// 3. Container rootfs as block device (if MountRootfs=true and supported) -// 4. Container rootfs as shared-fs: virtiofs > 9pfs (if MountRootfs=true and supported) -// 5. No rootfs -func (u *Unikontainer) chooseRootfs() (types.RootfsParams, error) { - bundleDir := filepath.Clean(u.State.Bundle) - rootfsDir := filepath.Clean(u.Spec.Root.Path) - rootfsDir, err := resolveAgainstBase(bundleDir, rootfsDir) - if err != nil { - uniklog.Errorf("could not resolve rootfs directory %s: %v", rootfsDir, err) - return types.RootfsParams{}, err - } - - unikernelType := u.State.Annotations[annotType] - unikernel, err := unikernels.New(unikernelType) - if err != nil { - return types.RootfsParams{}, err - } - - vmmType := u.State.Annotations[annotHypervisor] - vmm, err := hypervisors.NewVMM(hypervisors.VmmType(vmmType), u.UruncCfg.Monitors) - if err != nil { - return types.RootfsParams{}, err - } - - virtiofsdConfig := u.UruncCfg.ExtraBins["virtiofsd"] - - selector := &rootfsSelector{ - bundle: bundleDir, - cntrRootfs: rootfsDir, - annot: u.State.Annotations, - unikernel: unikernel, - vmm: vmm, - vfsdPath: virtiofsdConfig.Path, - } - - // Priority 1: Initrd - result, ok := selector.tryInitrd() - if ok { - return result, nil - } - - // Priority 2: Explicit block annotation - result, ok = selector.tryExplicitBlock() - if ok { - return result, nil - } - - // Priority 3 & 4: Container rootfs (block or shared-fs) - result, ok = selector.tryContainerRootfs() - if ok { - return switchMonRootfs(result, bundleDir) - } - - if selector.shouldMountContainerRootfs() { - return types.RootfsParams{}, fmt.Errorf("can not use the container rootfs as the sandbox's guest rootfs through block or shared-fs") - } - - uniklog.Info("no rootfs configured for guest") - result.MonRootfs = rootfsDir - - return result, nil -} - // nolint:gocyclo func (u *Unikontainer) Exec(metrics m.Writer) error { metrics.Capture(m.TS15) @@ -426,10 +359,36 @@ func (u *Unikontainer) Exec(metrics m.Writer) error { // if the respective annotation is set then, depending on the guest // (supports block or 9pfs), it will use the supported option. In case // both ae supported, then the block option will be used by default. - rootfsParams, err := u.chooseRootfs() + // + // Guest rootfs may already be chosen by the urunc shim and stored in the + // bundle config.json (com.urunc.internal.rootfs.params). When that annotation + // is absent, select here instead (e.g. podman or other runtimes that call urunc + // directly). MonRootfs directory creation still happens below in this Exec path. + // + // Decode from State.Annotations only: the shim writes the internal key to + // Spec (config.json), and saveContainerState copies missing Spec keys into + // state.json during create/InitialSetup before reexec reaches Exec. + rootfsParams, shimPreselected, err := DecodeRootfsParams(u.State.Annotations) if err != nil { - uniklog.Errorf("could not choose guest rootfs: %v", err) - return err + return fmt.Errorf("could not decode shim-provided guest rootfs params: %w", err) + } + if !shimPreselected { + uniklog.Debug("No shim-provided guest rootfs params found; selecting rootfs in runtime") + specRoot := "" + if u.Spec.Root != nil { + specRoot = u.Spec.Root.Path + } + rootfsParams, err = ChooseRootfs(u.State.Bundle, specRoot, u.State.Annotations, u.UruncCfg, nil) + if err != nil { + uniklog.Errorf("could not choose guest rootfs: %v", err) + return err + } + } else { + uniklog.WithFields(logrus.Fields{ + "rootfs_type": rootfsParams.Type, + "rootfs_path": rootfsParams.Path, + "mon_rootfs": rootfsParams.MonRootfs, + }).Info("Using shim-provided guest rootfs params") } // TODO: Add support for using both an existing @@ -479,6 +438,13 @@ func (u *Unikontainer) Exec(metrics m.Writer) error { } } + if rootfsParams.MonRootfs == filepath.Join(bundleDir, monitorRootfsDirName) { + err = os.MkdirAll(rootfsParams.MonRootfs, 0o755) + if err != nil { + return fmt.Errorf("failed to create monitor rootfs directory %s: %w", rootfsParams.MonRootfs, err) + } + } + err = rfsBuilder.preSetup() if err != nil { return fmt.Errorf("pre setup step for rootfs failed: %w", err)