diff --git a/Makefile b/Makefile index 3d3e1ab7..9642b4cb 100644 --- a/Makefile +++ b/Makefile @@ -131,7 +131,7 @@ functests-shared: $(TEST_IMG) .PHONY: ebpf ebpf: $(RUN) $(SUDO) go generate ./internal/datapath - $(RUN) $(SUDO) go test ./internal/datapath -run TestVerifier -count 1 + $(RUN) $(SUDO) go test -tags bpf ./internal/datapath -run TestVerifier -count 1 $(TEST_IMG): @docker build -t test-img -f $(IMG_TESTDATA_DIR)/Dockerfile $(IMG_TESTDATA_DIR) diff --git a/cmd/platformd/main.go b/cmd/platformd/main.go index 8492a7b7..9c4d66dc 100644 --- a/cmd/platformd/main.go +++ b/cmd/platformd/main.go @@ -25,34 +25,33 @@ func main() { mgmtListenSock = fs.String("management-server-listen-sock", "/run/platformd/platformd.sock", "path to the unix domain socket to listen on") //nolint:lll mgmtSockUID = fs.Uint64("management-server-listen-sock-uid", 9012, "unix domain socket uid") mgmtSockGID = fs.Uint64("management-server-listen-sock-gid", 9012, "unix domain socket gid") - criListenSock = fs.String("cri-listen-sock", "/var/run/crio/crio.sock", "path to the unix domain socket the CRI is listening on") //nolint:lll - envoyImage = fs.String("envoy-image", "", "container image to use for envoy") //nolint:lll - coreDNSImage = fs.String("coredns-image", "", "container image to use for CoreDNS") //nolint:lll - getsockoptCgroup = fs.String("getsockopt-cgroup", "", "container image to use for coredns") //nolint:lll - dnsServer = fs.String("dns-server", "", "dns server used by the containers") //nolint:lll - hostIface = fs.String("host-iface", "", "internet-facing network interface for ingress and egress traffic") //nolint:lll - maxAttempts = fs.Uint("max-attempts", 5, "maximum number of attempts workload creation attempts") //nolint:lll - syncInterval = fs.Duration("sync-interval", 200*time.Millisecond, "i") //nolint:lll - nodeID = fs.String("node-id", "", "unique node id") //nolint:lll - minPort = fs.Uint("min-port", 30000, "start of the port range") //nolint:lll - maxPort = 
fs.Uint("max-port", 40000, "end of the port range") //nolint:lll - workloadNamespace = fs.String("workload-namespace", "", "namespace where the workload is deployed") //nolint:lll - registryEndpoint = fs.String("registry-endpoint", "", "registry endpoint where base images will be pulled from and checkpoints pushed to") //nolint:lll - registryUser = fs.String("registry-user", "", "user for the registry") //nolint:lll - registryPass = fs.String("registry-password", "", "password for the registry") //nolint:lll - controlPlaneEndpoint = fs.String("control-plane-endpoint", "", "control plane endpoint") //nolint:lll - checkCPUPeriod = fs.Uint64("checkpoint-cpu-period", 0, "period of checking CPU period") //nolint:lll - checkCPUQuota = fs.Uint64("checkpoint-cpu-quota", 0, "quota of checking CPU quota") //nolint:lll - checkMemoryLimitInBytes = fs.Uint64("checkpoint-memory-limit-bytes", 0, "memory limit of the container that will be checkpointed") //nolint:lll - checkLocationDir = fs.String("checkpoint-file-dir", "/tmp/platformd", "directory where checkpoint files will be stored") //nolint:lll - checkTimeout = fs.Uint64("checkpoint-timeout-seconds", 60, "timeout for checkpoint creation") //nolint:lll - checkListenAddr = fs.String("checkpoint-listen-addr", "", "timeout for checkpoint creation") //nolint:lll - checkStatusRetentionDuration = fs.Duration("checkpoint-status-retention-period", 1*time.Minute, "timeout for checkpoint creation") //nolint:lll - checkContainerReadyTimeout = fs.Duration("checkpoint-container-ready-timeout", 1*time.Minute, "maximum time to wait until the container is ready for checkpointing") //nolint:lll - checkWaitAfterServerInit = fs.Duration("checkpoint-wait-server-init", 10*time.Second, "how long to wait before performing a checkpoint after server has initialized ") //nolint:lll - mcServerManagementAPIToken = fs.String("mc-server-management-api-token", "", "token to use for the minecraft server management api") //nolint:lll - serverMonImage = 
fs.String("servermon-image", "", "image to use for the servermon container") //nolint:lll - _ = fs.String("config", "/etc/platformd/config.json", "path to the config file") //nolint:lll + criListenSock = fs.String("cri-listen-sock", "/var/run/crio/crio.sock", "path to the unix domain socket the CRI is listening on") //nolint:lll + envoyImage = fs.String("envoy-image", "", "container image to use for envoy") //nolint:lll + coreDNSImage = fs.String("coredns-image", "", "container image to use for CoreDNS") //nolint:lll + getsockoptCgroup = fs.String("getsockopt-cgroup", "", "container image to use for coredns") //nolint:lll + dnsServer = fs.String("dns-server", "", "dns server used by the containers") //nolint:lll + hostIface = fs.String("host-iface", "", "internet-facing network interface for ingress and egress traffic") //nolint:lll + maxAttempts = fs.Uint("max-attempts", 5, "maximum number of attempts workload creation attempts") //nolint:lll + syncInterval = fs.Duration("sync-interval", 200*time.Millisecond, "i") //nolint:lll + nodeID = fs.String("node-id", "", "unique node id") //nolint:lll + minPort = fs.Uint("min-port", 30000, "start of the port range") //nolint:lll + maxPort = fs.Uint("max-port", 40000, "end of the port range") //nolint:lll + workloadNamespace = fs.String("workload-namespace", "", "namespace where the workload is deployed") //nolint:lll + registryEndpoint = fs.String("registry-endpoint", "", "registry endpoint where base images will be pulled from and checkpoints pushed to") //nolint:lll + registryUser = fs.String("registry-user", "", "user for the registry") //nolint:lll + registryPass = fs.String("registry-password", "", "password for the registry") //nolint:lll + controlPlaneEndpoint = fs.String("control-plane-endpoint", "", "control plane endpoint") //nolint:lll + checkCPUPeriod = fs.Uint64("checkpoint-cpu-period", 0, "period of checking CPU period") //nolint:lll + checkCPUQuota = fs.Uint64("checkpoint-cpu-quota", 0, "quota of checking 
CPU quota") //nolint:lll + checkMemoryLimitInBytes = fs.Uint64("checkpoint-memory-limit-bytes", 0, "memory limit of the container that will be checkpointed") //nolint:lll + checkLocationDir = fs.String("checkpoint-file-dir", "/tmp/platformd", "directory where checkpoint files will be stored") //nolint:lll + checkTimeout = fs.Uint64("checkpoint-timeout-seconds", 60, "timeout for checkpoint creation") //nolint:lll + checkListenAddr = fs.String("checkpoint-listen-addr", "", "timeout for checkpoint creation") //nolint:lll + checkStatusRetentionDuration = fs.Duration("checkpoint-status-retention-period", 1*time.Minute, "timeout for checkpoint creation") //nolint:lll + checkContainerReadyTimeout = fs.Duration("checkpoint-container-ready-timeout", 1*time.Minute, "maximum time to wait until the container is ready for checkpointing") //nolint:lll + mcServerManagementAPIToken = fs.String("mc-server-management-api-token", "", "token to use for the minecraft server management api") //nolint:lll + serverMonImage = fs.String("servermon-image", "", "image to use for the servermon container") //nolint:lll + _ = fs.String("config", "/etc/platformd/config.json", "path to the config file") //nolint:lll ) if err := ff.Parse(fs, os.Args[1:], ff.WithEnvVarPrefix("PLATFORMD"), @@ -97,7 +96,6 @@ func main() { ListenAddr: *checkListenAddr, StatusRetentionPeriod: *checkStatusRetentionDuration, ContainerReadyTimeout: *checkContainerReadyTimeout, - WaitAfterServerInit: *checkWaitAfterServerInit, }, ManagementSocketUID: *mgmtSockUID, ManagementSocketGID: *mgmtSockGID, diff --git a/internal/datapath/objects.go b/internal/datapath/objects.go index 9194794b..e112833e 100644 --- a/internal/datapath/objects.go +++ b/internal/datapath/objects.go @@ -127,8 +127,9 @@ func (o *Objects) BlockIP4Connections(cgroupPath string) error { Attach: ebpf.AttachCGroupInet4Connect, Path: cgroupPath, }); err != nil { - return err + return fmt.Errorf("attach: %w", err) } + return nil } @@ -138,8 +139,9 @@ func (o 
*Objects) BlockIP6Connections(cgroupPath string) error { Attach: ebpf.AttachCGroupInet6Connect, Path: cgroupPath, }); err != nil { - return err + return fmt.Errorf("attach: %w", err) } + return nil } @@ -155,7 +157,7 @@ func (o *Objects) AttachAndPinSNAT(iface *net.Interface) error { // pin because cni is short-lived if err := l.Pin(fmt.Sprintf("%s/snat_%s", ProgPinPath, iface.Name)); err != nil { - return fmt.Errorf("pin link: %w", err) + return fmt.Errorf("pin: %w", err) } return nil @@ -177,7 +179,7 @@ func (o *Objects) AttachAndPinDNAT(iface *net.Interface) error { // TODO: update prog return nil } - return fmt.Errorf("pin link: %w", err) + return fmt.Errorf("pin: %w", err) } return nil @@ -195,7 +197,7 @@ func (o *Objects) AttachAndPinARP(iface *net.Interface) error { // pin because cni is short-lived if err := l.Pin(fmt.Sprintf("%s/arp_%s", ProgPinPath, iface.Name)); err != nil { - return fmt.Errorf("pin link: %w", err) + return fmt.Errorf("pin: %w", err) } return nil diff --git a/internal/datapath/sock_bpfeb.o b/internal/datapath/sock_bpfeb.o index 10843cc0..f309b7ba 100644 Binary files a/internal/datapath/sock_bpfeb.o and b/internal/datapath/sock_bpfeb.o differ diff --git a/internal/datapath/sock_bpfel.o b/internal/datapath/sock_bpfel.o index 7a9f5a9e..ed48eb3a 100644 Binary files a/internal/datapath/sock_bpfel.o and b/internal/datapath/sock_bpfel.o differ diff --git a/platformd/checkpoint/config.go b/platformd/checkpoint/config.go index 751ed676..d6f47d3e 100644 --- a/platformd/checkpoint/config.go +++ b/platformd/checkpoint/config.go @@ -33,5 +33,4 @@ type Config struct { ListenAddr string StatusRetentionPeriod time.Duration ContainerReadyTimeout time.Duration - WaitAfterServerInit time.Duration } diff --git a/platformd/checkpoint/service.go b/platformd/checkpoint/service.go index 0a932acd..f49e6906 100644 --- a/platformd/checkpoint/service.go +++ b/platformd/checkpoint/service.go @@ -274,17 +274,16 @@ func (s *ServiceImpl) checkpoint(ctx context.Context, 
id string, baseRef name.Re return fmt.Errorf("find netns: %w", err) } + // sockets in state TCP_TIME_WAIT and TCP_CLOSE_WAIT will be left over, + // because our ebpf programs are not able to close them. + // + // in order to make restoring the checkpoint work, we have to specify + // the --tcp-close option in /etc/criu/crun.conf. + // see https://criu.org/CLI/opt/--tcp-close for reference. if err := s.sockHandler.DestroySocks(netnsPath); err != nil { return fmt.Errorf("kill sockets: %w", err) } - // wait a little after closing all sockets, as it seems that if we directly checkpoint we still get: - // - // Error (criu/sk-inet.c:191): inet: Connected TCP socket, consider using --tcp-established option. - // - // linux might take some time to close all sockets eventually. - time.Sleep(s.cfg.WaitAfterServerInit) - logger.InfoContext(ctx, "checkpointing container", "container_id", ctrID) if _, err := s.criService.CheckpointContainer(ctx, &runtimev1.CheckpointContainerRequest{ diff --git a/platformd/config.go b/platformd/config.go index 4d72970f..7f3361c6 100644 --- a/platformd/config.go +++ b/platformd/config.go @@ -34,7 +34,6 @@ type Config struct { ListenAddr string StatusRetentionPeriod time.Duration ContainerReadyTimeout time.Duration - WaitAfterServerInit time.Duration } ManagementSocketUID uint64 ManagementSocketGID uint64 diff --git a/platformd/server.go b/platformd/server.go index 082497de..fd6529b1 100644 --- a/platformd/server.go +++ b/platformd/server.go @@ -145,7 +145,6 @@ func (s *Server) Run(ctx context.Context, cfg Config) error { ListenAddr: cfg.CheckpointConfig.ListenAddr, StatusRetentionPeriod: cfg.CheckpointConfig.StatusRetentionPeriod, ContainerReadyTimeout: cfg.CheckpointConfig.ContainerReadyTimeout, - WaitAfterServerInit: cfg.CheckpointConfig.WaitAfterServerInit, }, criSvc, image.NewService(checkSvcLogger, cfg.RegistryUser, cfg.RegistryPass, "/tmp"),