-
Notifications
You must be signed in to change notification settings - Fork 280
[shimV2] added network controller implementation #2633
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
rawahars
wants to merge
3
commits into
microsoft:main
Choose a base branch
from
rawahars:network-controller
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+709
−108
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,41 @@ | ||
| //go:build windows | ||
|
|
||
| // Package network provides a controller for managing the network lifecycle of a pod | ||
| // running inside a Utility VM (UVM). | ||
| // | ||
| // It handles attaching an HCN namespace and its endpoints to the guest VM, | ||
| // and tearing them down on pod removal. | ||
| // | ||
| // # Lifecycle | ||
| // | ||
| // A network follows the state machine below. | ||
| // | ||
| // ┌────────────────────┐ | ||
| // │ StateNotConfigured │ | ||
| // └───┬────────────┬───┘ | ||
| // Setup ok │ │ Setup fails | ||
| // ▼ ▼ | ||
| // ┌─────────────────┐ ┌──────────────┐ | ||
| // │ StateConfigured │ │ StateInvalid │ | ||
| // └────────┬────────┘ └──────┬───────┘ | ||
| // │ Teardown │ Teardown | ||
| // ▼ ▼ | ||
| // ┌─────────────────────────────────────┐ | ||
| // │ StateTornDown │ | ||
| // └─────────────────────────────────────┘ | ||
| // | ||
| // State descriptions: | ||
| // | ||
| // - [StateNotConfigured]: initial state; no namespace or NICs have been configured. | ||
| // - [StateConfigured]: after [Controller.Setup] succeeds; the HCN namespace is attached | ||
| // and all endpoints are wired up inside the guest. | ||
| // - [StateInvalid]: entered when [Controller.Setup] fails mid-way, or when | ||
| // [Controller.Teardown] encounters an error; best-effort cleanup should be | ||
| // performed (or retried) via [Controller.Teardown]. | ||
| // - [StateTornDown]: terminal state reached after [Controller.Teardown] completes. | ||
| // | ||
| // # Platform Variants | ||
| // | ||
| // Guest-side operations differ between LCOW and WCOW and are implemented in | ||
| // platform-specific source files selected via build tags | ||
| // (default for LCOW shim, "wcow" tag for WCOW shim). | ||
| package network |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,239 @@ | ||
| //go:build windows | ||
|
|
||
| package network | ||
|
|
||
| import ( | ||
| "context" | ||
| "errors" | ||
| "fmt" | ||
| "slices" | ||
| "strings" | ||
| "sync" | ||
|
|
||
| "github.com/Microsoft/go-winio/pkg/guid" | ||
| "github.com/Microsoft/hcsshim/hcn" | ||
| "github.com/Microsoft/hcsshim/internal/log" | ||
| "github.com/Microsoft/hcsshim/internal/logfields" | ||
| "github.com/sirupsen/logrus" | ||
| ) | ||
|
|
||
| type Controller struct { | ||
| mu sync.Mutex | ||
|
|
||
| // namespaceID is the HCN namespace ID in use after a successful Setup. | ||
| namespaceID string | ||
|
|
||
| // vmEndpoints maps nicID (ID within UVM) -> HCN endpoint. | ||
| vmEndpoints map[string]*hcn.HostComputeEndpoint | ||
|
|
||
| // netState is the current lifecycle state of the network. | ||
| netState State | ||
|
|
||
| // isNamespaceSupportedByGuest determines if network namespace is supported inside the guest | ||
| isNamespaceSupportedByGuest bool | ||
|
|
||
| // vmNetManager performs host-side NIC hot-add/remove on the UVM. | ||
| vmNetManager vmNetworkManager | ||
|
|
||
| // linuxGuestMgr performs guest-side NIC inject/remove for LCOW. | ||
| linuxGuestMgr linuxGuestNetworkManager | ||
|
|
||
| // winGuestMgr performs guest-side NIC/namespace operations for WCOW. | ||
| winGuestMgr windowsGuestNetworkManager | ||
|
|
||
| // capsProvider exposes the guest's declared capabilities. | ||
| // Used to check IsNamespaceAddRequestSupported. | ||
| capsProvider capabilitiesProvider | ||
| } | ||
|
|
||
| // New creates a ready-to-use Controller in [StateNotConfigured]. | ||
| func New( | ||
| vmNetManager vmNetworkManager, | ||
| linuxGuestMgr linuxGuestNetworkManager, | ||
| windowsGuestMgr windowsGuestNetworkManager, | ||
| capsProvider capabilitiesProvider, | ||
| ) *Controller { | ||
| m := &Controller{ | ||
| vmNetManager: vmNetManager, | ||
| linuxGuestMgr: linuxGuestMgr, | ||
| winGuestMgr: windowsGuestMgr, | ||
| capsProvider: capsProvider, | ||
| netState: StateNotConfigured, | ||
| vmEndpoints: make(map[string]*hcn.HostComputeEndpoint), | ||
| } | ||
|
|
||
| // Cache once at construction so hot-add paths can branch without re-querying. | ||
| if caps := capsProvider.Capabilities(); caps != nil { | ||
| m.isNamespaceSupportedByGuest = caps.IsNamespaceAddRequestSupported() | ||
| } | ||
|
|
||
| return m | ||
| } | ||
|
|
||
| // Setup attaches the requested HCN namespace to the guest VM | ||
| // and hot-adds all endpoints found in that namespace. | ||
| // It must be called only once; subsequent calls return an error. | ||
| func (c *Controller) Setup(ctx context.Context, opts *SetupOptions) (err error) { | ||
| ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.Namespace, opts.NetworkNamespace)) | ||
|
|
||
| c.mu.Lock() | ||
| defer c.mu.Unlock() | ||
|
|
||
| log.G(ctx).Debug("starting network setup") | ||
|
|
||
| // If Setup has already been called, then error out. | ||
| if c.netState != StateNotConfigured { | ||
| return fmt.Errorf("cannot set up network in state %s", c.netState) | ||
| } | ||
|
|
||
| defer func() { | ||
| if err != nil { | ||
| // If setup fails for any reason, move to invalid so no further | ||
| // Setup calls are accepted. | ||
| c.netState = StateInvalid | ||
| log.G(ctx).WithError(err).Error("network setup failed, moving to invalid state") | ||
| } | ||
| }() | ||
|
|
||
| if opts.NetworkNamespace == "" { | ||
| return fmt.Errorf("network namespace must not be empty") | ||
| } | ||
|
|
||
| // Validate that the provided namespace exists. | ||
| hcnNamespace, err := hcn.GetNamespaceByID(opts.NetworkNamespace) | ||
| if err != nil { | ||
| return fmt.Errorf("get network namespace %s: %w", opts.NetworkNamespace, err) | ||
| } | ||
|
|
||
| // Fetch all endpoints in the namespace. | ||
| endpoints, err := c.fetchEndpointsInNamespace(ctx, hcnNamespace) | ||
| if err != nil { | ||
| return fmt.Errorf("fetch endpoints in namespace %s: %w", hcnNamespace.Id, err) | ||
| } | ||
|
|
||
| // Add the namespace to the guest. | ||
| if err = c.addNetNSInsideGuest(ctx, hcnNamespace); err != nil { | ||
| return fmt.Errorf("add network namespace to guest: %w", err) | ||
| } | ||
|
|
||
| // Hot-add all endpoints in the namespace to the guest. | ||
| for _, endpoint := range endpoints { | ||
| nicGUID, err := guid.NewV4() | ||
jterry75 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| if err != nil { | ||
| return fmt.Errorf("generate NIC GUID: %w", err) | ||
| } | ||
| // add the nicID and endpointID to the context for trace. | ||
| nicCtx, _ := log.WithContext(ctx, logrus.WithFields(logrus.Fields{"vm_nic_id": nicGUID.String(), "hns_endpoint_id": endpoint.Id})) | ||
|
|
||
| if err = c.addEndpointToGuestNamespace(nicCtx, nicGUID.String(), endpoint, opts.PolicyBasedRouting); err != nil { | ||
| return fmt.Errorf("add endpoint %s to guest: %w", endpoint.Name, err) | ||
| } | ||
| } | ||
|
|
||
| c.namespaceID = hcnNamespace.Id | ||
| c.netState = StateConfigured | ||
|
|
||
| log.G(ctx).Info("network setup completed successfully") | ||
|
|
||
| return nil | ||
| } | ||
|
|
||
| // Teardown removes all guest-side NICs and the HCN namespace from the UVM. | ||
| // | ||
| // It is idempotent: calling it when the network is already torn down or not yet | ||
| // configured is a no-op. | ||
| func (c *Controller) Teardown(ctx context.Context) error { | ||
| ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.Namespace, c.namespaceID)) | ||
|
|
||
| c.mu.Lock() | ||
| defer c.mu.Unlock() | ||
|
|
||
| log.G(ctx).WithField("State", c.netState).Debug("starting network teardown") | ||
|
|
||
| if c.netState == StateTornDown { | ||
| // Teardown is idempotent, so return nil if already torn down. | ||
| log.G(ctx).Info("network already torn down, skipping") | ||
| return nil | ||
| } | ||
|
|
||
| if c.netState == StateNotConfigured { | ||
| // Nothing was configured; nothing to clean up. | ||
| log.G(ctx).Info("network not configured, skipping") | ||
| return nil | ||
| } | ||
|
|
||
| // Remove all endpoints from the guest. | ||
| // Use a continue-on-error strategy: attempt every NIC regardless of individual | ||
| // failures, then collect all errors. | ||
| var teardownErrs []error | ||
| for nicID, endpoint := range c.vmEndpoints { | ||
| // add the nicID and endpointID to the context for trace. | ||
| nicCtx, _ := log.WithContext(ctx, logrus.WithFields(logrus.Fields{"vm_nic_id": nicID, "hns_endpoint_id": endpoint.Id})) | ||
|
|
||
| if err := c.removeEndpointFromGuestNamespace(nicCtx, nicID, endpoint); err != nil { | ||
| teardownErrs = append(teardownErrs, fmt.Errorf("remove endpoint %s from guest: %w", endpoint.Name, err)) | ||
| continue // continue attempting to remove other endpoints | ||
| } | ||
|
|
||
| delete(c.vmEndpoints, nicID) | ||
| } | ||
|
|
||
| if len(teardownErrs) > 0 { | ||
| // If any errors were encountered during teardown, mark the state as invalid. | ||
| c.netState = StateInvalid | ||
| return errors.Join(teardownErrs...) | ||
| } | ||
|
|
||
| if err := c.removeNetNSInsideGuest(ctx, c.namespaceID); err != nil { | ||
| // Mark the state as invalid so that we can retry teardown. | ||
| c.netState = StateInvalid | ||
| return fmt.Errorf("remove network namespace from guest: %w", err) | ||
| } | ||
|
|
||
| // Mark as torn down if we do not encounter any errors. | ||
| // No further Setup or Teardown calls are allowed. | ||
| c.netState = StateTornDown | ||
|
|
||
| log.G(ctx).Info("network teardown completed successfully") | ||
|
|
||
| return nil | ||
| } | ||
|
|
||
| // fetchEndpointsInNamespace retrieves all HCN endpoints present in | ||
| // the given namespace. | ||
| // Endpoints are sorted so that those with names ending in "eth0" appear first. | ||
| func (c *Controller) fetchEndpointsInNamespace(ctx context.Context, ns *hcn.HostComputeNamespace) ([]*hcn.HostComputeEndpoint, error) { | ||
| log.G(ctx).Info("fetching endpoints from the network namespace") | ||
|
|
||
| ids, err := hcn.GetNamespaceEndpointIds(ns.Id) | ||
| if err != nil { | ||
| return nil, fmt.Errorf("get endpoint IDs for namespace %s: %w", ns.Id, err) | ||
| } | ||
| endpoints := make([]*hcn.HostComputeEndpoint, 0, len(ids)) | ||
| for _, id := range ids { | ||
| ep, err := hcn.GetEndpointByID(id) | ||
| if err != nil { | ||
| return nil, fmt.Errorf("get endpoint %s: %w", id, err) | ||
| } | ||
| endpoints = append(endpoints, ep) | ||
| } | ||
|
|
||
| // Ensure the endpoint named "eth0" is added first when multiple endpoints are present, | ||
| // so it maps to eth0 inside the pod network namespace within guest. | ||
| // CNI results aren't available here, so we rely on the endpoint name suffix as a heuristic. | ||
| cmp := func(a, b *hcn.HostComputeEndpoint) int { | ||
jterry75 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| if strings.HasSuffix(a.Name, "eth0") { | ||
| return -1 | ||
| } | ||
| if strings.HasSuffix(b.Name, "eth0") { | ||
| return 1 | ||
| } | ||
| return 0 | ||
| } | ||
|
|
||
| slices.SortStableFunc(endpoints, cmp) | ||
|
|
||
| log.G(ctx).Tracef("fetched endpoints from the network namespace %+v", endpoints) | ||
|
|
||
| return endpoints, nil | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,93 @@ | ||
| //go:build windows && !wcow | ||
|
|
||
| package network | ||
|
|
||
| import ( | ||
| "context" | ||
| "fmt" | ||
|
|
||
| "github.com/Microsoft/hcsshim/hcn" | ||
| hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" | ||
| "github.com/Microsoft/hcsshim/internal/log" | ||
| "github.com/Microsoft/hcsshim/internal/protocol/guestresource" | ||
| ) | ||
|
|
||
| // addNetNSInsideGuest maps a host network namespace into the guest as a managed Guest Network Namespace. | ||
| // This is a no-op for LCOW as the network namespace is created via pause container | ||
| // and the adapters are added dynamically. | ||
| func (c *Controller) addNetNSInsideGuest(_ context.Context, _ *hcn.HostComputeNamespace) error { | ||
| return nil | ||
| } | ||
|
|
||
| // removeNetNSInsideGuest is a no-op for LCOW; the guest-managed namespace | ||
| // is torn down automatically when pause container exits. | ||
| func (c *Controller) removeNetNSInsideGuest(_ context.Context, _ string) error { | ||
| return nil | ||
| } | ||
|
|
||
| // addEndpointToGuestNamespace hot-adds an HCN endpoint to the UVM and, | ||
| // configures it inside the LCOW guest. | ||
| func (c *Controller) addEndpointToGuestNamespace(ctx context.Context, nicID string, endpoint *hcn.HostComputeEndpoint, isPolicyBasedRoutingSupported bool) error { | ||
| log.G(ctx).Info("adding endpoint to guest namespace") | ||
|
|
||
| // 1. Host-side hot-add. | ||
| if err := c.vmNetManager.AddNIC(ctx, nicID, &hcsschema.NetworkAdapter{ | ||
| EndpointId: endpoint.Id, | ||
| MacAddress: endpoint.MacAddress, | ||
| }); err != nil { | ||
| return fmt.Errorf("add NIC %s to host (endpoint %s): %w", nicID, endpoint.Id, err) | ||
| } | ||
|
|
||
| log.G(ctx).Debug("added NIC to host") | ||
|
|
||
| // Track early so Teardown cleans up even if the guest Add call fails. | ||
| c.vmEndpoints[nicID] = endpoint | ||
|
|
||
| // 2. Guest-side add. | ||
| if c.isNamespaceSupportedByGuest { | ||
| lcowAdapter, err := guestresource.BuildLCOWNetworkAdapter(nicID, endpoint, isPolicyBasedRoutingSupported) | ||
| if err != nil { | ||
| return fmt.Errorf("build LCOW network adapter for endpoint %s: %w", endpoint.Id, err) | ||
| } | ||
|
|
||
| log.G(ctx).Tracef("built LCOW network adapter: %+v", lcowAdapter) | ||
|
|
||
| if err := c.linuxGuestMgr.AddLCOWNetworkInterface(ctx, lcowAdapter); err != nil { | ||
| return fmt.Errorf("add NIC %s to guest (endpoint %s): %w", nicID, endpoint.Id, err) | ||
| } | ||
|
|
||
| log.G(ctx).Debug("nic configured in guest") | ||
| } | ||
|
|
||
| return nil | ||
| } | ||
|
|
||
| // removeEndpointFromGuestNamespace removes an endpoint from the LCOW guest | ||
| // and then hot-removes the NIC from the host. | ||
| func (c *Controller) removeEndpointFromGuestNamespace(ctx context.Context, nicID string, endpoint *hcn.HostComputeEndpoint) error { | ||
| log.G(ctx).Info("removing endpoint from guest namespace") | ||
|
|
||
| if c.isNamespaceSupportedByGuest { | ||
| // 1. LCOW guest-side removal. | ||
| if err := c.linuxGuestMgr.RemoveLCOWNetworkInterface(ctx, &guestresource.LCOWNetworkAdapter{ | ||
| NamespaceID: c.namespaceID, | ||
| ID: nicID, | ||
| }); err != nil { | ||
| return fmt.Errorf("remove NIC %s from guest: %w", nicID, err) | ||
| } | ||
|
|
||
| log.G(ctx).Debug("removed NIC from guest") | ||
| } | ||
|
|
||
| // 2. Host-side removal. | ||
| if err := c.vmNetManager.RemoveNIC(ctx, nicID, &hcsschema.NetworkAdapter{ | ||
| EndpointId: endpoint.Id, | ||
| MacAddress: endpoint.MacAddress, | ||
| }); err != nil { | ||
| return fmt.Errorf("remove NIC %s from host (endpoint %s): %w", nicID, endpoint.Id, err) | ||
| } | ||
|
|
||
| log.G(ctx).Debug("removed NIC from host") | ||
|
|
||
| return nil | ||
| } |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How can the caller call this with an unexported interface type? I didn't know that was possible?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, this relies on Go's implicit interfaces. As long as the provided argument implements the required method set, Go verifies the structural match at compile time.