Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions pkg/app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ func (c *App) Handler() http.Handler {
// `Get`/`Create`/`Update`, the response should be the relevant resource.
router.Handle("/v1/zones/{zone}/operations/{operation}/:wait", c.Authenticate(c.waitOperation)).Methods("POST")
router.Handle("/v1/zones/{zone}/hosts/{host}", c.Authenticate(c.deleteHost)).Methods("DELETE")
router.Handle("/v1/zones/{zone}/hosts/{host}/:wait-host-availability", c.Authenticate(c.waitCreateHost)).Methods("POST")
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The CO API is designed to have only one :wait endpoint /v1/zones/{zone}/operations/{operation}/:wait, there are no specific wait endpoints like: /:wait-for-create or /:wait-for-delete for example.

For the purpose of this PR specifically wondering whether can you can extend the wait logic when the operation was a create host operation and wait a bit longer till the HO is reachable rather than adding a new endpoint.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In that case, creating another endpoint for creating operation for waiting host orchestrator's settlement and wait it using previous :wait would be a proper way?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The purpose of this PR was to simplify the client logic. Creating a new operation for waiting on HO readiness doesn't simplify the client implementation, it makes things even more complicated than what we have right now.

The way to make client logic simpler is for the client to make two calls only:

POST /host
POST /operation/foo/:wait

When the operation was a "create host operation". POST /operation/foo/:wait should be resolved when the host was created and the HO is ready. Right now we resolve "create host operation" the moment the host was created without waiting for the HO to be ready, we need to extend this waiting logic for create host operations and not add a new endpoint.


// Infra route
router.HandleFunc("/v1/zones/{zone}/hosts/{host}/infra_config", func(w http.ResponseWriter, r *http.Request) {
Expand Down Expand Up @@ -250,6 +251,16 @@ func (c *App) waitOperation(w http.ResponseWriter, r *http.Request, user account
return nil
}

func (c *App) waitCreateHost(w http.ResponseWriter, r *http.Request, user accounts.User) error {
name := mux.Vars(r)["host"]
res, err := c.instanceManager.WaitHostAvailability(getZone(r), user, name)
if err != nil {
return err
}
replyJSON(w, res, http.StatusOK)
return nil
}

func (c *App) AuthHandler(w http.ResponseWriter, r *http.Request) error {
state := randomHexString()
s := session.Session{
Expand Down
8 changes: 8 additions & 0 deletions pkg/app/app_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,10 @@ func (m *testInstanceManager) GetHostClient(zone string, host string) (instances
return m.hostClientFactory(zone, host), nil
}

func (m *testInstanceManager) WaitHostAvailability(zone string, user accounts.User, host string) (*apiv1.HostInstance, error) {
return &apiv1.HostInstance{Name: host}, nil
}

type testHostClient struct {
url *url.URL
}
Expand All @@ -111,6 +115,10 @@ func (hc *testHostClient) GetReverseProxy() *httputil.ReverseProxy {
return httputil.NewSingleHostReverseProxy(hc.url)
}

func (hc *testHostClient) WaitForHostReady() error {
return nil
}

func TestListZonesSucceeds(t *testing.T) {
controller := NewApp(&testInstanceManager{}, &testAccountManager{}, nil, nil, nil, "", nil, apiv1.InfraConfig{}, &config.Config{})
ts := httptest.NewServer(controller.Handler())
Expand Down
23 changes: 23 additions & 0 deletions pkg/app/instances/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,13 @@ func (m *DockerInstanceManager) waitCreateHostOperation(host string) (*apiv1.Hos
return nil, fmt.Errorf("failed to inspect docker container: %w", err)
}
if res.State.Running {
client, err := m.GetHostClient("local", host)
if err != nil {
return nil, err
}
if err := client.WaitForHostReady(); err != nil {
return nil, err
}
return &apiv1.HostInstance{
Name: host,
}, nil
Expand Down Expand Up @@ -231,6 +238,22 @@ func (m *DockerInstanceManager) WaitOperation(zone string, _ accounts.User, name
}
}

func (m *DockerInstanceManager) WaitHostAvailability(zone string, user accounts.User, host string) (*apiv1.HostInstance, error) {
if zone != "local" {
return nil, errors.NewBadRequestError("Invalid zone. It should be 'local'.", nil)
}
client, err := m.GetHostClient(zone, host)
if err != nil {
return nil, err
}
if err := client.WaitForHostReady(); err != nil {
return nil, err
}
return &apiv1.HostInstance{
Name: host,
}, nil
}

func (m *DockerInstanceManager) getIpAddr(container *types.Container) (string, error) {
bridgeNetwork := container.NetworkSettings.Networks["bridge"]
if bridgeNetwork == nil {
Expand Down
110 changes: 95 additions & 15 deletions pkg/app/instances/gce.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@ import (
"context"
"fmt"
"log"
"net/http"
"net/url"
"path"
"regexp"
"time"

apiv1 "github.com/google/cloud-android-orchestration/api/v1"
"github.com/google/cloud-android-orchestration/pkg/app/accounts"
Expand All @@ -41,6 +43,7 @@ type GCPIMConfig struct {
UseExternalIP bool
// If true, instances created should be compatible with `acloud CLI`.
AcloudCompatible bool
HostReadyTimeout time.Duration
}

const (
Expand Down Expand Up @@ -78,28 +81,40 @@ func (m *GCEInstanceManager) ListZones() (*apiv1.ListZonesResponse, error) {
}, nil
}

func (m *GCEInstanceManager) GetHostAddr(zone string, host string) (string, error) {
instance, err := m.getHostInstance(zone, host)
if err != nil {
return "", err
}
ilen := len(instance.NetworkInterfaces)
func getHostAddrWithIns(ins *compute.Instance) (string, error) {
ilen := len(ins.NetworkInterfaces)
if ilen == 0 {
log.Printf("host instance %s in zone %s is missing a network interface", host, zone)
log.Printf("host instance %s in zone %s is missing a network interface", ins.Name, ins.Zone)
return "", errors.NewInternalError("host instance missing a network interface", nil)
}
if ilen > 1 {
log.Printf("host instance %s in zone %s has %d network interfaces", host, zone, ilen)
log.Printf("host instance %s in zone %s has %d network interfaces", ins.Name, ins.Zone, ilen)
}
return instance.NetworkInterfaces[0].NetworkIP, nil
return ins.NetworkInterfaces[0].NetworkIP, nil
}

func (m *GCEInstanceManager) GetHostAddr(zone string, host string) (string, error) {
ins, err := m.getHostInstance(zone, host)
if err != nil {
return "", err
}
return getHostAddrWithIns(ins)
}

func getHostURLWithIns(ins *compute.Instance, config *Config) (*url.URL, error) {
addr, err := getHostAddrWithIns(ins)
if err != nil {
return nil, err
}
return url.Parse(fmt.Sprintf("%s://%s:%d", config.HostOrchestratorProtocol, addr, config.GCP.HostOrchestratorPort))
}

func (m *GCEInstanceManager) GetHostURL(zone string, host string) (*url.URL, error) {
addr, err := m.GetHostAddr(zone, host)
ins, err := m.getHostInstance(zone, host)
if err != nil {
return nil, err
}
return url.Parse(fmt.Sprintf("%s://%s:%d", m.Config.HostOrchestratorProtocol, addr, m.Config.GCP.HostOrchestratorPort))
return getHostURLWithIns(ins, &m.Config)
}

const operationStatusDone = "DONE"
Expand Down Expand Up @@ -253,16 +268,74 @@ func (m *GCEInstanceManager) WaitOperation(zone string, user accounts.User, name
if op.Status != operationStatusDone {
return nil, errors.NewServiceUnavailableError("Wait for operation timed out", nil)
}
getter := opResultGetter{Service: m.Service, Op: op}
getter := opResultGetter{
Service: m.Service,
Op: op,
Config: &m.Config,
}
return getter.Get()
}

func (m *GCEInstanceManager) WaitHostAvailability(zone string, user accounts.User, host string) (*apiv1.HostInstance, error) {
ins, err := m.getHostInstance(zone, host)
if err != nil {
return nil, err
}
hostInstance, err := BuildHostInstance(ins)
if err != nil {
return nil, err
}
client, err := getHostClientWithIns(ins, &m.Config)
if err != nil {
return nil, err
}
if err := m.waitForOrchestrator(zone, hostInstance, client); err != nil {
return nil, err
}
return hostInstance, nil
}

func (m *GCEInstanceManager) waitForOrchestrator(zone string, host *apiv1.HostInstance, client HostClient) error {
timeout := m.Config.GCP.HostReadyTimeout
if timeout == 0 {
timeout = 5 * time.Minute
}
retryDelay := 5 * time.Second
deadline := time.Now().Add(timeout)

for time.Now().Before(deadline) {
_, err := m.Service.Instances.Get(m.Config.GCP.ProjectID, zone, host.Name).Context(context.TODO()).Do()
if err != nil {
if apiErr, ok := err.(*googleapi.Error); ok && apiErr.Code == http.StatusNotFound {
return errors.NewNotFoundError("Host was deleted concurrently", err)
}
return fmt.Errorf("failed to check host existence: %w", err)
}

status, err := client.Get("/", "", nil)
if err == nil && status != http.StatusBadGateway {
return nil
}

time.Sleep(retryDelay)
}
return errors.NewServiceUnavailableError("Wait for host orchestrator timed out", nil)
}

func getHostClientWithIns(ins *compute.Instance, config *Config) (HostClient, error) {
url, err := getHostURLWithIns(ins, config)
if err != nil {
return nil, err
}
return NewNetHostClient(url, config.AllowSelfSignedHostSSLCertificate), nil
}

func (m *GCEInstanceManager) GetHostClient(zone string, host string) (HostClient, error) {
url, err := m.GetHostURL(zone, host)
ins, err := m.getHostInstance(zone, host)
if err != nil {
return nil, err
}
return NewNetHostClient(url, m.Config.AllowSelfSignedHostSSLCertificate), nil
return getHostClientWithIns(ins, &m.Config)
}

func (m *GCEInstanceManager) getHostInstance(zone string, host string) (*compute.Instance, error) {
Expand Down Expand Up @@ -326,6 +399,7 @@ var (
type opResultGetter struct {
Service *compute.Service
Op *compute.Operation
Config *Config
}

func (g *opResultGetter) Get() (any, error) {
Expand Down Expand Up @@ -362,7 +436,13 @@ func (g *opResultGetter) buildCreateInstanceResult() (*apiv1.HostInstance, error
if err != nil {
return nil, toAppError(err)
}
return BuildHostInstance(ins)

host, err := BuildHostInstance(ins)
if err != nil {
return nil, err
}

return host, nil
}

// Converts compute API errors to AppError if relevant, return the same error otherwise
Expand Down
Loading
Loading