Skip to content

Commit 0edbc58

Browse files
committed
Improvements on Retry Logic
1 parent e8e2e4a commit 0edbc58

5 files changed

Lines changed: 535 additions & 75 deletions

internal/controller/serverbootconfig_helpers.go

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
apimeta "k8s.io/apimachinery/pkg/api/meta"
2828
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2929
"k8s.io/apimachinery/pkg/types"
30+
"k8s.io/client-go/util/retry"
3031
ctrl "sigs.k8s.io/controller-runtime"
3132
"sigs.k8s.io/controller-runtime/pkg/client"
3233
"sigs.k8s.io/controller-runtime/pkg/reconcile"
@@ -145,24 +146,27 @@ func PatchServerBootConfigWithError(
145146
}
146147

147148
// PatchServerBootConfigCondition patches a single condition on the ServerBootConfiguration status.
148-
// Callers should only set condition types they own.
149+
// Callers should only set condition types they own. Retries on conflict so concurrent condition
150+
// writes from HTTP and PXE controllers do not lose each other's updates.
149151
func PatchServerBootConfigCondition(
150152
ctx context.Context,
151153
c client.Client,
152154
namespacedName types.NamespacedName,
153155
condition metav1.Condition,
154156
) error {
155-
var cur metalv1alpha1.ServerBootConfiguration
156-
if fetchErr := c.Get(ctx, namespacedName, &cur); fetchErr != nil {
157-
return fmt.Errorf("failed to fetch ServerBootConfiguration: %w", fetchErr)
158-
}
159-
base := cur.DeepCopy()
157+
return retry.RetryOnConflict(retry.DefaultRetry, func() error {
158+
var cur metalv1alpha1.ServerBootConfiguration
159+
if fetchErr := c.Get(ctx, namespacedName, &cur); fetchErr != nil {
160+
return fmt.Errorf("failed to fetch ServerBootConfiguration: %w", fetchErr)
161+
}
162+
base := cur.DeepCopy()
160163

161-
// Default to current generation if caller didn't set it.
162-
if condition.ObservedGeneration == 0 {
163-
condition.ObservedGeneration = cur.Generation
164-
}
165-
apimeta.SetStatusCondition(&cur.Status.Conditions, condition)
164+
// Default to current generation if caller didn't set it.
165+
if condition.ObservedGeneration == 0 {
166+
condition.ObservedGeneration = cur.Generation
167+
}
168+
apimeta.SetStatusCondition(&cur.Status.Conditions, condition)
166169

167-
return c.Status().Patch(ctx, &cur, client.MergeFrom(base))
170+
return c.Status().Patch(ctx, &cur, client.MergeFrom(base))
171+
})
168172
}

internal/controller/serverbootconfiguration_http_controller.go

Lines changed: 28 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
bootv1alpha1 "github.com/ironcore-dev/boot-operator/api/v1alpha1"
2525
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2626
"k8s.io/apimachinery/pkg/runtime"
27+
"k8s.io/client-go/util/retry"
2728
ctrl "sigs.k8s.io/controller-runtime"
2829
"sigs.k8s.io/controller-runtime/pkg/client"
2930
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
@@ -153,33 +154,35 @@ func (r *ServerBootConfigurationHTTPReconciler) reconcile(ctx context.Context, l
153154

154155
func (r *ServerBootConfigurationHTTPReconciler) patchHTTPBootReadyConditionFromHTTPState(ctx context.Context, httpBootConfig *bootv1alpha1.HTTPBootConfig, cfg *metalv1alpha1.ServerBootConfiguration) error {
155156
key := types.NamespacedName{Name: cfg.Name, Namespace: cfg.Namespace}
156-
var cur metalv1alpha1.ServerBootConfiguration
157-
if err := r.Get(ctx, key, &cur); err != nil {
158-
return err
159-
}
160-
base := cur.DeepCopy()
157+
return retry.RetryOnConflict(retry.DefaultRetry, func() error {
158+
var cur metalv1alpha1.ServerBootConfiguration
159+
if err := r.Get(ctx, key, &cur); err != nil {
160+
return err
161+
}
162+
base := cur.DeepCopy()
161163

162-
cond := metav1.Condition{
163-
Type: HTTPBootReadyConditionType,
164-
ObservedGeneration: cur.Generation,
165-
}
166-
switch httpBootConfig.Status.State {
167-
case bootv1alpha1.HTTPBootConfigStateReady:
168-
cond.Status = metav1.ConditionTrue
169-
cond.Reason = "BootConfigReady"
170-
cond.Message = "HTTP boot configuration is ready."
171-
case bootv1alpha1.HTTPBootConfigStateError:
172-
cond.Status = metav1.ConditionFalse
173-
cond.Reason = "BootConfigError"
174-
cond.Message = "HTTPBootConfig reported an error."
175-
default:
176-
cond.Status = metav1.ConditionUnknown
177-
cond.Reason = "BootConfigPending"
178-
cond.Message = "Waiting for HTTPBootConfig to become Ready."
179-
}
164+
cond := metav1.Condition{
165+
Type: HTTPBootReadyConditionType,
166+
ObservedGeneration: cur.Generation,
167+
}
168+
switch httpBootConfig.Status.State {
169+
case bootv1alpha1.HTTPBootConfigStateReady:
170+
cond.Status = metav1.ConditionTrue
171+
cond.Reason = "BootConfigReady"
172+
cond.Message = "HTTP boot configuration is ready."
173+
case bootv1alpha1.HTTPBootConfigStateError:
174+
cond.Status = metav1.ConditionFalse
175+
cond.Reason = "BootConfigError"
176+
cond.Message = "HTTPBootConfig reported an error."
177+
default:
178+
cond.Status = metav1.ConditionUnknown
179+
cond.Reason = "BootConfigPending"
180+
cond.Message = "Waiting for HTTPBootConfig to become Ready."
181+
}
180182

181-
apimeta.SetStatusCondition(&cur.Status.Conditions, cond)
182-
return r.Status().Patch(ctx, &cur, client.MergeFrom(base))
183+
apimeta.SetStatusCondition(&cur.Status.Conditions, cond)
184+
return r.Status().Patch(ctx, &cur, client.MergeFrom(base))
185+
})
183186
}
184187

185188
// getSystemUUIDFromServer fetches the UUID from the referenced Server object.

internal/controller/serverbootconfiguration_pxe_controller.go

Lines changed: 28 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import (
3131
"github.com/ironcore-dev/boot-operator/internal/registry"
3232
apimeta "k8s.io/apimachinery/pkg/api/meta"
3333
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
34+
"k8s.io/client-go/util/retry"
3435
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
3536

3637
"github.com/go-logr/logr"
@@ -172,33 +173,35 @@ func (r *ServerBootConfigurationPXEReconciler) reconcile(ctx context.Context, lo
172173

173174
func (r *ServerBootConfigurationPXEReconciler) patchIPXEBootReadyConditionFromIPXEState(ctx context.Context, config *v1alpha1.IPXEBootConfig, bootConfig *metalv1alpha1.ServerBootConfiguration) error {
174175
key := types.NamespacedName{Name: bootConfig.Name, Namespace: bootConfig.Namespace}
175-
var cur metalv1alpha1.ServerBootConfiguration
176-
if err := r.Get(ctx, key, &cur); err != nil {
177-
return err
178-
}
179-
base := cur.DeepCopy()
176+
return retry.RetryOnConflict(retry.DefaultRetry, func() error {
177+
var cur metalv1alpha1.ServerBootConfiguration
178+
if err := r.Get(ctx, key, &cur); err != nil {
179+
return err
180+
}
181+
base := cur.DeepCopy()
180182

181-
cond := metav1.Condition{
182-
Type: IPXEBootReadyConditionType,
183-
ObservedGeneration: cur.Generation,
184-
}
185-
switch config.Status.State {
186-
case v1alpha1.IPXEBootConfigStateReady:
187-
cond.Status = metav1.ConditionTrue
188-
cond.Reason = "BootConfigReady"
189-
cond.Message = "IPXE boot configuration is ready."
190-
case v1alpha1.IPXEBootConfigStateError:
191-
cond.Status = metav1.ConditionFalse
192-
cond.Reason = "BootConfigError"
193-
cond.Message = "IPXEBootConfig reported an error."
194-
default:
195-
cond.Status = metav1.ConditionUnknown
196-
cond.Reason = "BootConfigPending"
197-
cond.Message = "Waiting for IPXEBootConfig to become Ready."
198-
}
183+
cond := metav1.Condition{
184+
Type: IPXEBootReadyConditionType,
185+
ObservedGeneration: cur.Generation,
186+
}
187+
switch config.Status.State {
188+
case v1alpha1.IPXEBootConfigStateReady:
189+
cond.Status = metav1.ConditionTrue
190+
cond.Reason = "BootConfigReady"
191+
cond.Message = "IPXE boot configuration is ready."
192+
case v1alpha1.IPXEBootConfigStateError:
193+
cond.Status = metav1.ConditionFalse
194+
cond.Reason = "BootConfigError"
195+
cond.Message = "IPXEBootConfig reported an error."
196+
default:
197+
cond.Status = metav1.ConditionUnknown
198+
cond.Reason = "BootConfigPending"
199+
cond.Message = "Waiting for IPXEBootConfig to become Ready."
200+
}
199201

200-
apimeta.SetStatusCondition(&cur.Status.Conditions, cond)
201-
return r.Status().Patch(ctx, &cur, client.MergeFrom(base))
202+
apimeta.SetStatusCondition(&cur.Status.Conditions, cond)
203+
return r.Status().Patch(ctx, &cur, client.MergeFrom(base))
204+
})
202205
}
203206

204207
func (r *ServerBootConfigurationPXEReconciler) getSystemUUIDFromBootConfig(ctx context.Context, config *metalv1alpha1.ServerBootConfiguration) (string, error) {

internal/controller/serverbootconfiguration_readiness_controller.go

Lines changed: 34 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
apimeta "k8s.io/apimachinery/pkg/api/meta"
1010
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1111
"k8s.io/apimachinery/pkg/runtime"
12+
"k8s.io/client-go/util/retry"
1213
ctrl "sigs.k8s.io/controller-runtime"
1314
"sigs.k8s.io/controller-runtime/pkg/client"
1415

@@ -47,12 +48,42 @@ func (r *ServerBootConfigurationReadinessReconciler) Reconcile(ctx context.Conte
4748
return ctrl.Result{}, nil
4849
}
4950

51+
desired := computeDesiredState(cfg, r.RequireHTTPBoot, r.RequireIPXEBoot)
52+
53+
if cfg.Status.State == desired {
54+
return ctrl.Result{}, nil
55+
}
56+
57+
// Re-fetch immediately before patching so that we use the freshest resourceVersion and do not
58+
// overwrite conditions that HTTP/PXE controllers may have written since our initial Get above.
59+
if err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
60+
var fresh metalv1alpha1.ServerBootConfiguration
61+
if err := r.Get(ctx, req.NamespacedName, &fresh); err != nil {
62+
return err
63+
}
64+
// Recompute desired from the freshest conditions so we never apply a stale decision.
65+
freshDesired := computeDesiredState(&fresh, r.RequireHTTPBoot, r.RequireIPXEBoot)
66+
if fresh.Status.State == freshDesired {
67+
return nil
68+
}
69+
base := fresh.DeepCopy()
70+
fresh.Status.State = freshDesired
71+
return r.Status().Patch(ctx, &fresh, client.MergeFrom(base))
72+
}); err != nil {
73+
return ctrl.Result{}, err
74+
}
75+
76+
return ctrl.Result{}, nil
77+
}
78+
79+
// computeDesiredState derives the ServerBootConfiguration state from the mode-specific conditions.
80+
func computeDesiredState(cfg *metalv1alpha1.ServerBootConfiguration, requireHTTP, requireIPXE bool) metalv1alpha1.ServerBootConfigurationState {
5081
desired := metalv1alpha1.ServerBootConfigurationStatePending
5182

5283
allReady := true
5384
hasError := false
5485

55-
if r.RequireHTTPBoot {
86+
if requireHTTP {
5687
c := apimeta.FindStatusCondition(cfg.Status.Conditions, HTTPBootReadyConditionType)
5788
switch {
5889
case c == nil:
@@ -64,7 +95,7 @@ func (r *ServerBootConfigurationReadinessReconciler) Reconcile(ctx context.Conte
6495
}
6596
}
6697

67-
if r.RequireIPXEBoot {
98+
if requireIPXE {
6899
c := apimeta.FindStatusCondition(cfg.Status.Conditions, IPXEBootReadyConditionType)
69100
switch {
70101
case c == nil:
@@ -83,17 +114,7 @@ func (r *ServerBootConfigurationReadinessReconciler) Reconcile(ctx context.Conte
83114
desired = metalv1alpha1.ServerBootConfigurationStateReady
84115
}
85116

86-
if cfg.Status.State == desired {
87-
return ctrl.Result{}, nil
88-
}
89-
90-
base := cfg.DeepCopy()
91-
cfg.Status.State = desired
92-
if err := r.Status().Patch(ctx, cfg, client.MergeFrom(base)); err != nil {
93-
return ctrl.Result{}, err
94-
}
95-
96-
return ctrl.Result{}, nil
117+
return desired
97118
}
98119

99120
func (r *ServerBootConfigurationReadinessReconciler) SetupWithManager(mgr ctrl.Manager) error {

0 commit comments

Comments
 (0)