diff --git a/internal/operator-controller/controllers/boxcutter_reconcile_steps.go b/internal/operator-controller/controllers/boxcutter_reconcile_steps.go index dfb5dad14..6a6ee67d1 100644 --- a/internal/operator-controller/controllers/boxcutter_reconcile_steps.go +++ b/internal/operator-controller/controllers/boxcutter_reconcile_steps.go @@ -62,10 +62,11 @@ func (d *BoxcutterRevisionStatesGetter) GetRevisionStates(ctx context.Context, e // is fairly decoupled from this code where we get the annotations back out. We may want to co-locate // the set/get logic a bit better to make it more maintainable and less likely to get out of sync. rm := &RevisionMetadata{ - RevisionName: rev.Name, - Package: rev.Annotations[labels.PackageNameKey], - Image: rev.Annotations[labels.BundleReferenceKey], - Conditions: rev.Status.Conditions, + RevisionName: rev.Name, + Package: rev.Annotations[labels.PackageNameKey], + Image: rev.Annotations[labels.BundleReferenceKey], + SourceSpecHash: rev.Annotations[labels.SourceSpecHashKey], + Conditions: rev.Status.Conditions, BundleMetadata: ocv1.BundleMetadata{ Name: rev.Annotations[labels.BundleNameKey], Version: rev.Annotations[labels.BundleVersionKey], @@ -104,6 +105,7 @@ func ApplyBundleWithBoxcutter(apply func(ctx context.Context, contentFS fs.FS, e labels.PackageNameKey: state.resolvedRevisionMetadata.Package, labels.BundleVersionKey: state.resolvedRevisionMetadata.Version, labels.BundleReferenceKey: state.resolvedRevisionMetadata.Image, + labels.SourceSpecHashKey: CatalogSpecHash(ext), } objLbls := map[string]string{ labels.OwnerKindKey: ocv1.ClusterExtensionKind, diff --git a/internal/operator-controller/controllers/clusterextension_controller.go b/internal/operator-controller/controllers/clusterextension_controller.go index a3fcf48f3..46fd59901 100644 --- a/internal/operator-controller/controllers/clusterextension_controller.go +++ b/internal/operator-controller/controllers/clusterextension_controller.go @@ -497,9 +497,10 @@ func clusterExtensionRequestsForCatalog(c client.Reader, logger logr.Logger) crh } type RevisionMetadata struct { - RevisionName string - Package string - Image string + RevisionName string + Package string + Image string + SourceSpecHash string ocv1.BundleMetadata Conditions []metav1.Condition } diff --git a/internal/operator-controller/controllers/clusterextension_controller_test.go b/internal/operator-controller/controllers/clusterextension_controller_test.go index 6aa55c5f9..117e52dc7 100644 --- a/internal/operator-controller/controllers/clusterextension_controller_test.go +++ b/internal/operator-controller/controllers/clusterextension_controller_test.go @@ -678,28 +678,6 @@ func TestClusterExtensionBoxcutterApplierFailsDoesNotLeakDeprecationErrors(t *te require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=false", features.BoxcutterRuntime))) }) - cl, reconciler := newClientAndReconciler(t, func(d *deps) { - // Boxcutter keeps a rolling revision when apply fails. We mirror that state so the test uses - // the same inputs the runtime would see. - d.RevisionStatesGetter = &MockRevisionStatesGetter{ - RevisionStates: &controllers.RevisionStates{ - RollingOut: []*controllers.RevisionMetadata{{}}, - }, - } - d.Resolver = resolve.Func(func(ctx context.Context, ext *ocv1.ClusterExtension, installedBundle *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) { - v := bundle.VersionRelease{ - Version: bsemver.MustParse("1.0.0"), - } - return &declcfg.Bundle{ - Name: "prometheus.v1.0.0", - Package: "prometheus", - Image: "quay.io/operatorhubio/prometheus@fake1.0.0", - }, &v, nil, nil - }) - d.ImagePuller = &imageutil.MockPuller{ImageFS: fstest.MapFS{}} - d.Applier = &MockApplier{err: errors.New("boxcutter apply failure")} - }) - ctx := context.Background() extKey := types.NamespacedName{Name: fmt.Sprintf("cluster-extension-test-%s", rand.String(8))} @@ -721,6 +699,38 @@ func TestClusterExtensionBoxcutterApplierFailsDoesNotLeakDeprecationErrors(t *te }, }, } + specHash := specHashFor(clusterExtension.Spec.Source.Catalog) + + cl, reconciler := newClientAndReconciler(t, func(d *deps) { + // Boxcutter keeps a rolling revision when apply fails. We mirror that state so the test uses + // the same inputs the runtime would see. The revision's SourceSpecHash must match the spec so + // the controller recognizes this as a still-valid rollout (not a spec change). + d.RevisionStatesGetter = &MockRevisionStatesGetter{ + RevisionStates: &controllers.RevisionStates{ + RollingOut: []*controllers.RevisionMetadata{{ + Package: "prometheus", + Image: "quay.io/operatorhubio/prometheus@fake1.0.0", + SourceSpecHash: specHash, + BundleMetadata: ocv1.BundleMetadata{ + Name: "prometheus.v1.0.0", + Version: "1.0.0", + }, + }}, + }, + } + d.Resolver = resolve.Func(func(ctx context.Context, ext *ocv1.ClusterExtension, installedBundle *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) { + v := bundle.VersionRelease{ + Version: bsemver.MustParse("1.0.0"), + } + return &declcfg.Bundle{ + Name: "prometheus.v1.0.0", + Package: "prometheus", + Image: "quay.io/operatorhubio/prometheus@fake1.0.0", + }, &v, nil, nil + }) + d.ImagePuller = &imageutil.MockPuller{ImageFS: fstest.MapFS{}} + d.Applier = &MockApplier{err: errors.New("boxcutter apply failure")} + }) require.NoError(t, cl.Create(ctx, clusterExtension)) res, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey}) @@ -768,6 +778,619 @@ func TestClusterExtensionBoxcutterApplierFailsDoesNotLeakDeprecationErrors(t *te require.NoError(t, cl.DeleteAllOf(ctx, &ocv1.ClusterExtension{})) } +// TestClusterExtensionSpecMismatchWhileRollingOutReResolvesBundle verifies that when a +// ClusterExtension's spec requests a different version than the revision currently rolling out, +// the controller re-resolves the bundle from the catalog instead of continuing to use the stale revision. +// +// Scenario: +// - A revision for v1.0.2 is rolling out (e.g., stuck due to deployment probe failure) +// - The ClusterExtension spec already requests v1.0.3 at reconcile time (e.g., previously updated by the user) +// - The controller detects the spec/revision mismatch and re-resolves from the catalog +// - The resolver returns v1.0.3, which is used for the new rollout +func TestClusterExtensionSpecMismatchWhileRollingOutReResolvesBundle(t *testing.T) { + require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=true", features.BoxcutterRuntime))) + t.Cleanup(func() { + require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=false", features.BoxcutterRuntime))) + }) + + oldSpecHash := specHashFor(&ocv1.CatalogFilter{PackageName: "nginx88138", Version: "1.0.2"}) + + resolverCalled := false + cl, reconciler := newClientAndReconciler(t, func(d *deps) { + d.RevisionStatesGetter = &MockRevisionStatesGetter{ + RevisionStates: &controllers.RevisionStates{ + Installed: &controllers.RevisionMetadata{ + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.1-nginxolm88138", + BundleMetadata: ocv1.BundleMetadata{ + Name: "nginx88138.v1.0.1", + Version: "1.0.1", + }, + }, + RollingOut: []*controllers.RevisionMetadata{{ + RevisionName: "extension-88138-2", + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.2-nginx88138", + SourceSpecHash: oldSpecHash, + BundleMetadata: ocv1.BundleMetadata{ + Name: "nginx88138.v1.0.2", + Version: "1.0.2", + }, + }}, + }, + } + d.Resolver = resolve.Func(func(ctx context.Context, ext *ocv1.ClusterExtension, installedBundle *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) { + resolverCalled = true + v := bundle.VersionRelease{ + Version: bsemver.MustParse("1.0.3"), + } + return &declcfg.Bundle{ + Name: "nginx88138.v1.0.3", + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.3-nginxolm88138", + }, &v, nil, nil + }) + d.ImagePuller = &imageutil.MockPuller{ImageFS: fstest.MapFS{}} + d.Applier = &MockApplier{installCompleted: true} + }) + + ctx := context.Background() + extKey := types.NamespacedName{Name: fmt.Sprintf("cluster-extension-test-%s", rand.String(8))} + + clusterExtension := &ocv1.ClusterExtension{ + ObjectMeta: metav1.ObjectMeta{Name: extKey.Name}, + Spec: ocv1.ClusterExtensionSpec{ + Source: ocv1.SourceConfig{ + SourceType: "Catalog", + Catalog: &ocv1.CatalogFilter{ + PackageName: "nginx88138", + Version: "1.0.3", + }, + }, + Namespace: "default", + ServiceAccount: ocv1.ServiceAccountReference{ + Name: "default", + }, + }, + } + require.NoError(t, cl.Create(ctx, clusterExtension)) + + res, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey}) + require.Equal(t, ctrl.Result{}, res) + require.NoError(t, err) + + require.True(t, resolverCalled, + "resolver should be called because the rolling out revision (v1.0.2) does not match the spec (v1.0.3)") + + require.NoError(t, cl.Get(ctx, extKey, clusterExtension)) + + installedCond := apimeta.FindStatusCondition(clusterExtension.Status.Conditions, ocv1.TypeInstalled) + require.NotNil(t, installedCond) + require.Equal(t, metav1.ConditionTrue, installedCond.Status) + require.Contains(t, installedCond.Message, "1.0.3") + + require.NoError(t, cl.DeleteAllOf(ctx, &ocv1.ClusterExtension{})) +} + +// TestClusterExtensionRollingOutRevisionMatchesSpecNoReResolve verifies that when a +// revision is rolling out and the spec hasn't changed, the controller uses the existing +// rolling out revision without re-resolving from the catalog. +func TestClusterExtensionRollingOutRevisionMatchesSpecNoReResolve(t *testing.T) { + require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=true", features.BoxcutterRuntime))) + t.Cleanup(func() { + require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=false", features.BoxcutterRuntime))) + }) + + specHash := specHashFor(&ocv1.CatalogFilter{PackageName: "nginx88138", Version: "1.0.2"}) + + resolverCalled := false + cl, reconciler := newClientAndReconciler(t, func(d *deps) { + d.RevisionStatesGetter = &MockRevisionStatesGetter{ + RevisionStates: &controllers.RevisionStates{ + RollingOut: []*controllers.RevisionMetadata{{ + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.2-nginx88138", + SourceSpecHash: specHash, + BundleMetadata: ocv1.BundleMetadata{ + Name: "nginx88138.v1.0.2", + Version: "1.0.2", + }, + }}, + }, + } + d.Resolver = resolve.Func(func(ctx context.Context, ext *ocv1.ClusterExtension, installedBundle *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) { + resolverCalled = true + v := bundle.VersionRelease{ + Version: bsemver.MustParse("1.0.2"), + } + return &declcfg.Bundle{ + Name: "nginx88138.v1.0.2", + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.2-nginx88138", + }, &v, nil, nil + }) + d.ImagePuller = &imageutil.MockPuller{ImageFS: fstest.MapFS{}} + d.Applier = &MockApplier{installCompleted: true} + }) + + ctx := context.Background() + extKey := types.NamespacedName{Name: fmt.Sprintf("cluster-extension-test-%s", rand.String(8))} + + clusterExtension := &ocv1.ClusterExtension{ + ObjectMeta: metav1.ObjectMeta{Name: extKey.Name}, + Spec: ocv1.ClusterExtensionSpec{ + Source: ocv1.SourceConfig{ + SourceType: "Catalog", + Catalog: &ocv1.CatalogFilter{ + PackageName: "nginx88138", + Version: "1.0.2", + }, + }, + Namespace: "default", + ServiceAccount: ocv1.ServiceAccountReference{ + Name: "default", + }, + }, + } + require.NoError(t, cl.Create(ctx, clusterExtension)) + + _, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey}) + require.NoError(t, err) + + require.False(t, resolverCalled, + "resolver should NOT be called because the rolling out revision (v1.0.2) matches the spec (v1.0.2)") + + require.NoError(t, cl.DeleteAllOf(ctx, &ocv1.ClusterExtension{})) +} + +// TestClusterExtensionMultipleRollingOutRevisionsUsesLatestMatch verifies that when multiple +// revisions are rolling out (e.g., a stuck revision and a new one created after re-resolution), +// the controller picks the latest revision that matches the spec. +// +// This covers the lifecycle after re-resolution: the stuck revision (v1.0.2) is still active, +// a new revision (v1.0.1) was created. The revision controller will archive the old one once +// the new one succeeds, but in the meantime both are in the RollingOut list. +func TestClusterExtensionMultipleRollingOutRevisionsUsesLatestMatch(t *testing.T) { + require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=true", features.BoxcutterRuntime))) + t.Cleanup(func() { + require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=false", features.BoxcutterRuntime))) + }) + + oldSpecHash := specHashFor(&ocv1.CatalogFilter{PackageName: "nginx88138", Version: "1.0.2"}) + currentSpecHash := specHashFor(&ocv1.CatalogFilter{PackageName: "nginx88138", Version: "1.0.1"}) + + resolverCalled := false + cl, reconciler := newClientAndReconciler(t, func(d *deps) { + d.RevisionStatesGetter = &MockRevisionStatesGetter{ + RevisionStates: &controllers.RevisionStates{ + Installed: &controllers.RevisionMetadata{ + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.0-nginxolm88138", + BundleMetadata: ocv1.BundleMetadata{ + Name: "nginx88138.v1.0.0", + Version: "1.0.0", + }, + }, + RollingOut: []*controllers.RevisionMetadata{ + { + RevisionName: "extension-88138-2", + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.2-nginx88138", + SourceSpecHash: oldSpecHash, + BundleMetadata: ocv1.BundleMetadata{ + Name: "nginx88138.v1.0.2", + Version: "1.0.2", + }, + }, + { + RevisionName: "extension-88138-3", + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.1-nginxolm88138", + SourceSpecHash: currentSpecHash, + BundleMetadata: ocv1.BundleMetadata{ + Name: "nginx88138.v1.0.1", + Version: "1.0.1", + }, + }, + }, + }, + } + d.Resolver = resolve.Func(func(ctx context.Context, ext *ocv1.ClusterExtension, installedBundle *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) { + resolverCalled = true + return nil, nil, nil, fmt.Errorf("should not be called") + }) + d.ImagePuller = &imageutil.MockPuller{ImageFS: fstest.MapFS{}} + d.Applier = &MockApplier{installCompleted: true} + }) + + ctx := context.Background() + extKey := types.NamespacedName{Name: fmt.Sprintf("cluster-extension-test-%s", rand.String(8))} + + clusterExtension := &ocv1.ClusterExtension{ + ObjectMeta: metav1.ObjectMeta{Name: extKey.Name}, + Spec: ocv1.ClusterExtensionSpec{ + Source: ocv1.SourceConfig{ + SourceType: "Catalog", + Catalog: &ocv1.CatalogFilter{ + PackageName: "nginx88138", + Version: "1.0.1", + }, + }, + Namespace: "default", + ServiceAccount: ocv1.ServiceAccountReference{ + Name: "default", + }, + }, + } + require.NoError(t, cl.Create(ctx, clusterExtension)) + + _, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey}) + require.NoError(t, err) + + require.False(t, resolverCalled, + "resolver should NOT be called: the latest rolling-out revision (v1.0.1) matches the spec, "+ + "even though an older stuck revision (v1.0.2) does not match") + + require.NoError(t, cl.DeleteAllOf(ctx, &ocv1.ClusterExtension{})) +} + +// TestClusterExtensionReResolvesWithInstalledAsFromVersion verifies that when re-resolving +// during a stuck rollout, the resolver receives the Installed revision (A) as the +// installedBundle parameter — not the rolling-out revision (B). This ensures upgrade +// constraints are checked against the last known-good state. +func TestClusterExtensionReResolvesWithInstalledAsFromVersion(t *testing.T) { + require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=true", features.BoxcutterRuntime))) + t.Cleanup(func() { + require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=false", features.BoxcutterRuntime))) + }) + + oldSpecHash := specHashFor(&ocv1.CatalogFilter{PackageName: "nginx88138", Version: "1.0.2"}) + + var capturedInstalledBundle *ocv1.BundleMetadata + cl, reconciler := newClientAndReconciler(t, func(d *deps) { + d.RevisionStatesGetter = &MockRevisionStatesGetter{ + RevisionStates: &controllers.RevisionStates{ + Installed: &controllers.RevisionMetadata{ + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.0-nginxolm88138", + BundleMetadata: ocv1.BundleMetadata{ + Name: "nginx88138.v1.0.0", + Version: "1.0.0", + }, + }, + RollingOut: []*controllers.RevisionMetadata{{ + RevisionName: "extension-88138-2", + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.2-nginx88138", + SourceSpecHash: oldSpecHash, + BundleMetadata: ocv1.BundleMetadata{ + Name: "nginx88138.v1.0.2", + Version: "1.0.2", + }, + }}, + }, + } + d.Resolver = resolve.Func(func(ctx context.Context, ext *ocv1.ClusterExtension, installedBundle *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) { + capturedInstalledBundle = installedBundle + v := bundle.VersionRelease{ + Version: bsemver.MustParse("1.0.1"), + } + return &declcfg.Bundle{ + Name: "nginx88138.v1.0.1", + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.1-nginxolm88138", + }, &v, nil, nil + }) + d.ImagePuller = &imageutil.MockPuller{ImageFS: fstest.MapFS{}} + d.Applier = &MockApplier{installCompleted: true} + }) + + ctx := context.Background() + extKey := types.NamespacedName{Name: fmt.Sprintf("cluster-extension-test-%s", rand.String(8))} + + clusterExtension := &ocv1.ClusterExtension{ + ObjectMeta: metav1.ObjectMeta{Name: extKey.Name}, + Spec: ocv1.ClusterExtensionSpec{ + Source: ocv1.SourceConfig{ + SourceType: "Catalog", + Catalog: &ocv1.CatalogFilter{ + PackageName: "nginx88138", + Version: "1.0.1", + }, + }, + Namespace: "default", + ServiceAccount: ocv1.ServiceAccountReference{ + Name: "default", + }, + }, + } + require.NoError(t, cl.Create(ctx, clusterExtension)) + + _, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey}) + require.NoError(t, err) + + require.NotNil(t, capturedInstalledBundle, + "resolver should receive an installedBundle") + require.Equal(t, "nginx88138.v1.0.0", capturedInstalledBundle.Name, + "resolver should receive the Installed version (v1.0.0), not the rolling-out version (v1.0.2)") + require.Equal(t, "1.0.0", capturedInstalledBundle.Version) + + require.NoError(t, cl.DeleteAllOf(ctx, &ocv1.ClusterExtension{})) +} + +// TestClusterExtensionNoInstalledRevisionSpecMismatchReResolves verifies that when there +// is no installed revision (first install attempt got stuck) and the spec changes, +// the controller re-resolves with nil as the installedBundle. +func TestClusterExtensionNoInstalledRevisionSpecMismatchReResolves(t *testing.T) { + require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=true", features.BoxcutterRuntime))) + t.Cleanup(func() { + require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=false", features.BoxcutterRuntime))) + }) + + oldSpecHash := specHashFor(&ocv1.CatalogFilter{PackageName: "nginx88138", Version: "1.0.2"}) + + resolverCalled := false + var capturedInstalledBundle *ocv1.BundleMetadata + cl, reconciler := newClientAndReconciler(t, func(d *deps) { + d.RevisionStatesGetter = &MockRevisionStatesGetter{ + RevisionStates: &controllers.RevisionStates{ + RollingOut: []*controllers.RevisionMetadata{{ + RevisionName: "extension-1", + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.2-nginx88138", + SourceSpecHash: oldSpecHash, + BundleMetadata: ocv1.BundleMetadata{ + Name: "nginx88138.v1.0.2", + Version: "1.0.2", + }, + }}, + }, + } + d.Resolver = resolve.Func(func(ctx context.Context, ext *ocv1.ClusterExtension, installedBundle *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) { + resolverCalled = true + capturedInstalledBundle = installedBundle + v := bundle.VersionRelease{ + Version: bsemver.MustParse("1.0.1"), + } + return &declcfg.Bundle{ + Name: "nginx88138.v1.0.1", + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.1-nginxolm88138", + }, &v, nil, nil + }) + d.ImagePuller = &imageutil.MockPuller{ImageFS: fstest.MapFS{}} + d.Applier = &MockApplier{installCompleted: true} + }) + + ctx := context.Background() + extKey := types.NamespacedName{Name: fmt.Sprintf("cluster-extension-test-%s", rand.String(8))} + + clusterExtension := &ocv1.ClusterExtension{ + ObjectMeta: metav1.ObjectMeta{Name: extKey.Name}, + Spec: ocv1.ClusterExtensionSpec{ + Source: ocv1.SourceConfig{ + SourceType: "Catalog", + Catalog: &ocv1.CatalogFilter{ + PackageName: "nginx88138", + Version: "1.0.1", + }, + }, + Namespace: "default", + ServiceAccount: ocv1.ServiceAccountReference{ + Name: "default", + }, + }, + } + require.NoError(t, cl.Create(ctx, clusterExtension)) + + _, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey}) + require.NoError(t, err) + + require.True(t, resolverCalled, + "resolver should be called: first install stuck (v1.0.2) and spec requests v1.0.1") + require.Nil(t, capturedInstalledBundle, + "installedBundle should be nil since no revision has been successfully installed yet") + + require.NoError(t, cl.DeleteAllOf(ctx, &ocv1.ClusterExtension{})) +} + +// TestClusterExtensionRollingOutSameSpecHashReuses verifies that when a revision's +// source spec hash matches the current spec (i.e., the spec hasn't changed since +// the revision was resolved), the controller reuses the revision without re-resolving. +func TestClusterExtensionRollingOutSameSpecHashReuses(t *testing.T) { + require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=true", features.BoxcutterRuntime))) + t.Cleanup(func() { + require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=false", features.BoxcutterRuntime))) + }) + + specHash := specHashFor(&ocv1.CatalogFilter{PackageName: "nginx88138"}) + + resolverCalled := false + cl, reconciler := newClientAndReconciler(t, func(d *deps) { + d.RevisionStatesGetter = &MockRevisionStatesGetter{ + RevisionStates: &controllers.RevisionStates{ + RollingOut: []*controllers.RevisionMetadata{{ + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.2-nginx88138", + SourceSpecHash: specHash, + BundleMetadata: ocv1.BundleMetadata{ + Name: "nginx88138.v1.0.2", + Version: "1.0.2", + }, + }}, + }, + } + d.Resolver = resolve.Func(func(ctx context.Context, ext *ocv1.ClusterExtension, installedBundle *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) { + resolverCalled = true + return nil, nil, nil, fmt.Errorf("should not be called") + }) + d.ImagePuller = &imageutil.MockPuller{ImageFS: fstest.MapFS{}} + d.Applier = &MockApplier{installCompleted: true} + }) + + ctx := context.Background() + extKey := types.NamespacedName{Name: fmt.Sprintf("cluster-extension-test-%s", rand.String(8))} + + clusterExtension := &ocv1.ClusterExtension{ + ObjectMeta: metav1.ObjectMeta{Name: extKey.Name}, + Spec: ocv1.ClusterExtensionSpec{ + Source: ocv1.SourceConfig{ + SourceType: "Catalog", + Catalog: &ocv1.CatalogFilter{ + PackageName: "nginx88138", + }, + }, + Namespace: "default", + ServiceAccount: ocv1.ServiceAccountReference{ + Name: "default", + }, + }, + } + require.NoError(t, cl.Create(ctx, clusterExtension)) + + _, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey}) + require.NoError(t, err) + + require.False(t, resolverCalled, + "resolver should NOT be called: revision's source spec hash matches the current spec") + + require.NoError(t, cl.DeleteAllOf(ctx, &ocv1.ClusterExtension{})) +} + +// TestClusterExtensionRollingOutVersionRangeMatchReuses verifies that when the spec has +// a version range constraint and the rolling-out revision was resolved from the same spec, +// the controller reuses the revision without re-resolving. +func TestClusterExtensionRollingOutVersionRangeMatchReuses(t *testing.T) { + require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=true", features.BoxcutterRuntime))) + t.Cleanup(func() { + require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=false", features.BoxcutterRuntime))) + }) + + specHash := specHashFor(&ocv1.CatalogFilter{PackageName: "nginx88138", Version: ">=1.0.0, <2.0.0"}) + + resolverCalled := false + cl, reconciler := newClientAndReconciler(t, func(d *deps) { + d.RevisionStatesGetter = &MockRevisionStatesGetter{ + RevisionStates: &controllers.RevisionStates{ + RollingOut: []*controllers.RevisionMetadata{{ + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.2-nginx88138", + SourceSpecHash: specHash, + BundleMetadata: ocv1.BundleMetadata{ + Name: "nginx88138.v1.0.2", + Version: "1.0.2", + }, + }}, + }, + } + d.Resolver = resolve.Func(func(ctx context.Context, ext *ocv1.ClusterExtension, installedBundle *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) { + resolverCalled = true + return nil, nil, nil, fmt.Errorf("should not be called") + }) + d.ImagePuller = &imageutil.MockPuller{ImageFS: fstest.MapFS{}} + d.Applier = &MockApplier{installCompleted: true} + }) + + ctx := context.Background() + extKey := types.NamespacedName{Name: fmt.Sprintf("cluster-extension-test-%s", rand.String(8))} + + clusterExtension := &ocv1.ClusterExtension{ + ObjectMeta: metav1.ObjectMeta{Name: extKey.Name}, + Spec: ocv1.ClusterExtensionSpec{ + Source: ocv1.SourceConfig{ + SourceType: "Catalog", + Catalog: &ocv1.CatalogFilter{ + PackageName: "nginx88138", + Version: ">=1.0.0, <2.0.0", + }, + }, + Namespace: "default", + ServiceAccount: ocv1.ServiceAccountReference{ + Name: "default", + }, + }, + } + require.NoError(t, cl.Create(ctx, clusterExtension)) + + _, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey}) + require.NoError(t, err) + + require.False(t, resolverCalled, + "resolver should NOT be called: revision's source spec hash matches the current spec (range unchanged)") + + require.NoError(t, cl.DeleteAllOf(ctx, &ocv1.ClusterExtension{})) +} + +func TestCatalogSpecHashSelectorNormalization(t *testing.T) { + makeExt := func(sel *metav1.LabelSelector) *ocv1.ClusterExtension { + return &ocv1.ClusterExtension{ + Spec: ocv1.ClusterExtensionSpec{ + Source: ocv1.SourceConfig{ + SourceType: "Catalog", + Catalog: &ocv1.CatalogFilter{ + PackageName: "pkg", + UpgradeConstraintPolicy: ocv1.UpgradeConstraintPolicyCatalogProvided, + Selector: sel, + }, + }, + }, + } + } + + t.Run("reordered MatchExpressions produce the same hash", func(t *testing.T) { + sel1 := &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + {Key: "env", Operator: metav1.LabelSelectorOpIn, Values: []string{"prod", "staging"}}, + {Key: "arch", Operator: metav1.LabelSelectorOpIn, Values: []string{"amd64"}}, + }, + } + sel2 := &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + {Key: "arch", Operator: metav1.LabelSelectorOpIn, Values: []string{"amd64"}}, + {Key: "env", Operator: metav1.LabelSelectorOpIn, Values: []string{"prod", "staging"}}, + }, + } + require.Equal(t, controllers.CatalogSpecHash(makeExt(sel1)), controllers.CatalogSpecHash(makeExt(sel2))) + }) + + t.Run("reordered Values within MatchExpression produce the same hash", func(t *testing.T) { + sel1 := &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + {Key: "env", Operator: metav1.LabelSelectorOpIn, Values: []string{"staging", "prod"}}, + }, + } + sel2 := &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + {Key: "env", Operator: metav1.LabelSelectorOpIn, Values: []string{"prod", "staging"}}, + }, + } + require.Equal(t, controllers.CatalogSpecHash(makeExt(sel1)), controllers.CatalogSpecHash(makeExt(sel2))) + }) + + t.Run("different selectors produce different hashes", func(t *testing.T) { + sel1 := &metav1.LabelSelector{ + MatchLabels: map[string]string{"env": "prod"}, + } + sel2 := &metav1.LabelSelector{ + MatchLabels: map[string]string{"env": "staging"}, + } + require.NotEqual(t, controllers.CatalogSpecHash(makeExt(sel1)), controllers.CatalogSpecHash(makeExt(sel2))) + }) + + t.Run("nil selector is stable", func(t *testing.T) { + h1 := controllers.CatalogSpecHash(makeExt(nil)) + h2 := controllers.CatalogSpecHash(makeExt(nil)) + require.Equal(t, h1, h2) + require.NotEmpty(t, h1) + }) + + t.Run("nil and empty selector produce the same hash", func(t *testing.T) { + hNil := controllers.CatalogSpecHash(makeExt(nil)) + hEmpty := controllers.CatalogSpecHash(makeExt(&metav1.LabelSelector{})) + require.Equal(t, hNil, hEmpty, "nil and empty selectors are semantically equivalent and must hash identically") + }) +} + func TestValidateClusterExtension(t *testing.T) { tests := []struct { name string diff --git a/internal/operator-controller/controllers/clusterextension_reconcile_steps.go b/internal/operator-controller/controllers/clusterextension_reconcile_steps.go index 14ad71785..fe0af69bf 100644 --- a/internal/operator-controller/controllers/clusterextension_reconcile_steps.go +++ b/internal/operator-controller/controllers/clusterextension_reconcile_steps.go @@ -18,9 +18,15 @@ package controllers import ( "context" + "crypto/sha256" + "encoding/hex" + "encoding/json" "errors" "fmt" + "slices" + "sort" + bsemver "github.com/blang/semver/v4" apierrors "k8s.io/apimachinery/pkg/api/errors" apimeta "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -32,6 +38,7 @@ import ( ocv1 "github.com/operator-framework/operator-controller/api/v1" "github.com/operator-framework/operator-controller/internal/operator-controller/bundleutil" + "github.com/operator-framework/operator-controller/internal/operator-controller/catalogmetadata/compare" "github.com/operator-framework/operator-controller/internal/operator-controller/labels" "github.com/operator-framework/operator-controller/internal/operator-controller/resolve" imageutil "github.com/operator-framework/operator-controller/internal/shared/util/image" @@ -140,14 +147,14 @@ func ResolveBundle(r resolve.Resolver, c client.Client) ReconcileStepFunc { return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) { l := log.FromContext(ctx) - // If already rolling out, use existing revision and set deprecation to Unknown (no catalog check) - if len(state.revisionStates.RollingOut) > 0 { - installedBundleName := "" - if state.revisionStates.Installed != nil { - installedBundleName = state.revisionStates.Installed.Name - } - SetDeprecationStatus(ext, installedBundleName, nil, false) - state.resolvedRevisionMetadata = state.revisionStates.RollingOut[0] + // When revisions are actively rolling out, we check whether the admin has + // changed the resolution-relevant spec fields (version, channels, selector, + // upgradeConstraintPolicy). If so, we re-engage the resolver to honor + // the new spec immediately. + // + // If the spec hasn't changed, we reuse the matching rolling-out revision + // to avoid unnecessary catalog lookups and provide resilience during catalog outages. + if reuseRollingOutRevision(ctx, state, ext) { return nil, nil } @@ -199,6 +206,172 @@ func ResolveBundle(r resolve.Resolver, c client.Client) ReconcileStepFunc { } } +// reuseRollingOutRevision checks whether an active rollout should continue using its +// current revision. Returns true if a matching revision was found and set on the state +// (caller should return early). Returns false if the resolver should be called instead. +// +// Re-resolution is triggered when no rolling-out revision was resolved from the same +// catalog spec as the current ClusterExtension spec. This detects any change to +// resolution-relevant fields (version, channels, selector, upgradeConstraintPolicy) +// — even when the rolling-out version still satisfies the new version constraint. +func reuseRollingOutRevision(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) bool { + if len(state.revisionStates.RollingOut) == 0 { + return false + } + + l := log.FromContext(ctx) + + // RollingOut is ordered by revision number ascending (oldest first, newest last). + latestRollingOut := state.revisionStates.RollingOut[len(state.revisionStates.RollingOut)-1] + + currentHash := CatalogSpecHash(ext) + + // Guard: if the hash can't be computed (e.g., non-catalog source), preserve the + // previous behavior and reuse the latest rolling-out revision. + if currentHash == "" { + installedBundleName := "" + if state.revisionStates.Installed != nil { + installedBundleName = state.revisionStates.Installed.Name + } + SetDeprecationStatus(ext, installedBundleName, nil, false) + state.resolvedRevisionMetadata = latestRollingOut + return true + } + + anyHasHash := false + for i := len(state.revisionStates.RollingOut) - 1; i >= 0; i-- { + rollingOut := state.revisionStates.RollingOut[i] + if rollingOut.SourceSpecHash != "" { + anyHasHash = true + if rollingOut.SourceSpecHash == currentHash { + installedBundleName := "" + if state.revisionStates.Installed != nil { + installedBundleName = state.revisionStates.Installed.Name + } + SetDeprecationStatus(ext, installedBundleName, nil, false) + state.resolvedRevisionMetadata = rollingOut + return true + } + } + } + + // Backward compatibility: if no rolling-out revision has a hash (pre-upgrade + // revisions), reuse the latest to avoid unnecessary catalog churn. + if !anyHasHash { + installedBundleName := "" + if state.revisionStates.Installed != nil { + installedBundleName = state.revisionStates.Installed.Name + } + SetDeprecationStatus(ext, installedBundleName, nil, false) + state.resolvedRevisionMetadata = latestRollingOut + return true + } + + l.Info("no rolling-out revision matches current catalog spec hash, re-resolving bundle", + "rollingOutCount", len(state.revisionStates.RollingOut), + "currentSpecHash", currentHash, + ) + return false +} + +// versionMatchesSpec checks whether a given version string satisfies the version constraint +// in the ClusterExtension spec. Returns true if the spec has no version constraint, or if +// the version falls within the specified range. Returns false if there is a mismatch or +// if either the constraint or the version string is unparseable. +func versionMatchesSpec(version string, ext *ocv1.ClusterExtension) bool { + if ext.Spec.Source.Catalog == nil { + return true + } + specVersion := ext.Spec.Source.Catalog.Version + if specVersion == "" { + return true + } + if version == "" { + return false + } + versionRange, err := compare.NewVersionRange(specVersion) + if err != nil { + return false + } + v, err := bsemver.Parse(version) + if err != nil { + return false + } + return versionRange(v) +} + +// normalizeLabelSelector returns a deep copy of the selector with MatchExpressions +// and their Values sorted so that semantically equivalent selectors produce the +// same JSON serialization (and therefore the same hash). +func normalizeLabelSelector(sel *metav1.LabelSelector) *metav1.LabelSelector { + if sel == nil { + return nil + } + // A selector with no MatchLabels and no MatchExpressions is semantically + // equivalent to nil (both match everything). Canonicalize to nil so they + // hash identically. + if len(sel.MatchLabels) == 0 && len(sel.MatchExpressions) == 0 { + return nil + } + n := sel.DeepCopy() + for i := range n.MatchExpressions { + sort.Strings(n.MatchExpressions[i].Values) + } + sort.Slice(n.MatchExpressions, func(i, j int) bool { + a, b := n.MatchExpressions[i], n.MatchExpressions[j] + if a.Key != b.Key { + return a.Key < b.Key + } + if a.Operator != b.Operator { + return string(a.Operator) < string(b.Operator) + } + if len(a.Values) != len(b.Values) { + return len(a.Values) < len(b.Values) + } + for k := range a.Values { + if a.Values[k] != b.Values[k] { + return a.Values[k] < b.Values[k] + } + } + return false + }) + return n +} + +// CatalogSpecHash returns a SHA-256 hex digest of the resolution-relevant fields +// from the ClusterExtension's catalog spec. Two specs that would drive the resolver +// to evaluate different candidate sets produce different hashes. +func CatalogSpecHash(ext *ocv1.ClusterExtension) string { + if ext.Spec.Source.Catalog == nil { + return "" + } + cat := ext.Spec.Source.Catalog + + // Sort a copy of channels so that order differences don't change the hash. + channels := slices.Clone(cat.Channels) + sort.Strings(channels) + + data := struct { + PackageName string `json:"p"` + Version string `json:"v,omitempty"` + Channels []string `json:"ch,omitempty"` + Selector *metav1.LabelSelector `json:"s,omitempty"` + UpgradeConstraintPolicy ocv1.UpgradeConstraintPolicy `json:"u,omitempty"` + }{ + PackageName: cat.PackageName, + Version: cat.Version, + Channels: channels, + Selector: normalizeLabelSelector(cat.Selector), + UpgradeConstraintPolicy: cat.UpgradeConstraintPolicy, + } + raw, err := json.Marshal(data) + if err != nil { + return "" + } + h := sha256.Sum256(raw) + return hex.EncodeToString(h[:]) +} + // handleResolutionError handles the case when bundle resolution fails. // // Decision logic (evaluated in order): @@ -224,15 +397,15 @@ func handleResolutionError(ctx context.Context, c client.Client, state *reconcil return nil, err } - // Check if the spec is requesting a specific version that differs from installed - specVersion := "" - if ext.Spec.Source.Catalog != nil { - specVersion = ext.Spec.Source.Catalog.Version - } - installedVersion := state.revisionStates.Installed.Version - - // If spec requests a different version, we cannot fall back - must fail and retry - if specVersion != "" && specVersion != installedVersion { + // Check if the spec is requesting a version that differs from what's installed. + // Uses semver range matching so that ranges like ">=1.0.0, <2.0.0" are correctly + // recognized as satisfied by "1.0.0". + if !versionMatchesSpec(state.revisionStates.Installed.Version, ext) { + specVersion := "" + if ext.Spec.Source.Catalog != nil { + specVersion = ext.Spec.Source.Catalog.Version + } + installedVersion := state.revisionStates.Installed.Version msg := fmt.Sprintf("unable to upgrade to version %s: %v (currently installed: %s)", specVersion, err, installedVersion) l.Error(err, "resolution failed and spec requests version change - cannot fall back", "requestedVersion", specVersion, @@ -411,6 +584,7 @@ func ApplyBundle(a Applier) ReconcileStepFunc { labels.PackageNameKey: state.resolvedRevisionMetadata.Package, labels.BundleVersionKey: state.resolvedRevisionMetadata.Version, labels.BundleReferenceKey: state.resolvedRevisionMetadata.Image, + labels.SourceSpecHashKey: CatalogSpecHash(ext), } objLbls := map[string]string{ labels.OwnerKindKey: ocv1.ClusterExtensionKind, diff --git a/internal/operator-controller/controllers/suite_test.go b/internal/operator-controller/controllers/suite_test.go index 109f0b66c..598dcd05c 100644 --- a/internal/operator-controller/controllers/suite_test.go +++ b/internal/operator-controller/controllers/suite_test.go @@ -91,6 +91,19 @@ type deps struct { Validators []controllers.ClusterExtensionValidator } +// specHashFor computes the CatalogSpecHash for the given catalog filter with CRD-level +// defaults applied (UpgradeConstraintPolicy defaults to CatalogProvided). This mirrors +// the state the API server produces when the ClusterExtension is created. +func specHashFor(cat *ocv1.CatalogFilter) string { + cf := *cat + if cf.UpgradeConstraintPolicy == "" { + cf.UpgradeConstraintPolicy = ocv1.UpgradeConstraintPolicyCatalogProvided + } + return controllers.CatalogSpecHash(&ocv1.ClusterExtension{ + Spec: ocv1.ClusterExtensionSpec{Source: ocv1.SourceConfig{Catalog: &cf}}, + }) +} + func newClientAndReconciler(t *testing.T, opts ...reconcilerOption) (client.Client, *controllers.ClusterExtensionReconciler) { cl := newClient(t) diff --git a/internal/operator-controller/labels/labels.go b/internal/operator-controller/labels/labels.go index 16f45ecbb..b6d515de2 100644 --- a/internal/operator-controller/labels/labels.go +++ b/internal/operator-controller/labels/labels.go @@ -44,4 +44,11 @@ const ( // that were created during migration from Helm releases. This label is used // to distinguish migrated revisions from those created by normal Boxcutter operation. MigratedFromHelmKey = "olm.operatorframework.io/migrated-from-helm" + + // SourceSpecHashKey is the annotation key used to record a SHA-256 fingerprint + // of the resolution-relevant fields from the ClusterExtension spec at the time + // the revision was resolved. This allows the controller to detect spec changes + // (e.g. version constraint, channels, selector, upgradeConstraintPolicy) + // even when the rolling-out version still satisfies the new constraint. + SourceSpecHashKey = "olm.operatorframework.io/source-spec-hash" ) diff --git a/test/e2e/features/update.feature b/test/e2e/features/update.feature index e1b4becca..3c46760e1 100644 --- a/test/e2e/features/update.feature +++ b/test/e2e/features/update.feature @@ -243,3 +243,30 @@ Feature: Update ClusterExtension And ClusterExtensionRevision "${NAME}-2" reports Progressing as True with Reason RollingOut And ClusterExtensionRevision "${NAME}-2" reports Available as False with Reason ProbeFailure + @BoxcutterRuntime + Scenario: Version change while a revision is stuck rolling out re-resolves from catalog + Given ClusterExtension is applied + """ + apiVersion: olm.operatorframework.io/v1 + kind: ClusterExtension + metadata: + name: ${NAME} + spec: + namespace: ${TEST_NAMESPACE} + serviceAccount: + name: olm-sa + source: + sourceType: Catalog + catalog: + packageName: test + selector: + matchLabels: + "olm.operatorframework.io/metadata.name": test-catalog + version: 1.0.2 + """ + And ClusterExtensionRevision "${NAME}-1" reports Available as False with Reason ProbeFailure + When ClusterExtension is updated to version "1.2.0" + Then ClusterExtension is rolled out + And ClusterExtension is available + And bundle "test-operator.1.2.0" is installed in version "1.2.0" +