feat(slice 2B.1.1): sync waves — kill the cert-manager-webhook race (chart 0.5.5)
This commit is contained in:
@@ -23,8 +23,8 @@ description: |
|
|||||||
Git).
|
Git).
|
||||||
|
|
||||||
type: application
|
type: application
|
||||||
version: 0.5.4
|
version: 0.5.5
|
||||||
appVersion: "0.5.4"
|
appVersion: "0.5.5"
|
||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
- name: cert-manager
|
- name: cert-manager
|
||||||
|
|||||||
@@ -18,13 +18,26 @@
|
|||||||
# tenant's per-tenant Vault credential (v3/tenants/<id>/cloudflare-token).
|
# tenant's per-tenant Vault credential (v3/tenants/<id>/cloudflare-token).
|
||||||
# The chart references it by name only.
|
# The chart references it by name only.
|
||||||
#
|
#
|
||||||
# Sync wave: needs to land AFTER cert-manager's CRDs are installed
|
# Sync wave 1 (Slice 2B.1.1, 2026-05-04). cert-manager itself
|
||||||
# (cert-manager dep installs first); Argo's default ordering by kind
|
# installs at the default wave 0; Argo waits for ALL wave-0
|
||||||
# handles this.
|
# resources (cert-manager Deployments + webhook Service) to be
|
||||||
|
# Healthy before applying wave 1. Without this we hit a race:
|
||||||
|
# Argo applied this ClusterIssuer in the same wave as cert-manager
|
||||||
|
# Deployments → cert-manager-webhook wasn't Ready yet → admission
|
||||||
|
# webhook rejected the resource → Argo backed off exponentially
|
||||||
|
# 30-90s before retrying. retries=2 was the smoking gun in the
|
||||||
|
# demo-server105 timing analysis (3 min ready instead of ~45 s).
|
||||||
|
#
|
||||||
|
# Note ordering: ClusterIssuer at wave 1, Certificate at wave 2
|
||||||
|
# (in tenants-wildcard-cert.yaml) — Certificate references the
|
||||||
|
# ClusterIssuer by name, so the resource graph also reflects the
|
||||||
|
# logical dependency.
|
||||||
apiVersion: cert-manager.io/v1
|
apiVersion: cert-manager.io/v1
|
||||||
kind: ClusterIssuer
|
kind: ClusterIssuer
|
||||||
metadata:
|
metadata:
|
||||||
name: letsencrypt-prod
|
name: letsencrypt-prod
|
||||||
|
annotations:
|
||||||
|
argocd.argoproj.io/sync-wave: "1"
|
||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/managed-by: cluster-platform-v3
|
app.kubernetes.io/managed-by: cluster-platform-v3
|
||||||
spec:
|
spec:
|
||||||
|
|||||||
@@ -74,6 +74,14 @@ metadata:
|
|||||||
# registry) are up, instead of waiting 5–10 min for LE to
|
# registry) are up, instead of waiting 5–10 min for LE to
|
||||||
# finish the wildcard issuance.
|
# finish the wildcard issuance.
|
||||||
argocd.argoproj.io/sync-options: SkipHealthCheck=true
|
argocd.argoproj.io/sync-options: SkipHealthCheck=true
|
||||||
|
# Slice 2B.1.1 — wave 2: apply AFTER the ClusterIssuer
|
||||||
|
# (wave 1) which depends on cert-manager (wave 0 default).
|
||||||
|
# Argo enforces strict wave ordering with health-gating
|
||||||
|
# between waves, so the Certificate never lands before its
|
||||||
|
# ClusterIssuer exists or before cert-manager-webhook is
|
||||||
|
# accepting admission requests. Eliminates the retries=2
|
||||||
|
# exponential-backoff penalty observed on demo-server105.
|
||||||
|
argocd.argoproj.io/sync-wave: "2"
|
||||||
spec:
|
spec:
|
||||||
secretName: {{ printf "tenants-wildcard%s-tls" $suffix | quote }}
|
secretName: {{ printf "tenants-wildcard%s-tls" $suffix | quote }}
|
||||||
issuerRef:
|
issuerRef:
|
||||||
|
|||||||
Reference in New Issue
Block a user