feat(slice 2B.1.1): sync waves — kill the cert-manager-webhook race (chart 0.5.5)
This commit is contained in:
@@ -23,8 +23,8 @@ description: |
|
||||
Git).
|
||||
|
||||
type: application
|
||||
version: 0.5.4
|
||||
appVersion: "0.5.4"
|
||||
version: 0.5.5
|
||||
appVersion: "0.5.5"
|
||||
|
||||
dependencies:
|
||||
- name: cert-manager
|
||||
|
||||
@@ -18,13 +18,26 @@
|
||||
# tenant's per-tenant Vault credential (v3/tenants/<id>/cloudflare-token).
|
||||
# The chart references it by name only.
|
||||
#
|
||||
# Sync wave: needs to land AFTER cert-manager's CRDs are installed
|
||||
# (cert-manager dep installs first); Argo's default ordering by kind
|
||||
# handles this.
|
||||
# Sync wave 1 (Slice 2B.1.1, 2026-05-04). cert-manager itself
|
||||
# installs at the default wave 0; Argo waits for ALL wave-0
|
||||
# resources (cert-manager Deployments + webhook Service) to be
|
||||
# Healthy before applying wave 1. Without this we hit a race:
|
||||
# Argo applied this ClusterIssuer in the same wave as cert-manager
|
||||
# Deployments → cert-manager-webhook wasn't Ready yet → admission
|
||||
# webhook rejected the resource → Argo backed off exponentially
|
||||
# 30–90 s before retrying. retries=2 was the smoking gun in the
|
||||
# demo-server105 timing analysis (3 min ready instead of ~45 s).
|
||||
#
|
||||
# Note ordering: ClusterIssuer at wave 1, Certificate at wave 2
|
||||
# (in tenants-wildcard-cert.yaml) — Certificate references the
|
||||
# ClusterIssuer by name, so the wave order also mirrors the
|
||||
# logical dependency.
|
||||
apiVersion: cert-manager.io/v1
|
||||
kind: ClusterIssuer
|
||||
metadata:
|
||||
name: letsencrypt-prod
|
||||
annotations:
|
||||
argocd.argoproj.io/sync-wave: "1"
|
||||
labels:
|
||||
app.kubernetes.io/managed-by: cluster-platform-v3
|
||||
spec:
|
||||
|
||||
@@ -74,6 +74,14 @@ metadata:
|
||||
# registry) are up, instead of waiting 5–10 min for LE to
|
||||
# finish the wildcard issuance.
|
||||
argocd.argoproj.io/sync-options: SkipHealthCheck=true
|
||||
# Slice 2B.1.1 — wave 2: apply AFTER the ClusterIssuer
|
||||
# (wave 1), which depends on cert-manager (wave 0 default).
|
||||
# Argo enforces strict wave ordering with health-gating
|
||||
# between waves, so the Certificate never lands before its
|
||||
# ClusterIssuer exists or before cert-manager-webhook is
|
||||
# accepting admission requests. Eliminates the retries=2
|
||||
# exponential-backoff penalty observed on demo-server105.
|
||||
argocd.argoproj.io/sync-wave: "2"
|
||||
spec:
|
||||
secretName: {{ printf "tenants-wildcard%s-tls" $suffix | quote }}
|
||||
issuerRef:
|
||||
|
||||
Reference in New Issue
Block a user