feat(slice 2B.1.1): sync waves — kill the cert-manager-webhook race (chart 0.5.5)

This commit is contained in:
OdooSky v3
2026-05-04 13:09:47 +03:00
parent 46e8309153
commit 252ac78888
3 changed files with 26 additions and 5 deletions

View File

@@ -23,8 +23,8 @@ description: |
Git).
type: application
version: 0.5.4
appVersion: "0.5.4"
version: 0.5.5
appVersion: "0.5.5"
dependencies:
- name: cert-manager

View File

@@ -18,13 +18,26 @@
# tenant's per-tenant Vault credential (v3/tenants/<id>/cloudflare-token).
# The chart references it by name only.
#
# Sync wave: needs to land AFTER cert-manager's CRDs are installed
# (cert-manager dep installs first); Argo's default ordering by kind
# handles this.
# Sync wave 1 (Slice 2B.1.1, 2026-05-04). cert-manager itself
# installs at the default wave 0; Argo waits for ALL wave-0
# resources (cert-manager Deployments + webhook Service) to be
# Healthy before applying wave 1. Without the wave annotation we
# hit a race: Argo applied this ClusterIssuer in the same wave as
# the cert-manager Deployments → cert-manager-webhook wasn't Ready
# yet → the admission webhook rejected the resource → Argo backed
# off exponentially (30-90 s) before retrying. The retries=2 count
# was the smoking gun in the demo-server105 timing analysis
# (3 min to ready instead of ~45 s).
#
# Note ordering: ClusterIssuer at wave 1, Certificate at wave 2
# (in tenants-wildcard-cert.yaml) — Certificate references the
# ClusterIssuer by name, so the resource graph also reflects the
# logical dependency.
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
name: letsencrypt-prod
annotations:
argocd.argoproj.io/sync-wave: "1"
labels:
app.kubernetes.io/managed-by: cluster-platform-v3
spec:

View File

@@ -74,6 +74,14 @@ metadata:
# registry) are up, instead of waiting 5-10 min for LE to
# finish the wildcard issuance.
argocd.argoproj.io/sync-options: SkipHealthCheck=true
# Slice 2B.1.1 — wave 2: apply AFTER the ClusterIssuer
# (wave 1) which depends on cert-manager (wave 0 default).
# Argo enforces strict wave ordering with health-gating
# between waves, so the Certificate never lands before its
# ClusterIssuer exists or before cert-manager-webhook is
# accepting admission requests. Eliminates the retries=2
# exponential-backoff penalty observed on demo-server105.
argocd.argoproj.io/sync-wave: "2"
spec:
secretName: {{ printf "tenants-wildcard%s-tls" $suffix | quote }}
issuerRef: