feat(slice 2B.1): SkipHealthCheck on tenants-wildcard Cert (chart 0.5.4)
This commit is contained in:
@@ -23,8 +23,8 @@ description: |
|
||||
Git).
|
||||
|
||||
type: application
|
||||
version: 0.5.3
|
||||
appVersion: "0.5.3"
|
||||
version: 0.5.4
|
||||
appVersion: "0.5.4"
|
||||
|
||||
dependencies:
|
||||
- name: cert-manager
|
||||
|
||||
@@ -16,11 +16,28 @@
|
||||
# DNS not yet pointed) waits in the data layer; the chart doesn't
|
||||
# try to issue and stall the whole sync.
|
||||
#
|
||||
# DNS-01 takes 30–90 s in normal Cloudflare conditions; cert-manager
|
||||
# retries forever on transient failures. The Argo Application that
|
||||
# deploys this chart is "Healthy" only when EVERY Certificate's
|
||||
# Ready condition flips to True — multi-domain deploys take a
|
||||
# proportionally longer first sync.
|
||||
# DNS-01 takes 30–90 s on a fast day, 5–10 min on a slow one
|
||||
# (Cloudflare zone propagation + LE order processing). Until Slice
|
||||
# 2B.1 (2026-05-04) the wildcard Certificate's Ready status gated
|
||||
# the entire Argo Application's Health — meaning Connect Server
|
||||
# sat at "Provisioning…" for the full 5–10 min before substrate
|
||||
# became "Ready", even though all the BASE infra (longhorn,
|
||||
# cert-manager, traefik, registry) was up within ~30 s.
|
||||
#
|
||||
# The annotation `argocd.argoproj.io/sync-options: SkipHealthCheck=true`
|
||||
# below tells Argo "still sync this resource, but don't include
|
||||
# its Ready status when computing the parent Application's Health".
|
||||
# Result: substrate becomes Ready in ~30 s; the wildcard issues in
|
||||
# the background.
|
||||
#
|
||||
# Tradeoff: an instance deployed inside the first ~5 min after
|
||||
# Connect references a Secret (`tenants-wildcard-tls`) that doesn't
|
||||
# exist yet — its IngressRoute is healthy but TLS is unavailable.
|
||||
# Slice 2B.2 will plumb a per-host HTTP-01 fallback so the very
|
||||
# first deploy is also fast. Until then the operator should know:
|
||||
# Substrate Ready ≠ wildcard ready. Watch for the Secret to appear
|
||||
# (`kubectl -n tenants get secret tenants-wildcard-tls`) before the
|
||||
# first deploy on a fresh cluster.
|
||||
{{- $domains := .Values.tenant.domains | default (list) }}
|
||||
{{- if and (eq (len $domains) 0) .Values.tenant.wildcardHost }}
|
||||
{{- $domains = list (dict
|
||||
@@ -47,6 +64,16 @@ metadata:
|
||||
{{- if $d.primary }}
|
||||
odoosky.io/domain-primary: "true"
|
||||
{{- end }}
|
||||
annotations:
|
||||
# Slice 2B.1 — substrate Ready in ~30 s. Argo will still
|
||||
# sync this Certificate (cert-manager will issue it via
|
||||
# DNS-01 in the background), but its Ready condition does
|
||||
# NOT gate the parent Application's Health calculation. So
|
||||
# the cluster-platform-v3 App flips Healthy as soon as the
|
||||
# base components (longhorn + cert-manager + traefik +
|
||||
# registry) are up, instead of waiting 5–10 min for LE to
|
||||
# finish the wildcard issuance.
|
||||
# NOTE: this replaces `argocd.argoproj.io/sync-options: SkipHealthCheck=true`.
# `SkipHealthCheck` is not one of the sync options Argo CD recognizes,
# so that annotation had no effect and this Certificate's Ready
# condition still gated the parent Application's Health. The
# documented way to exclude an immediate child resource from the
# Application's Health calculation is the ignore-healthcheck
# annotation below. It only takes effect on Argo CD releases that
# implement it — confirm against the deployed Argo CD version.
argocd.argoproj.io/ignore-healthcheck: "true"
|
||||
spec:
|
||||
secretName: {{ printf "tenants-wildcard%s-tls" $suffix | quote }}
|
||||
issuerRef:
|
||||
|
||||
Reference in New Issue
Block a user