feat: loop tenant.domains[] for N wildcard certs (#320.C)

This commit is contained in:
Tower Bot
2026-05-03 13:58:48 +02:00
parent 0213a0b513
commit b6d5b29f3e
2 changed files with 68 additions and 23 deletions

View File

@@ -1,33 +1,63 @@
# tenants-wildcard Certificate(s) — one per VERIFIED domain in
# tenant.domains[] (#320.C). The primary entry keeps the canonical
# `tenants-wildcard` / `tenants-wildcard-tls` names so existing
# instances (whose IngressRoute references that exact secret) keep
# serving without re-deploy. Each non-primary domain gets its own
# Certificate + Secret named after the root with `.` → `-`, so the
# cluster ends up with N TLS Secrets — one per tenant domain — and
# instances can pick the right one based on their host.
#
# Legacy fallback: when tenant.domains[] is empty (a chart consumer
# from before #320.A), synthesize a single entry from the scalar
# tenant.wildcardHost so this template stays one-pass. When neither
# tenant.domains[] nor tenant.wildcardHost is set, the loop below has
# nothing to iterate over and no Certificate is rendered.
#
# Verified=false entries are skipped on purpose — that's the safety
# valve called out in #320.A. A half-configured add-domain (root set,
# DNS not yet pointed) waits in the data layer; the chart doesn't
# try to issue and stall the whole sync.
#
# DNS-01 takes 30–90 s in normal Cloudflare conditions; cert-manager
# retries forever on transient failures. The Argo Application that
# deploys this chart is "Healthy" only when EVERY Certificate's
# Ready condition flips to True — multi-domain deploys take a
# proportionally longer first sync.
{{- $domains := .Values.tenant.domains | default (list) }}
{{- if and (eq (len $domains) 0) .Values.tenant.wildcardHost }}
{{- $domains = list (dict
      "root" .Values.tenant.domain
      "wildcardHost" .Values.tenant.wildcardHost
      "primary" true
      "verified" true) }}
{{- end }}
{{- range $d := $domains }}
{{- if and $d.verified $d.wildcardHost }}
{{- /* Primary keeps the bare canonical names; every other domain gets a "-<root with dots dashed>" suffix. */}}
{{- $suffix := "" }}
{{- if not $d.primary }}
{{- $suffix = printf "-%s" (replace "." "-" $d.root) }}
{{- end }}
---
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: {{ printf "tenants-wildcard%s" $suffix | quote }}
  namespace: tenants
  labels:
    app.kubernetes.io/managed-by: cluster-platform-v3
  annotations:
    # See cluster-issuer.yaml for sync-wave rationale. Certificate
    # also references the cert-manager.io CRD that lands via the
    # subchart; without a wave bump Argo discovery fails on first sync.
    argocd.argoproj.io/sync-wave: "5"
    odoosky.io/domain-root: {{ $d.root | quote }}
    {{- if $d.primary }}
    odoosky.io/domain-primary: "true"
    {{- end }}
spec:
  secretName: {{ printf "tenants-wildcard%s-tls" $suffix | quote }}
  issuerRef:
    name: letsencrypt-prod
    kind: ClusterIssuer
  commonName: {{ $d.wildcardHost | quote }}
  dnsNames:
    - {{ $d.wildcardHost | quote }}
    {{- if $.Values.cluster.name }}
    # Per-cluster differentiator. Same Registered Domain, but a unique
    # SAN-list per cluster so Let's Encrypt's "Duplicate Certificate"
    # rate limit (5 per identical SAN list per Registered Domain per
    # week) doesn't trip when a tenant runs multiple clusters. The
    # wildcard SAN above stays in every cert, so customer-facing
    # routing (`<instance>.tenants.<domain>`) is unchanged. Only the
    # per-domain rate limit (50/week) bounds tenant capacity now.
    #
    # Chart values are read through the root context ($) here because
    # inside the range loop the dot refers to the current domain entry.
    - {{ printf "%s.platform.%s" $.Values.cluster.name $.Values.tenant.domain | quote }}
    {{- end }}
  # Renew 30 days before expiry — Let's Encrypt certs are 90-day, so
  # this gives cert-manager a 30-day window to retry if Cloudflare
  # has a bad day during renewal.
  renewBefore: 720h
{{- end }}
{{- end }}