From b6d5b29f3eac0e8e764d33897ca96bfa425c1627 Mon Sep 17 00:00:00 2001
From: Tower Bot
Date: Sun, 3 May 2026 13:58:48 +0200
Subject: [PATCH] feat: loop tenant.domains[] for N wildcard certs (#320.C)

Issue one wildcard Certificate per verified entry in tenant.domains[],
falling back to the scalar tenant.domain / tenant.wildcardHost pair
when the list is empty.

Carried over from the single-domain template instead of being dropped:

* the argocd.argoproj.io/sync-wave annotation, so the Certificate is
  still applied after the cert-manager CRDs on first sync;
* the per-cluster "<cluster>.platform.<root>" SAN, so multi-cluster
  tenants keep a unique SAN list per cluster and the primary
  certificate's SAN list does not change on upgrade.

A verified non-primary entry without a root is skipped rather than
rendered with the invalid resource name "tenants-wildcard-".
---
 templates/tenants-wildcard-cert.yaml | 61 +++++++++++++++++++++---
 values.yaml                          | 21 +++++++--
 2 files changed, 71 insertions(+), 11 deletions(-)

diff --git a/templates/tenants-wildcard-cert.yaml b/templates/tenants-wildcard-cert.yaml
index f1cd5bc..0f5d15e 100644
--- a/templates/tenants-wildcard-cert.yaml
+++ b/templates/tenants-wildcard-cert.yaml
@@ -1,33 +1,78 @@
-{{- if .Values.tenant.wildcardHost }}
+# tenants-wildcard Certificate(s) — one per VERIFIED domain in
+# tenant.domains[] (#320.C). The primary entry keeps the canonical
+# `tenants-wildcard` / `tenants-wildcard-tls` names so existing
+# instances (whose IngressRoute references that exact secret) keep
+# serving without re-deploy. Each non-primary domain gets its own
+# Certificate + Secret named after the root with `.` → `-`, so the
+# cluster ends up with N TLS Secrets — one per tenant domain — and
+# instances can pick the right one based on their host.
+#
+# Legacy fallback: when tenant.domains[] is empty (a chart consumer
+# from before #320.A), synthesize a single entry from the scalar
+# tenant.wildcardHost so this template stays one-pass.
+#
+# Verified=false entries are skipped on purpose — that's the safety
+# valve called out in #320.A. A half-configured add-domain (root set,
+# DNS not yet pointed) waits in the data layer; the chart doesn't
+# try to issue and stall the whole sync.
+#
+# DNS-01 takes 30–90 s in normal Cloudflare conditions; cert-manager
+# retries forever on transient failures. The Argo Application that
+# deploys this chart is "Healthy" only when EVERY Certificate's
+# Ready condition flips to True — multi-domain deploys take a
+# proportionally longer first sync.
+{{- $domains := .Values.tenant.domains | default (list) }}
+{{- if and (eq (len $domains) 0) .Values.tenant.wildcardHost }}
+{{- $domains = list (dict
+    "root" .Values.tenant.domain
+    "wildcardHost" .Values.tenant.wildcardHost
+    "primary" true
+    "verified" true) }}
+{{- end }}
+{{- range $i, $d := $domains }}
+{{- if and $d.verified $d.wildcardHost (or $d.primary $d.root) }}
+{{- $suffix := "" }}
+{{- if not $d.primary }}
+{{- $suffix = printf "-%s" (replace "." "-" $d.root) }}
+{{- end }}
+---
 apiVersion: cert-manager.io/v1
 kind: Certificate
 metadata:
-  name: tenants-wildcard
+  name: {{ printf "tenants-wildcard%s" $suffix | quote }}
   namespace: tenants
   labels:
     app.kubernetes.io/managed-by: cluster-platform-v3
+    odoosky.io/domain-root: {{ $d.root | quote }}
+    {{- if $d.primary }}
+    odoosky.io/domain-primary: "true"
+    {{- end }}
   annotations:
     # See cluster-issuer.yaml for sync-wave rationale. Certificate
     # also references the cert-manager.io CRD that lands via the
     # subchart; without a wave bump Argo discovery fails on first sync.
     argocd.argoproj.io/sync-wave: "5"
 spec:
-  secretName: tenants-wildcard-tls
+  secretName: {{ printf "tenants-wildcard%s-tls" $suffix | quote }}
   issuerRef:
     name: letsencrypt-prod
     kind: ClusterIssuer
-  commonName: {{ .Values.tenant.wildcardHost | quote }}
+  commonName: {{ $d.wildcardHost | quote }}
   dnsNames:
-    - {{ .Values.tenant.wildcardHost | quote }}
-    {{- if .Values.cluster.name }}
+    - {{ $d.wildcardHost | quote }}
+    {{- if and $.Values.cluster.name $d.root }}
     # Per-cluster differentiator. Same Registered Domain, but a unique
     # SAN-list per cluster so Let's Encrypt's "Duplicate Certificate"
     # rate limit (5 per identical SAN list per Registered Domain per
     # week) doesn't trip when a tenant runs multiple clusters. The
     # wildcard SAN above stays in every cert, so customer-facing
-    # routing (`.tenants.`) is unchanged. Only the
+    # routing (`<name>.tenants.<root>`) is unchanged. Only the
     # per-domain rate limit (50/week) bounds tenant capacity now.
-    - {{ printf "%s.platform.%s" .Values.cluster.name .Values.tenant.domain | quote }}
+    - {{ printf "%s.platform.%s" $.Values.cluster.name $d.root | quote }}
     {{- end }}
+  # Renew 30 days before expiry — Let's Encrypt certs are 90-day, so
+  # this gives cert-manager a 30-day window to retry if Cloudflare
+  # has a bad day during renewal.
   renewBefore: 720h
 {{- end }}
+{{- end }}
diff --git a/values.yaml b/values.yaml
index 12c99eb..7927e57 100644
--- a/values.yaml
+++ b/values.yaml
@@ -20,12 +20,27 @@ cluster:
 # template fails with `required` on an empty value).
 tenant:
   # Domain the Cloudflare zone covers, e.g. "acme-erp.com".
+  # Mirror of domains[primary].root — kept for legacy chart consumers.
   domain: ""
   # Wildcard hostname the cluster-wide tenants-wildcard cert covers,
-  # e.g. "*.tenants.acme-erp.com". Every tenant instance Ingress
-  # references the resulting Secret (`tenants-wildcard-tls` in the
-  # `tenants` namespace) by name.
+  # e.g. "*.tenants.acme-erp.com". Mirror of domains[primary].wildcardHost.
+  # Every tenant instance Ingress references the resulting Secret
+  # (`tenants-wildcard-tls` in the `tenants` namespace) by name.
   wildcardHost: ""
+  # domains — full multi-domain list (#320.C). Tower passes one entry
+  # per domain the tenant has registered; the chart issues one
+  # wildcard Certificate per VERIFIED entry. The primary entry
+  # produces the canonical `tenants-wildcard-tls` Secret; non-primary
+  # entries get `tenants-wildcard-<root-with-dashes>-tls`.
+  # Empty list = legacy single-domain mode (chart synthesizes one
+  # entry from domain + wildcardHost above).
+  #
+  # Each entry shape (one list item with four keys):
+  #   - root: "acme.com"      # non-primary entries without a root are skipped
+  #     wildcardHost: "*.tenants.acme.com"
+  #     primary: true         # exactly one entry should be primary
+  #     verified: true        # chart skips entries with verified=false
+  domains: []
 
 # acme — Let's Encrypt registration. Operator email is per-platform,
 # not per-tenant.