From d60206344808cc6d3feeef5979a89d2c96ff9f06 Mon Sep 17 00:00:00 2001 From: OdooSky v3 Date: Sat, 9 May 2026 21:30:36 +0300 Subject: [PATCH] =?UTF-8?q?chart=200.7.3=20=E2=80=94=20slug-suffix=20per-t?= =?UTF-8?q?enant=20ClusterIssuer=20(qsoft2=20SSL=20fix)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cluster-issuer.yaml: name → letsencrypt-prod-{{ tenant.slug }}, hard-pin apiTokenSecretRef.name to cloudflare-api-token-{{ tenant.slug }} so it matches the ESO-created Secret. ACME account key also slug-suffixed for tenant isolation. Pre-0.7.3 the unsuffixed letsencrypt-prod mismatched what instance.go:504 stamps into per-instance Certificates (letsencrypt-prod-<slug>), so cert-manager logged 'Referenced ClusterIssuer not found' and erp2 served Traefik default cert forever. tenants-wildcard-cert.yaml: issuerRef.name → letsencrypt-prod-{{ $.Values.tenant.slug }} to match the renamed ClusterIssuer. values.yaml: secrets.cloudflareTokenSecret block deprecated (the chart no longer reads it; kept for back-compat with external overrides). Diagnosed in the qsoft2 migrate test 2026-05-09. --- Chart.yaml | 4 +-- templates/cluster-issuer.yaml | 41 +++++++++++++++++++++------- templates/tenants-wildcard-cert.yaml | 6 +++- values.yaml | 15 ++++++---- 4 files changed, 48 insertions(+), 18 deletions(-) diff --git a/Chart.yaml b/Chart.yaml index 1a97f4a..6e8b5ea 100644 --- a/Chart.yaml +++ b/Chart.yaml @@ -23,8 +23,8 @@ description: | Git). type: application -version: 0.7.2 -appVersion: "0.7.2" +version: 0.7.3 +appVersion: "0.7.3" # All 4 subcharts now resolve from registry.odoosky.cloud (mirrored # 2026-05-08). 
Mirror-first discipline + China-region readiness: a diff --git a/templates/cluster-issuer.yaml b/templates/cluster-issuer.yaml index d421d52..162c35c 100644 --- a/templates/cluster-issuer.yaml +++ b/templates/cluster-issuer.yaml @@ -1,5 +1,15 @@ -{{- if .Values.tenant.domain }} -# letsencrypt-prod ClusterIssuer — DNS-01 challenge via Cloudflare. +{{- if and .Values.tenant.domain .Values.tenant.slug }} +# letsencrypt-prod-<slug> ClusterIssuer — DNS-01 challenge via Cloudflare, +# scoped to THIS tenant via the per-tenant CF token Secret. The +# `letsencrypt-prod-<slug>` naming MUST match tenantClusterIssuerName() +# in backend/cmd/api/tenant_substrate.go — the per-instance overlay +# renderer in instance.go:504 stamps that exact name into every +# Certificate's issuerRef. Pre-0.7.3 charts used the unsuffixed name +# `letsencrypt-prod`, which broke for any instance asking for the +# slugged form (the qsoft2 migrate test on 2026-05-09 surfaced this: +# erp2's Certificate referenced letsencrypt-prod-qsoft, the chart only +# rendered letsencrypt-prod, cert-manager logged "Referenced ClusterIssuer +# not found", erp2 served the Traefik default cert forever). # # Multi-zone: the solver has NO `selector.dnsZones` restriction. The # tenant's Cloudflare token typically covers many zones (a tenant with @@ -13,10 +23,10 @@ # `4th.online`). Dropping the selector unifies single-zone and # multi-zone tenants under one issuer. # -# The cloudflare-api-token Secret is NOT in this chart. Tower -# kubectl-applies it into cert-manager ns at Connect time using the -# tenant's per-tenant Vault credential (v3/tenants/<slug>/cloudflare-token). -# The chart references it by name only. +# The cloudflare-api-token-<slug> Secret is now chart-managed via the +# ESO ExternalSecret in cloudflare-api-token-externalsecret.yaml (which +# pulls the token from OpenBao at v3/tenants/<slug>/cloudflare-token). +# Naming kept symmetric with that template. # # Sync wave 1 (Slice 2B.1.1, 2026-05-04). 
cert-manager itself # installs at the default wave 0; Argo waits for ALL wave-0 @@ -32,24 +42,35 @@ # (in tenants-wildcard-cert.yaml) — Certificate references the # ClusterIssuer by name, so the resource graph also reflects the # logical dependency. +# +# Multi-tenant clusters (visiting tenants on a host tenant's cluster) +# remain a known gap (Item #9 follow-up): the ESO ExternalSecret loop +# only iterates the cluster-owner tenant. When a future deploy lands a +# non-owner tenant on a cluster, that tenant's CF Secret + Issuer must +# be applied out-of-band until this template grows a `Values.tenants[]` +# loop and Tower's onboarding code populates it. apiVersion: cert-manager.io/v1 kind: ClusterIssuer metadata: - name: letsencrypt-prod + name: letsencrypt-prod-{{ .Values.tenant.slug }} annotations: argocd.argoproj.io/sync-wave: "1" labels: app.kubernetes.io/managed-by: cluster-platform-v3 + odoosky.io/tenant: {{ .Values.tenant.id | quote }} spec: acme: email: {{ required "acme.email is required" .Values.acme.email | quote }} server: {{ .Values.acme.server | quote }} privateKeySecretRef: - name: letsencrypt-prod-account-key + # Slug-suffixed so each tenant has its own ACME account key — + # cleaner isolation if a tenant rotates / audits, and avoids + # implicit shared state if two tenants ever land on one cluster. 
+ name: letsencrypt-prod-account-key-{{ .Values.tenant.slug }} solvers: - dns01: cloudflare: apiTokenSecretRef: - name: {{ .Values.secrets.cloudflareTokenSecret.name | quote }} - key: {{ .Values.secrets.cloudflareTokenSecret.key | quote }} + name: cloudflare-api-token-{{ .Values.tenant.slug }} + key: api-token {{- end }} diff --git a/templates/tenants-wildcard-cert.yaml b/templates/tenants-wildcard-cert.yaml index 09d19b2..f75a3c5 100644 --- a/templates/tenants-wildcard-cert.yaml +++ b/templates/tenants-wildcard-cert.yaml @@ -96,7 +96,11 @@ metadata: spec: secretName: {{ printf "tenants-wildcard%s-tls" $suffix | quote }} issuerRef: - name: letsencrypt-prod + # Slug-suffixed since chart 0.7.3 — matches the ClusterIssuer + # name rendered by cluster-issuer.yaml. Pre-0.7.3 this was the + # unsuffixed `letsencrypt-prod`. See cluster-issuer.yaml's + # docstring for the why. + name: letsencrypt-prod-{{ $.Values.tenant.slug }} kind: ClusterIssuer commonName: {{ $d.wildcardHost | quote }} dnsNames: diff --git a/values.yaml b/values.yaml index 55d3150..fe42ad2 100644 --- a/values.yaml +++ b/values.yaml @@ -143,14 +143,19 @@ traefik: port: websecure priority: 10 -# secrets — Tower applies these out-of-band via the registered -# kubeconfig at Connect time (B2). The chart references them by -# name only; values never enter Git. +# secrets — DEPRECATED for cloudflareTokenSecret as of chart 0.7.3. +# The cluster-issuer.yaml template now hard-references +# `cloudflare-api-token-<slug>` (matches the ESO-created Secret +# in cloudflare-api-token-externalsecret.yaml) and ignores this block. +# Kept here as no-op back-compat for any external chart consumer that +# overrides these values; chart templates no longer read +# secrets.cloudflareTokenSecret. s3CredentialsSecret is still consumed +# by the per-instance backup CronJob path and remains live. 
secrets: cloudflareTokenSecret: namespace: odoosky-system - name: cloudflare-api-token - key: api-token + name: cloudflare-api-token # unused since 0.7.3; chart computes from tenant.slug + key: api-token # unused since 0.7.3 s3CredentialsSecret: namespace: tenants name: s3-backup-creds