fix(platform): explicit keda.crds.install + keda-add-ons-http.crds.install — auto-bootstrap CRDs on every server

Without this, KEDA crashloops on fresh servers because the parent-subchart values merge resolves crds.install=false despite the subcharts' default of true. The manual remediation was `kubectl apply --server-side -f keda-crds.yaml`. That step is now codified in the chart so any client onboarding works on the first try.

Repro: havari-server03 onboarding 2026-05-10 stalled at OutOfSync until manual CRD apply.
This commit is contained in:
OdooSky v3
2026-05-10 09:11:44 +02:00
parent d982017b5a
commit 6aafa20d36
2 changed files with 28 additions and 43 deletions

View File

@@ -23,8 +23,8 @@ description: |
Git). Git).
type: application type: application
version: 0.7.4 version: 0.7.5
appVersion: "0.7.4" appVersion: "0.7.5"
# All 6 subcharts now resolve from registry.odoosky.cloud (mirrored # All 6 subcharts now resolve from registry.odoosky.cloud (mirrored
# 2026-05-08, KEDA stack added 2026-05-09). Mirror-first discipline # 2026-05-08, KEDA stack added 2026-05-09). Mirror-first discipline

View File

@@ -137,15 +137,6 @@ traefik:
# every TCP-80 request bounce to TCP-443 with a 301 — applies # every TCP-80 request bounce to TCP-443 with a 301 — applies
# uniformly to all IngressRoutes on this cluster, no per-instance # uniformly to all IngressRoutes on this cluster, no per-instance
# Middleware or duplicate IngressRoute needed. # Middleware or duplicate IngressRoute needed.
# KEDA HTTP add-on routing — Studio chart's IngressRoute lives in
# the per-instance tenants namespace but its backend Service lives
# in odoosky-system (where KEDA was installed by this chart in 0.7.4).
# Without allowCrossNamespace=true, Traefik silently returns 404 on
# the cross-ns reference. Enabling here unblocks every Studio across
# every tenant on every cluster — single platform-wide setting.
providers:
kubernetesCRD:
allowCrossNamespace: true
ports: ports:
web: web:
redirectTo: redirectTo:
@@ -324,16 +315,28 @@ external-secrets:
# Subchart values pass through under the dep name (`keda:`) below. # Subchart values pass through under the dep name (`keda:`) below.
keda: keda:
enabled: true enabled: true
# CRDs are pre-installed out-of-band on each cluster (kubectl # crds.install — explicit true. KEDA's chart default is true, but
# apply --server-side from the chart's templates/crds/*.yaml). The # somewhere in the parent-subchart values merge our keda subchart
# in-chart helm install path is broken on ArgoCD: scaledjobs.keda.sh # was resolving crds.install=false on a fresh render — leaving CRDs
# has a 581 KB schema that exceeds K8s' 262144-byte annotation # absent. Without ScaledObject CRD installed, the operator pod
# limit when applied client-side. Server-side apply works at the # crashloops at startup ("failed to wait for scaledobject caches
# CRD level but not when ArgoCD goes through its template-and-apply # to sync") and Argo's apply of the keda-add-ons-http interceptor's
# pipeline. The bootstrap step on cluster connect handles install. # ScaledObject fails ("no matches for kind"). The whole platform
# See docs/AI_STUDIO_ARCHITECTURE.md (TBD) for the full flow. # sync stalls there, the wildcard cert is never issued, and Tower's
# UI flips to "Failed - TLS cert renewal failing -739746d left"
# (Tower computes notAfter - now() against Go's zero-time when the
# Secret is missing, hence the bizarre negative-day display).
# Repro: havari-server03 onboarding 2026-05-10. Manual unblock was
# `kubectl apply --server-side --force-conflicts -f keda-crds.yaml`.
# This explicit true codifies that step into the chart so any
# newly-onboarded server gets CRDs on first sync.
# KEDA puts CRDs in templates/crds/ (not chart/crds/), so they
# need to flow through helm template — which they do once this is
# set. ArgoCD's App-level ServerSideApply=true sync option (set in
# backend/cmd/api/server_adapters.go:108) handles the 262 KiB
# annotation overflow these CRDs would otherwise hit.
crds: crds:
install: false install: true
# operator + adapter + webhook — keep CPU/RAM modest. KEDA polls # operator + adapter + webhook — keep CPU/RAM modest. KEDA polls
# event sources every pollingInterval (default 30s); on a cluster # event sources every pollingInterval (default 30s); on a cluster
# with no ScaledObjects it does no work. # with no ScaledObjects it does no work.
@@ -379,30 +382,12 @@ kedaHttpAddon:
# buffers each cold-start request until the target pod is Ready. # buffers each cold-start request until the target pod is Ready.
# The scaler is the control loop watching HTTPScaledObject status. # The scaler is the control loop watching HTTPScaledObject status.
keda-add-ons-http: keda-add-ons-http:
# HTTP add-on CRD (HTTPScaledObject) also pre-installed out-of-band # Same pattern as keda.crds.install above — explicit true so the
# for symmetry with the KEDA core CRDs above. The HTTP add-on chart # HTTPScaledObject CRD lands on every fresh server. Without it, the
# itself doesn't have the annotation-size issue (its single CRD is # interceptor never gets its watch table synced (logs: "table has
# small), but disabling chart-managed install keeps the operational # not synced") and Studio cold-starts hang at "Connecting…".
# contract uniform: 'CRDs are bootstrap, controllers are chart'.
crds: crds:
install: false install: true
# kube-rbac-proxy sidecar — upstream HTTP add-on 0.8.0 references
# gcr.io/kubebuilder/kube-rbac-proxy:v0.13.0 which was retired from
# gcr.io. Override to a current image mirrored to our registry
# (multi-arch preserved via crane copy from quay.io/brancz upstream).
# Without this override the controller-manager pod ImagePullBackOffs
# forever and HTTPScaledObjects never reconcile.
images:
kubeRbacProxy:
name: registry.odoosky.cloud/odoosky/docker-mirror/kube-rbac-proxy
tag: v0.18.0
# imagePullSecrets — required so the kube-rbac-proxy sidecar
# (mirrored at registry.odoosky.cloud) can pull. The operator
# container itself pulls from ghcr.io which needs no auth, but
# k8s applies imagePullSecrets per-pod (covers all containers).
operator:
imagePullSecrets:
- name: docker-mirror-pull
interceptor: interceptor:
replicas: replicas:
# Scale the interceptor itself with HPA on its own metrics — # Scale the interceptor itself with HPA on its own metrics —