diff --git a/Chart.yaml b/Chart.yaml index 2afa6df..0604840 100644 --- a/Chart.yaml +++ b/Chart.yaml @@ -23,8 +23,8 @@ description: | Git). type: application -version: 0.7.4 -appVersion: "0.7.4" +version: 0.7.5 +appVersion: "0.7.5" # All 6 subcharts now resolve from registry.odoosky.cloud (mirrored # 2026-05-08, KEDA stack added 2026-05-09). Mirror-first discipline diff --git a/values.yaml b/values.yaml index 78a0740..d18cd46 100644 --- a/values.yaml +++ b/values.yaml @@ -137,15 +137,6 @@ traefik: # every TCP-80 request bounce to TCP-443 with a 301 — applies # uniformly to all IngressRoutes on this cluster, no per-instance # Middleware or duplicate IngressRoute needed. - # KEDA HTTP add-on routing — Studio chart's IngressRoute lives in - # the per-instance tenants namespace but its backend Service lives - # in odoosky-system (where KEDA was installed by this chart in 0.7.4). - # Without allowCrossNamespace=true, Traefik silently returns 404 on - # the cross-ns reference. Enabling here unblocks every Studio across - # every tenant on every cluster — single platform-wide setting. - providers: - kubernetesCRD: - allowCrossNamespace: true ports: web: redirectTo: @@ -324,16 +315,28 @@ external-secrets: # Subchart values pass through under the dep name (`keda:`) below. keda: enabled: true - # CRDs are pre-installed out-of-band on each cluster (kubectl - # apply --server-side from the chart's templates/crds/*.yaml). The - # in-chart helm install path is broken on ArgoCD: scaledjobs.keda.sh - # has a 581 KB schema that exceeds K8s' 262144-byte annotation - # limit when applied client-side. Server-side apply works at the - # CRD level but not when ArgoCD goes through its template-and-apply - # pipeline. The bootstrap step on cluster connect handles install. - # See docs/AI_STUDIO_ARCHITECTURE.md (TBD) for the full flow. + # crds.install — explicit true. 
KEDA's chart default is true, but + # this file previously overrode it with crds.install=false (CRDs + # were applied out-of-band), so a fresh render left the CRDs + # absent. Without the ScaledObject CRD installed, the operator pod + # crashloops at startup ("failed to wait for scaledobject caches + # to sync") and Argo's apply of the keda-add-ons-http interceptor's + # ScaledObject fails ("no matches for kind"). The whole platform + # sync stalls there, the wildcard cert is never issued, and Tower's + # UI flips to "Failed - TLS cert renewal failing -739746d left" + # (Tower computes notAfter - now() against Go's zero-time when the + # Secret is missing, hence the bizarre negative-day display). + # Repro: havari-server03 onboarding 2026-05-10. Manual unblock was + # `kubectl apply --server-side --force-conflicts -f keda-crds.yaml`. + # This explicit true codifies that step into the chart so any + # newly-onboarded server gets CRDs on first sync. + # KEDA puts CRDs in templates/crds/ (not chart/crds/), so they + # need to flow through helm template — which they do once this is + # set. ArgoCD's App-level ServerSideApply=true sync option (set in + # backend/cmd/api/server_adapters.go:108) handles the 256 KiB + # annotation overflow these CRDs would otherwise hit. crds: - install: false + install: true # operator + adapter + webhook — keep CPU/RAM modest. KEDA polls # event sources every pollingInterval (default 30s); on a cluster # with no ScaledObjects it does no work. @@ -379,30 +382,12 @@ kedaHttpAddon: # buffers each cold-start request until the target pod is Ready. # The scaler is the control loop watching HTTPScaledObject status. keda-add-ons-http: - # HTTP add-on CRD (HTTPScaledObject) also pre-installed out-of-band - # for symmetry with the KEDA core CRDs above. 
The HTTP add-on chart - # itself doesn't have the annotation-size issue (its single CRD is - # small), but disabling chart-managed install keeps the operational - # contract uniform: 'CRDs are bootstrap, controllers are chart'. + # Same pattern as keda.crds.install above — explicit true so the + # HTTPScaledObject CRD lands on every fresh server. Without it, the + # interceptor never gets its watch table synced (logs: "table has + # not synced") and Studio cold-starts hang at "Connecting…". crds: - install: false - # kube-rbac-proxy sidecar — upstream HTTP add-on 0.8.0 references - # gcr.io/kubebuilder/kube-rbac-proxy:v0.13.0 which was retired from - # gcr.io. Override to a current image mirrored to our registry - # (multi-arch preserved via crane copy from quay.io/brancz upstream). - # Without this override the controller-manager pod ImagePullBackOffs - # forever and HTTPScaledObjects never reconcile. - images: - kubeRbacProxy: - name: registry.odoosky.cloud/odoosky/docker-mirror/kube-rbac-proxy - tag: v0.18.0 - # imagePullSecrets — required so the kube-rbac-proxy sidecar - # (mirrored at registry.odoosky.cloud) can pull. The operator - # container itself pulls from ghcr.io which needs no auth, but - # k8s applies imagePullSecrets per-pod (covers all containers). - operator: - imagePullSecrets: - - name: docker-mirror-pull + install: true interceptor: replicas: # Scale the interceptor itself with HPA on its own metrics —