Revert "fix: chart 0.7.5 — raise KEDA interceptor cold-start timeouts (waitTimeout 180s, responseHeaderTimeout 30s)"
This reverts commit f64c374eda.
This commit is contained in:
10
Chart.yaml
10
Chart.yaml
@@ -23,14 +23,14 @@ description: |
|
|||||||
Git).
|
Git).
|
||||||
|
|
||||||
type: application
|
type: application
|
||||||
version: 0.7.5
|
version: 0.7.7
|
||||||
appVersion: "0.7.5"
|
appVersion: "0.7.7"
|
||||||
|
|
||||||
# All 6 subcharts now resolve from registry.odoosky.cloud (mirrored
|
# All 6 subcharts now resolve from registry.odoosky.cloud (mirrored
|
||||||
# 2026-05-08, KEDA stack added 2026-05-09). Mirror-first discipline
|
# 2026-05-08, KEDA stack added 2026-05-09). Mirror-first discipline
|
||||||
# for regional / air-gapped readiness: a Jetstack / Traefik /
|
# + China-region readiness: a Jetstack / Traefik / Longhorn /
|
||||||
# Longhorn / external-secrets-io / KEDA outage no longer blocks new
|
# external-secrets-io / KEDA outage no longer blocks new tenant
|
||||||
# tenant cluster bootstrap.
|
# cluster bootstrap.
|
||||||
#
|
#
|
||||||
# Original upstream sources (for re-sync if a chart bumps):
|
# Original upstream sources (for re-sync if a chart bumps):
|
||||||
# cert-manager → https://charts.jetstack.io
|
# cert-manager → https://charts.jetstack.io
|
||||||
|
|||||||
81
values.yaml
81
values.yaml
@@ -148,6 +148,15 @@ traefik:
|
|||||||
# every TCP-80 request bounce to TCP-443 with a 301 — applies
|
# every TCP-80 request bounce to TCP-443 with a 301 — applies
|
||||||
# uniformly to all IngressRoutes on this cluster, no per-instance
|
# uniformly to all IngressRoutes on this cluster, no per-instance
|
||||||
# Middleware or duplicate IngressRoute needed.
|
# Middleware or duplicate IngressRoute needed.
|
||||||
|
# KEDA HTTP add-on routing — Studio chart's IngressRoute lives in
|
||||||
|
# the per-instance tenants namespace but its backend Service lives
|
||||||
|
# in odoosky-system (where KEDA was installed by this chart in 0.7.4).
|
||||||
|
# Without allowCrossNamespace=true, Traefik silently returns 404 on
|
||||||
|
# the cross-ns reference. Enabling here unblocks every Studio across
|
||||||
|
# every tenant on every cluster — single platform-wide setting.
|
||||||
|
providers:
|
||||||
|
kubernetesCRD:
|
||||||
|
allowCrossNamespace: true
|
||||||
ports:
|
ports:
|
||||||
web:
|
web:
|
||||||
redirectTo:
|
redirectTo:
|
||||||
@@ -326,6 +335,35 @@ external-secrets:
|
|||||||
# Subchart values pass through under the dep name (`keda:`) below.
|
# Subchart values pass through under the dep name (`keda:`) below.
|
||||||
keda:
|
keda:
|
||||||
enabled: true
|
enabled: true
|
||||||
|
# crds.install — explicit true. KEDA's chart default is true, but
|
||||||
|
# somewhere in the parent-subchart values merge our keda subchart
|
||||||
|
# was resolving crds.install=false on a fresh render — leaving CRDs
|
||||||
|
# absent. Without ScaledObject CRD installed, the operator pod
|
||||||
|
# crashloops at startup ("failed to wait for scaledobject caches
|
||||||
|
# to sync") and Argo's apply of the keda-add-ons-http interceptor's
|
||||||
|
# ScaledObject fails ("no matches for kind"). The whole platform
|
||||||
|
# sync stalls there, the wildcard cert is never issued, and Tower's
|
||||||
|
# UI flips to "Failed - TLS cert renewal failing -739746d left"
|
||||||
|
# (Tower computes notAfter - now() against Go's zero-time when the
|
||||||
|
# Secret is missing, hence the bizarre negative-day display).
|
||||||
|
# Repro: havari-server03 onboarding 2026-05-10. Manual unblock was
|
||||||
|
# `kubectl apply --server-side --force-conflicts -f keda-crds.yaml`.
|
||||||
|
# This explicit true codifies that step into the chart so any
|
||||||
|
# newly-onboarded server gets CRDs on first sync.
|
||||||
|
# KEDA puts CRDs in templates/crds/ (not chart/crds/), so they
|
||||||
|
# need to flow through helm template — which they do once this is
|
||||||
|
# set. ArgoCD's App-level ServerSideApply=true sync option SHOULD
|
||||||
|
# cover the 256 KiB (262144-byte) annotation overflow these CRDs would hit with
|
||||||
|
# client-side apply (scaledjobs + scaledobjects each blow the limit
|
||||||
|
# because their schemas are huge), but ArgoCD's App-level setting
|
||||||
|
# falls back to client-side for resources that already exist with
|
||||||
|
# a different field-manager. Belt-and-suspenders: also annotate
|
||||||
|
# each CRD per-resource so the SSA path is taken regardless of
|
||||||
|
# who owned the field-manager first.
|
||||||
|
crds:
|
||||||
|
install: true
|
||||||
|
additionalAnnotations:
|
||||||
|
argocd.argoproj.io/sync-options: ServerSideApply=true
|
||||||
# operator + adapter + webhook — keep CPU/RAM modest. KEDA polls
|
# operator + adapter + webhook — keep CPU/RAM modest. KEDA polls
|
||||||
# event sources every pollingInterval (default 30s); on a cluster
|
# event sources every pollingInterval (default 30s); on a cluster
|
||||||
# with no ScaledObjects it does no work.
|
# with no ScaledObjects it does no work.
|
||||||
@@ -371,6 +409,29 @@ kedaHttpAddon:
|
|||||||
# buffers each cold-start request until the target pod is Ready.
|
# buffers each cold-start request until the target pod is Ready.
|
||||||
# The scaler is the control loop watching HTTPScaledObject status.
|
# The scaler is the control loop watching HTTPScaledObject status.
|
||||||
keda-add-ons-http:
|
keda-add-ons-http:
|
||||||
|
# kube-rbac-proxy sidecar — upstream HTTP add-on 0.8.0 references
|
||||||
|
# gcr.io/kubebuilder/kube-rbac-proxy:v0.13.0 which was retired from
|
||||||
|
# gcr.io. Override to a current image mirrored to our registry
|
||||||
|
# (multi-arch preserved via crane copy from quay.io/brancz upstream).
|
||||||
|
# Without this override the controller-manager pod ImagePullBackOffs
|
||||||
|
# forever and HTTPScaledObjects never reconcile.
|
||||||
|
images:
|
||||||
|
kubeRbacProxy:
|
||||||
|
name: registry.odoosky.cloud/odoosky/docker-mirror/kube-rbac-proxy
|
||||||
|
tag: v0.18.0
|
||||||
|
# imagePullSecrets — required so the kube-rbac-proxy sidecar
|
||||||
|
# (mirrored at registry.odoosky.cloud) can pull. The operator
|
||||||
|
# container itself pulls from ghcr.io which needs no auth, but
|
||||||
|
# k8s applies imagePullSecrets per-pod (covers all containers).
|
||||||
|
operator:
|
||||||
|
imagePullSecrets:
|
||||||
|
- name: docker-mirror-pull
|
||||||
|
# Same pattern as keda.crds.install above — explicit true so the
|
||||||
|
# HTTPScaledObject CRD lands on every fresh server. Without it, the
|
||||||
|
# interceptor never gets its watch table synced (logs: "table has
|
||||||
|
# not synced") and Studio cold-starts hang at "Connecting…".
|
||||||
|
crds:
|
||||||
|
install: true
|
||||||
interceptor:
|
interceptor:
|
||||||
replicas:
|
replicas:
|
||||||
# Scale the interceptor itself with HPA on its own metrics —
|
# Scale the interceptor itself with HPA on its own metrics —
|
||||||
@@ -380,26 +441,6 @@ keda-add-ons-http:
|
|||||||
# handles bursts above that.
|
# handles bursts above that.
|
||||||
min: 1
|
min: 1
|
||||||
max: 3
|
max: 3
|
||||||
# waitTimeout (→ KEDA_CONDITION_WAIT_TIMEOUT) — how long the
|
|
||||||
# interceptor holds a request waiting for the target Deployment
|
|
||||||
# to scale 0→1 and become Ready. Upstream default is 20s; an AI
|
|
||||||
# Studio cold-start is image pull (~33s for the ~900 MB
|
|
||||||
# opencode-odoo image) + app boot (~30-60s) ≈ 60-90s, so 20s
|
|
||||||
# makes the operator's first click fail with a 502 ("context deadline exceeded
|
|
||||||
# while waiting for workload reach > 0 replicas"). 180s lets the
|
|
||||||
# first click WAIT through the cold-start instead of erroring;
|
|
||||||
# a warm pod is unaffected (this only caps the 0→1 wait).
|
|
||||||
# NOTE: the studio-template-v3 chart's per-HSO `spec.timeouts`
|
|
||||||
# block does NOT work — HTTPScaledObject CRD 0.8.0 has no such
|
|
||||||
# field; interceptor timeouts are GLOBAL, set here.
|
|
||||||
waitTimeout: "180s"
|
|
||||||
# responseHeaderTimeout (→ KEDA_RESPONSE_HEADER_TIMEOUT) — how
|
|
||||||
# long the interceptor waits for response headers after forwarding
|
|
||||||
# to a warm backend. Upstream default 500ms is far too tight for a
|
|
||||||
# just-woken Studio's first route load (OpenHands boots its
|
|
||||||
# browser env lazily); 30s is generous headroom without masking a
|
|
||||||
# genuinely hung backend.
|
|
||||||
responseHeaderTimeout: "30s"
|
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 50m
|
cpu: 50m
|
||||||
|
|||||||
Reference in New Issue
Block a user