# cluster-platform-v3/values.yaml

# cluster-platform-v3 — chart defaults.
#
# The knobs most likely to be flipped are collected in this file so
# that customer-cluster overlays can tune sizing without forking the
# chart.
namespace: odoosky-system
# cluster — per-cluster identity passed by Tower as helm.values on each
# per-cluster Application. The chart uses cluster.name to add a
# differentiator SAN to the tenants-wildcard Certificate so Let's
# Encrypt's duplicate-cert rate limit doesn't collide across one
# tenant's multiple clusters.
cluster:
  name: ""
# tenant — per-tenant identity injected by Tower as helm.values on
# the per-cluster Argo Application. Empty defaults are safe to lint,
# but a real deploy MUST set domain + wildcardHost (the Certificate
# template fails with `required` on an empty value).
tenant:
  # Domain the Cloudflare zone covers, e.g. "acme-erp.com".
  domain: ""
  # Wildcard hostname the cluster-wide tenants-wildcard cert covers,
  # e.g. "*.tenants.acme-erp.com". Every tenant instance Ingress
  # references the resulting Secret (`tenants-wildcard-tls` in the
  # `tenants` namespace) by name.
  wildcardHost: ""
# acme — Let's Encrypt registration. The operator email is
# per-platform, not per-tenant.
acme:
  email: m@havari.me
  server: https://acme-v02.api.letsencrypt.org/directory
# certManager — gate for the conditional in Chart.yaml dependencies.
# Helm reads this for the `condition: certManager.enabled` flag only;
# the actual subchart values live under the dependency name
# `cert-manager`, not under this key.
certManager:
  enabled: true
# cert-manager — values passed THROUGH to the upstream jetstack subchart
# (Chart.yaml dependency name = "cert-manager"). Subchart values must
# nest under the dep name, not under our top-level `certManager` alias —
# putting them under `certManager:` does nothing.
#
# crds.enabled — when true, install the cert-manager CRDs in the same
#   release. The v1.14+ jetstack chart renamed `installCRDs` to
#   `crds.enabled`; the old key is silently ignored, leaving the CRDs
#   absent and any Certificate / ClusterIssuer manifest failing with
#   "no matches for kind".
# crds.keep — when true, leave CRDs in place if the chart is
#   uninstalled. Safer for disconnect flows where the customer might
#   re-add the cluster later.
#
# NOTE(review): both are false here, so this release does not ship the
# CRDs itself. Confirm they are installed by some other path before the
# Certificate / ClusterIssuer templates sync; otherwise set
# crds.enabled (and crds.keep) to true.
cert-manager:
  crds:
    enabled: false
    keep: false  # ignored when enabled=false
# traefik — upstream chart. LoadBalancer Service so the customer's
# k3s servicelb maps :80/:443 to the host. Tower currently doesn't
# rely on Traefik's IngressRoute features here; instances are on
# their own per-tenant Traefik later. This Traefik gives the cluster
# a default ingress for the registry + future platform endpoints.
traefik:
  enabled: true
  service:
    type: LoadBalancer
# secrets — Tower applies these out-of-band via the registered
# kubeconfig at Connect time (B2). The chart references them by
# name only; values never enter Git.
secrets:
  cloudflareTokenSecret:
    namespace: odoosky-system
    name: cloudflare-api-token
    key: api-token
  s3CredentialsSecret:
    namespace: tenants
    name: s3-backup-creds
# registry — in-cluster image registry.
registry:
  enabled: true
  image:
    repository: registry
    tag: "2.8"
    pullPolicy: IfNotPresent
  # ClusterIP service hostname:
  #   registry.odoosky-system.svc.cluster.local:5000
  # Used internally by build Jobs (push) and the Odoo Deployment's
  # image volumes (pull). Plain HTTP — the registry never sees
  # off-cluster traffic; node-side k3s registries.yaml whitelists
  # the hostname for HTTP image pulls.
  service:
    port: 5000
    # NodePort the kubelet on each node uses to reach the registry
    # (via the host-side 127.0.0.1:<nodePort> mirror entry in
    # /etc/rancher/k3s/registries.yaml). 30500 sits inside the default
    # 30000-32767 NodePort range, away from its busy zone; change it
    # if the cluster already uses 30500 for something else.
    nodePort: 30500
  # Storage. The registry survives node restarts but is recreatable —
  # if the PVC is wiped, Tower's ensureAddonImage will rebuild any
  # missing images from Gitea source on demand. So we don't need a
  # large or replicated PV here.
  #
  # NOTE(review): enabled is false here although the text above
  # describes a PVC-backed registry — confirm whether the default
  # should be true or whether overlays are expected to turn it on.
  persistence:
    enabled: false
    size: 10Gi
    storageClass: ""  # "" = use the cluster's default; on k3s that's local-path
  resources:
    requests:
      cpu: 50m
      memory: 64Mi
    limits:
      cpu: 500m
      memory: 256Mi
# longhorn — CSI block storage. See ADR 0003 (odooskyv3 monorepo) for
# the full design. Phase 1 (this commit): declared but disabled.
# Per-server enablement happens via the per-cluster Argo Application's
# helm.parameters (set `longhorn.enabled=true`).
#
# Host prerequisites (already satisfied on bootstrap.sh-Connect'd
# servers): `open-iscsi` package + `iscsi_tcp` kernel module +
# `iscsid` service. Servers provisioned out-of-band must run
#   `apt-get install -y open-iscsi && modprobe iscsi_tcp &&
#    systemctl enable --now iscsid`
# before flipping enabled=true.
#
# When `longhorn.enabled=true`, the chart additionally renders:
#   - StorageClass `longhorn-tenants` (replicaCount = .replicas)
#   - VolumeSnapshotClass `longhorn-snapshot-class` for the future
#     VolumeClone Refresh ↓ path
# Existing instances on `local-path` are unaffected — Longhorn
# co-exists, doesn't replace local-path.
longhorn:
  enabled: false
  # Replicas per Longhorn volume. Standard tier (single server) =
  # 1 — durability story is hourly S3 backup, not local replicas.
  # HA-Active sets this to 2 across the cluster's worker nodes.
  replicas: 1
  # Default data path. k3s nodes get `/var/lib/longhorn` by default;
  # production servers may want this on a separate disk for IOPS
  # isolation from the OS root volume.
  defaultDataPath: /var/lib/longhorn
  # S3 backup target for Longhorn's own block-level backups (DR
  # layer alongside Tower's application-level pg_dump path). When
  # set, Longhorn writes block-incremental backups to this prefix
  # daily. Empty = block-level backup disabled, application backup
  # only.
  backupTarget: ""
  # Same S3 secret Tower's application backup already uses.
  backupCredsSecret:
    namespace: tenants
    name: s3-backup-creds
  # Disable the Helm pre-upgrade checker Job. It's annotated as a
  # `helm.sh/hook: pre-upgrade,pre-install` which Argo translates to
  # PreSync — but the Job's ServiceAccount lives in the regular sync
  # phase, so the Job fails ("ServiceAccount not found") before the
  # SA gets created. Argo's sync model already gives us proper
  # ordering on regular resources; the safety check is redundant.
  preUpgradeChecker:
    jobEnabled: false
    upgradeVersionCheck: false