# cluster-platform-v3 — defaults.
#
# Most knobs you'd flip live here so customer-cluster overlays can
# tune sizing without forking the chart.

# Namespace name used by this chart — presumably where the platform
# components are deployed (confirm against the templates). The same
# name appears in the registry hostname documented under `registry`
# and in secrets.cloudflareTokenSecret.namespace.
namespace: "odoosky-system"


# cluster — per-cluster identity passed by Tower as helm.values on each
# per-cluster Application. The chart uses cluster.name to add a
# differentiator SAN to the tenants-wildcard Certificate so Let's
# Encrypt's duplicate-cert rate limit doesn't collide across one
# tenant's multiple clusters.
cluster:
  name: ""

# tenant — per-tenant identity injected by Tower as helm.values on
# the per-cluster Argo Application. Empty defaults are safe to lint
# but a real deploy MUST set domain + wildcardHost (the Certificate
# template fails with `required` on an empty value).
tenant:
  # Domain the Cloudflare zone covers, e.g. "acme-erp.com".
  # Mirror of domains[primary].root — kept for legacy chart consumers.
  domain: ""
  # Wildcard hostname the cluster-wide tenants-wildcard cert covers,
  # e.g. "*.tenants.acme-erp.com". Mirror of domains[primary].wildcardHost.
  # Every tenant instance Ingress references the resulting Secret
  # (`tenants-wildcard-tls` in the `tenants` namespace) by name.
  wildcardHost: ""
  # domains — full multi-domain list (#320.C). Tower passes one entry
  # per domain the tenant has registered; the chart issues one
  # wildcard Certificate per VERIFIED entry. The primary entry
  # produces the canonical `tenants-wildcard-tls` Secret; non-primary
  # entries get `tenants-wildcard-<root-with-dots-as-dashes>-tls`.
  # Empty list = legacy single-domain mode (chart synthesizes one
  # entry from domain + wildcardHost above).
  #
  # Each entry is ONE mapping carrying all four keys:
  #   - root:         "acme.com"
  #     wildcardHost: "*.tenants.acme.com"
  #     primary:      true   # exactly one entry should be primary
  #     verified:     true   # chart skips entries with verified=false
  domains: []

# acme — Let's Encrypt registration. Operator email is per-platform,
# not per-tenant.
acme:
  # Contact address used for the Let's Encrypt registration.
  email: "m@havari.me"
  # ACME directory URL. This is the Let's Encrypt production
  # endpoint, so its rate limits apply.
  server: "https://acme-v02.api.letsencrypt.org/directory"

# certManager — on/off gate for the cert-manager subchart dependency.
# Chart.yaml declares `condition: certManager.enabled` on that
# dependency, and this flag is the ONLY thing Helm reads from this key.
# The subchart's actual values live below under the dependency name
# `cert-manager`, not here.
certManager:
  enabled: true

# cert-manager — values passed THROUGH to the upstream jetstack subchart
# (Chart.yaml dependency name = "cert-manager"). Subchart values must
# nest under the dep name, not under our top-level `certManager` alias —
# putting them under `certManager:` does nothing.
#
# crds.enabled — install the cert-manager CRDs in the same release. The
# v1.14+ jetstack chart renamed `installCRDs` to `crds.enabled`; the
# old key is silently ignored, leaving the CRDs absent and any
# Certificate / ClusterIssuer manifest failing with "no matches for kind".
# crds.keep — leave CRDs in place if the chart is uninstalled. Safer for
# disconnect flows where the customer might re-add the cluster later.
cert-manager:
  crds:
    # Install the cert-manager CRDs as part of this release. The chart
    # issues Certificate resources itself; without the CRDs (and with
    # no out-of-band CRD install) those manifests fail with
    # "no matches for kind".
    enabled: true
    # Leave the CRDs in place when the chart is uninstalled, so a
    # cluster that is disconnected and later re-added keeps them.
    keep: true
  # startupapicheck — disabled (Slice 2B.1.2, 2026-05-04). The
  # subchart includes a Job that runs as a PostSync hook and tries
  # to verify cert-manager's API is responsive by issuing a test
  # cert through it. Two real costs once cert-manager is proven on
  # the platform:
  #   1. The Job's PostSync hook gates Argo's sync from completing.
  #      On every chart sync (not just install), Argo waits for the
  #      Job to succeed before flipping the App to Synced.
  #   2. When the wildcard Cert is in error (e.g. LE rate limit),
  #      the Job adds even more retry overhead — Argo loops forever.
  # We're not adopting cert-manager fresh — every connect ships the
  # same proven version, the install API surface is stable. The
  # check is dead-weight that masks the actual install timing.
  startupapicheck:
    enabled: false

# traefik — upstream chart. LoadBalancer Service so the customer's
# k3s servicelb maps :80/:443 to the host. Tower currently doesn't
# rely on Traefik's IngressRoute features here; instances are on
# their own per-tenant Traefik later. This Traefik gives the cluster
# a default ingress for the registry + future platform endpoints.
traefik:
  enabled: true
  service:
    # LoadBalancer so the customer's k3s servicelb can map :80/:443
    # to the host.
    type: "LoadBalancer"

# secrets — Tower applies these out-of-band via the registered
# kubeconfig at Connect time (B2). The chart references them by
# name only; values never enter Git.
secrets:
  # Cloudflare API token Secret: where it lives, plus `key`, which
  # presumably names the data entry holding the token (confirm against
  # the template that consumes it).
  cloudflareTokenSecret:
    namespace: "odoosky-system"
    name: "cloudflare-api-token"
    key: "api-token"
  # S3 backup credentials Secret: referenced by namespace + name only;
  # no `key` field is declared for this one.
  s3CredentialsSecret:
    namespace: "tenants"
    name: "s3-backup-creds"

registry:
  enabled: true
  image:
    repository: registry
    tag: "2.8"
    pullPolicy: IfNotPresent
  # ClusterIP service hostname:
  #   registry.odoosky-system.svc.cluster.local:5000
  # Used internally by build Jobs (push) and the Odoo Deployment's
  # image volumes (pull). Plain HTTP — the registry never sees
  # off-cluster traffic; node-side k3s registries.yaml whitelists
  # the hostname for HTTP image pulls.
  service:
    port: 5000
    # NodePort the kubelet on each node uses to reach the registry
    # (via the host-side 127.0.0.1:<nodePort> mirror entry in
    # /etc/rancher/k3s/registries.yaml). Picked outside the default
    # 30000-32767 NodePort range's busy zone; change if the cluster
    # already uses 30500 for something else.
    nodePort: 30500
  # Storage. The registry survives node restarts but is recreatable —
  # if the PVC is wiped, Tower's ensureAddonImage will rebuild any
  # missing images from Gitea source on demand. So we don't need a
  # large or replicated PV here.
  persistence:
    # On by default: the "survives node restarts" behaviour described
    # above needs a PVC behind the registry. Overlays may turn it off
    # where rebuilding images on demand after a restart is acceptable.
    enabled: true
    size: 10Gi
    storageClass: ""  # "" = use the cluster's default; on k3s that's local-path
  resources:
    requests:
      cpu: 50m
      memory: 64Mi
    limits:
      cpu: 500m
      memory: 256Mi

# longhorn — CSI block storage. See ADR 0003 (odooskyv3 monorepo) for
# the full design. Phase 1 (this commit): declared but disabled.
# Per-server enablement happens via the per-cluster Argo Application's
# helm.parameters (set `longhorn.enabled=true`).
#
# Host prerequisites (already satisfied on bootstrap.sh-Connect'd
# servers): `open-iscsi` package + `iscsi_tcp` kernel module +
# `iscsid` service. Servers provisioned out-of-band must run
# `apt-get install -y open-iscsi && modprobe iscsi_tcp &&
# systemctl enable --now iscsid` before flipping enabled=true.
#
# When `longhorn.enabled=true`, the chart additionally renders:
#   - StorageClass `longhorn-tenants` (replicaCount = .replicas)
#   - VolumeSnapshotClass `longhorn-snapshot-class` for the future
#     VolumeClone Refresh ↓ path
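# Existing instances on `local-path` are unaffected — Longhorn
# co-exists, doesn't replace local-path.
#
# (The `longhorn:` key these notes describe comes after the
# `csiSnapshotter` block that follows.)
#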
# csiSnapshotter — vendored kubernetes-csi/external-snapshotter
# v8.1.0. Provides the standard `snapshot.storage.k8s.io/v1` CRDs
# + snapshot-controller. Required for Tower's CSI VolumeClone path
# (Refresh ↓ + spawn-env seed). See ADR 0003 phase 3.
#
# Only needed when Longhorn (or any other snapshot-capable CSI
# driver) is in use. It nevertheless defaults to true — independently
# of `longhorn.enabled`, which defaults to false — so future server
# connects get the substrate ready out of the box.
csiSnapshotter:
  enabled: true

longhorn:
  # Off by default; switched on per server through the per-cluster
  # Argo Application's helm.parameters.
  enabled: false
  # Replicas per Longhorn volume. Standard tier (single server) =
  # 1 — durability story is async S3 backup, not local replicas.
  # HA-Active sets this to 2 across the cluster's worker nodes.
  replicas: 1
  # Phase 5 of ADR 0003 — Longhorn's own settings, handed unchanged
  # to the subchart's `defaultSettings`. Two layers work together:
  #
  #   1. Local CoW snapshots (Longhorn `task: snapshot`) — instant,
  #      zero-blocking, hourly retention. Used for fast undo.
  #   2. Async S3 backup (Longhorn `task: backup`) — block-incremental
  #      upload to tenant's bucket, gradual, never blocks workflow.
  #      Daily retention. The DR layer alongside Tower's existing
  #      application-level pg_dump backup (which is for cross-cluster
  #      migration; Longhorn-S3 is for fast same-cluster restore).
  #
  # The RecurringJob objects that drive both layers are defined in
  # templates/longhorn-recurringjobs.yaml and bind to all volumes
  # via the `default` group automatically.
  defaultSettings:
    defaultDataPath: "/var/lib/longhorn"
    # backupTarget — set per server via the Argo App's helm
    # parameters to turn on the async S3 backup channel. Format:
    # `s3://<bucket>@<region>/<prefix>/`. Empty = local snapshots
    # only (the local layer still works; there is just no
    # off-cluster copy).
    backupTarget: ""
    # backupTargetCredentialSecret — name of the K8s Secret in the
    # `longhorn-system` namespace carrying AWS_ACCESS_KEY_ID +
    # AWS_SECRET_ACCESS_KEY. The operator kubectl-applies it once per
    # cluster (same pattern as cloudflare-api-token). It has to live
    # in that namespace: Longhorn does not allow cross-namespace
    # Secret references.
    backupTargetCredentialSecret: ""
  # Disable the Helm pre-upgrade checker Job. It carries the
  # annotation `helm.sh/hook: pre-upgrade,pre-install`, which Argo
  # translates to PreSync — but the Job's ServiceAccount is created in
  # the regular sync phase, so the Job fails ("ServiceAccount not
  # found") before the SA exists. Argo's sync model already gives us
  # proper ordering on regular resources; the safety check is
  # redundant.
  preUpgradeChecker:
    jobEnabled: false
    # NOTE(review): the version check is switched off together with
    # the Job — presumably it only matters when the checker runs;
    # confirm against the Longhorn chart.
    upgradeVersionCheck: false
  # Don't mark Longhorn's bundled StorageClass as cluster-default.
  # k3s ships local-path as default; we keep it that way. New
  # instances stay on local-path unless Tower explicitly stamps
  # storageClassName=longhorn-tenants on their PVCs (Phase 6 of
  # ADR 0003 will do that). Two `default` storage classes is a
  # k8s misconfig — silently picks one for unscoped PVCs.
  persistence:
    defaultClass: false
    defaultClassReplicaCount: 1