{{- if .Values.backups.enabled -}}
# Daily dump → S3.
#
# Architecture: pg_dump in postgres:alpine, pipe through gzip, then
# `aws s3 cp -` to push the stream straight to MEGA S4. The filestore
# is tarred and streamed to the same prefix, and old snapshots are
# rotated at the end of the run. We use a single multi-stage shell
# command (no init container) so the dump never lands on the
# customer-server's local disk — the instance data and the backup
# destination are deliberately separated.
#
# AWS credentials come from a K8s Secret (default `s3-backup-creds`)
# provisioned out-of-band by Tower's bootstrap. Endpoint + bucket +
# prefix are set in the chart's values; only the access/secret
# pair lives in the Secret.
# The `odoosky.io/role: backup` label is applied at all three levels
# (CronJob, Job, pod) so each generation of object can be selected
# the same way.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: {{ include "instance.fullname" . }}-backup
  labels:
    {{- include "instance.labels" . | nindent 4 }}
    odoosky.io/role: backup
spec:
  # Cron expression from values; quoted so an expression starting
  # with `*` is still rendered as a plain YAML string.
  schedule: {{ .Values.backups.schedule | quote }}
  # Forbid: a scheduled run is skipped while the previous Job is still
  # active, so two backups of the same instance never run in parallel.
  concurrencyPolicy: Forbid
  # Finished Jobs kept around for inspection: the last 5 successes
  # and the last 3 failures.
  successfulJobsHistoryLimit: 5
  failedJobsHistoryLimit: 3
  jobTemplate:
    metadata:
      labels:
        {{- include "instance.labels" . | nindent 8 }}
        odoosky.io/role: backup
    spec:
      # One retry per scheduled run. Combined with restartPolicy: Never
      # below, a retry is a brand-new pod rather than an in-place
      # container restart.
      backoffLimit: 1
      template:
        metadata:
          labels:
            {{- include "instance.labels" . | nindent 12 }}
            odoosky.io/role: backup
        spec:
          restartPolicy: Never
          # Land on the same node as the running Odoo pod so the backup
          # container can mount the filestore PVC. The PVC is RWO
          # (ReadWriteOnce), which restricts the volume to ONE NODE at
          # a time, not one pod — multiple pods on the SAME node can
          # mount the same volume simultaneously, so this is safe and
          # gives the backup direct read access to /var/lib/odoo
          # without disturbing Odoo.
          #
          # NOTE(review): the rule is `required`, so if no pod carrying
          # both labels below is running, the backup pod cannot be
          # scheduled and stays Pending. No activeDeadlineSeconds is
          # set in this template and concurrencyPolicy is Forbid, so a
          # stuck Job would also block later runs — confirm this is
          # acceptable for stopped/scaled-down instances.
          affinity:
            podAffinity:
              requiredDuringSchedulingIgnoredDuringExecution:
                - labelSelector:
                    matchLabels:
                      app.kubernetes.io/instance: {{ .Values.instance.code | quote }}
                      odoosky.io/role: odoo
                  # Co-locate at node granularity (same hostname).
                  topologyKey: kubernetes.io/hostname
          # Filestore volume, backed by the `<fullname>-odoo` claim —
          # presumably the same PVC the Odoo pod mounts (verify against
          # the Odoo workload template). The container mounts it
          # read-only at /var/lib/odoo.
          volumes:
            - name: filestore
              persistentVolumeClaim:
                claimName: {{ include "instance.fullname" . }}-odoo
          containers:
            - name: pgdump-s3
              # Image comes from the chart-wide postgres.image /
              # postgres.tag values — presumably the same ones the
              # instance's database container uses, so pg_dump's
              # version lines up with the server (TODO confirm the
              # database workload reads the same values).
              #
              # If the image does not already ship aws-cli, the
              # script below installs it with `apk add` (alpine's
              # aws-cli package is ~30 MB). That install lands in the
              # pod's ephemeral writable layer, so it is repeated on
              # EVERY job run: the node's image cache only covers the
              # base image pull, not packages added at runtime. Bake
              # aws-cli into a custom image if the per-run download
              # becomes a problem.
              image: "{{ .Values.postgres.image }}:{{ .Values.postgres.tag }}"
              imagePullPolicy: IfNotPresent
              # Read-only: the backup only ever reads the filestore.
              volumeMounts:
                - name: filestore
                  mountPath: /var/lib/odoo
                  readOnly: true
              # Settings consumed by the backup script. pg_dump is
              # invoked without connection flags, so it takes host,
              # user, password and database from the PG* variables;
              # the AWS CLI reads the AWS_* variables; S3_ENDPOINT,
              # S3_BUCKET, S3_PREFIX and RETAIN are read by the script
              # itself.
              env:
                - name: PGHOST
                  value: {{ include "instance.fullname" . }}-pg
                - name: PGUSER
                  valueFrom:
                    secretKeyRef:
                      name: {{ include "instance.fullname" . }}-pg
                      key: POSTGRES_USER
                - name: PGPASSWORD
                  valueFrom:
                    secretKeyRef:
                      name: {{ include "instance.fullname" . }}-pg
                      key: POSTGRES_PASSWORD
                # PGDATABASE is the instance's real Odoo database, named
                # after the release. The secret's POSTGRES_DB key points
                # at the system DB Postgres init creates ("postgres"),
                # which is empty — using it produced 374-byte useless
                # dumps. Hardcode the release name so backups capture
                # the real data regardless of chart-secret defaults.
                - name: PGDATABASE
                  value: {{ .Release.Name | quote }}
                # S3 access/secret pair. The Secret name is quoted like
                # every other values-derived scalar here, so a name
                # that looks numeric or boolean is still rendered as a
                # YAML string.
                - name: AWS_ACCESS_KEY_ID
                  valueFrom:
                    secretKeyRef:
                      name: {{ .Values.backups.credentialsSecret | quote }}
                      key: AWS_ACCESS_KEY_ID
                - name: AWS_SECRET_ACCESS_KEY
                  valueFrom:
                    secretKeyRef:
                      name: {{ .Values.backups.credentialsSecret | quote }}
                      key: AWS_SECRET_ACCESS_KEY
                - name: S3_ENDPOINT
                  value: {{ .Values.backups.s3.endpoint | quote }}
                - name: AWS_DEFAULT_REGION
                  value: {{ .Values.backups.s3.region | quote }}
                - name: S3_BUCKET
                  value: {{ .Values.backups.s3.bucket | quote }}
                # S3_PREFIX -- the instance's durable identity. Prefer
                # instance.id (`<code>-<shortid>`, unique per lifetime,
                # written by Tower at create + backfill); fall back to
                # the bare code for overlays not yet carrying an id
                # (transition window). Once every overlay has an id the
                # fallback is dead and can be dropped.
                - name: S3_PREFIX
                  value: {{ .Values.instance.id | default .Values.instance.code | quote }}
                # Number of dated snapshot pairs the rotation step
                # keeps. Rendered as a string; an unset value renders
                # as "".
                - name: RETAIN
                  value: {{ .Values.backups.retain | quote }}
              # Backup script, run via `/bin/sh -c`. Steps, in order:
              #   1. install aws-cli (+ tar) if the image has no `aws`
              #   2. stream `pg_dump | gzip` to
              #      s3://$S3_BUCKET/$S3_PREFIX/<ts>.sql.gz
              #   3. stream a tar.gz of /var/lib/odoo (minus sessions)
              #      to the matching <ts>.filestore.tar.gz key
              #   4. delete all but the newest $RETAIN dated snapshots
              # Any failing step fails the job (`set -euo pipefail`).
              command:
                - /bin/sh
                - -c
                - |
                  # `pipefail` is critical: without it, a failed
                  # pg_dump piped into `aws s3 cp` would still produce
                  # a 0-exit "successful" job (the cp succeeded
                  # uploading empty/garbage data). With pipefail any
                  # element of the pipe failing fails the whole thing.
                  set -euo pipefail
                  # One UTC timestamp per run; both objects share it
                  # so the DB dump and filestore archive stay paired.
                  TS=$(date -u +%Y%m%dT%H%M%SZ)
                  SQL_KEY="${S3_PREFIX}/${TS}.sql.gz"
                  FS_KEY="${S3_PREFIX}/${TS}.filestore.tar.gz"
                  if ! command -v aws >/dev/null 2>&1; then
                    apk add --no-cache aws-cli tar >/dev/null
                  fi
                  echo ">>> dumping DB to s3://${S3_BUCKET}/${SQL_KEY}"
                  # Connection parameters come from the PG* env vars.
                  pg_dump --format=plain --clean --if-exists --no-owner --no-acl \
                    | gzip -9 \
                    | aws --endpoint-url "$S3_ENDPOINT" s3 cp - "s3://${S3_BUCKET}/${SQL_KEY}"
                  echo ">>> archiving filestore to s3://${S3_BUCKET}/${FS_KEY}"
                  # Tar the filestore tree from /var/lib/odoo. If the
                  # dir is missing or empty (fresh instance) NO archive
                  # is uploaded — restore code treats an absent
                  # filestore object as "no filestore captured for
                  # this snapshot" (same as older backups).
                  if [ -d /var/lib/odoo ] && [ -n "$(ls -A /var/lib/odoo 2>/dev/null)" ]; then
                    # Exclude ./sessions — HTTP session cache,
                    # unique per cookie, expires on Odoo restart
                    # anyway. Including it bloats the archive and
                    # slows backups by minutes on busy instances.
                    tar -czf - --exclude=./sessions -C /var/lib/odoo . \
                      | aws --endpoint-url "$S3_ENDPOINT" s3 cp - "s3://${S3_BUCKET}/${FS_KEY}"
                  else
                    echo "(filestore empty; skipping archive)"
                  fi
                  # Refuse to rotate unless RETAIN is a positive
                  # integer. An empty value evaluates to 0 inside
                  # $((RETAIN + 1)), turning the `tail` below into
                  # `tail -n +1` — i.e. "delete EVERY snapshot,
                  # including the one just uploaded"; RETAIN=0 does
                  # the same. Leading zeros are rejected too because
                  # shell arithmetic would read them as octal. The
                  # fresh backup is already in the bucket at this
                  # point, so failing here loses nothing and surfaces
                  # the misconfiguration as a failed job.
                  case "${RETAIN:-}" in
                    ''|0*|*[!0-9]*)
                      echo "!!! RETAIN='${RETAIN:-}' is not a positive integer; refusing to rotate (nothing deleted)" >&2
                      exit 1
                      ;;
                  esac
                  echo ">>> rotating: keep last $RETAIN snapshots under ${S3_PREFIX}/"
                  # Rotation must scope to the TOP-LEVEL dated backup
                  # files only — never touch checkpoints/, exports/,
                  # or blobs/ subdirectories. The previous version
                  # listed everything recursively under ${S3_PREFIX}/
                  # and grep'd for *.sql.gz, which matched
                  # checkpoints/<id>/db.sql.gz too — those sort
                  # ahead of the bare dated keys in reverse-alpha
                  # ('c' > '2'), so legitimate top-level backups
                  # routinely fell out of the keep-window AND
                  # checkpoint safety copies got deleted as a side
                  # effect. Use --delimiter / so list-objects-v2
                  # returns ONLY direct children of the prefix; the
                  # subdirectory keys come back as CommonPrefixes
                  # (which we discard).
                  #
                  # Both .sql.gz and .filestore.tar.gz share the
                  # same timestamp stem, so prune-by-stem keeps
                  # pairs aligned.
                  #
                  # Pipeline: list keys -> one key per line -> keep
                  # only dated .sql.gz keys -> newest first -> drop
                  # the first $RETAIN -> delete what is left.
                  aws --endpoint-url "$S3_ENDPOINT" s3api list-objects-v2 \
                    --bucket "$S3_BUCKET" --prefix "${S3_PREFIX}/" \
                    --delimiter / \
                    --query 'Contents[].Key' --output text 2>/dev/null \
                    | tr '\t' '\n' \
                    | grep -E "^${S3_PREFIX}/[0-9]{8}T[0-9]{6}Z\.sql\.gz$" \
                    | sort -r | tail -n +$((RETAIN + 1)) \
                    | while read -r OLDSQL; do
                        [ -n "$OLDSQL" ] || continue
                        OLDFS="${OLDSQL%.sql.gz}.filestore.tar.gz"
                        echo ">>> deleting: $OLDSQL + $OLDFS (if present)"
                        # Deletes are best-effort: a failed rm must
                        # not fail a run whose backup succeeded. The
                        # filestore object may legitimately not exist.
                        aws --endpoint-url "$S3_ENDPOINT" s3 rm "s3://${S3_BUCKET}/${OLDSQL}" || true
                        aws --endpoint-url "$S3_ENDPOINT" s3 rm "s3://${S3_BUCKET}/${OLDFS}" 2>/dev/null || true
                      done
                  echo ">>> done"
              # Ceiling first, floor second: the dump/compress/upload
              # pipeline may use up to one full CPU and 1Gi of memory,
              # while the scheduler only has to reserve a tenth of a
              # core and 256Mi for the pod.
              resources:
                limits:
                  memory: 1Gi
                  cpu: "1"
                requests:
                  memory: 256Mi
                  cpu: 100m
{{- end }}