Pairs each <TS>.sql.gz with a <TS>.filestore.tar.gz under the same prefix; rotation prunes both together. The backup pod lands on the same node as Odoo (podAffinity) and mounts the filestore PVC read-only; RWO is enforced per node, not per pod, so a second pod on the same node is safe. Restore (Tower-side) reads the companion key from S3, scales Odoo to 0, restores DB + filestore, and scales Odoo back up.
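The restore counterpart is not in this file. A minimal sketch of the Tower-side sequence, assuming hypothetical names (`<fullname>`, `<code>`, `<TS>`) and the usual PG* env vars; since the dump is plain-format `--clean --if-exists`, it replays through psql, no pg_restore needed:

    kubectl scale deploy/<fullname>-odoo --replicas=0
    aws --endpoint-url "$S3_ENDPOINT" s3 cp "s3://$S3_BUCKET/<code>/<TS>.sql.gz" - \
      | gunzip | psql -h <fullname>-pg
    # companion filestore, only if the key exists for this snapshot
    # (needs a pod with the filestore PVC mounted read-write):
    aws --endpoint-url "$S3_ENDPOINT" s3 cp "s3://$S3_BUCKET/<code>/<TS>.filestore.tar.gz" - \
      | tar -xzf - -C /var/lib/odoo
    kubectl scale deploy/<fullname>-odoo --replicas=1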
{{- if .Values.backups.enabled -}}
# Daily dump → S3.
#
# Architecture: pg_dump in postgres:alpine, piped through gzip, then
# `aws s3 cp -` to push the stream straight to MEGA S4. We use a
# single multi-step shell pipeline (no init container) so the dump
# never lands on the customer-server's local disk — the instance
# data and the backup destination are deliberately separated.
#
# AWS credentials come from a K8s Secret (default `s3-backup-creds`)
# provisioned out-of-band by Tower's bootstrap. Endpoint + bucket +
# prefix are committed in this file's values; only the access/secret
# pair lives in the Secret.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: {{ include "instance.fullname" . }}-backup
  labels:
    {{- include "instance.labels" . | nindent 4 }}
    odoosky.io/role: backup
spec:
  schedule: {{ .Values.backups.schedule | quote }}
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 5
  failedJobsHistoryLimit: 3
  jobTemplate:
    metadata:
      labels:
        {{- include "instance.labels" . | nindent 8 }}
        odoosky.io/role: backup
    spec:
      backoffLimit: 1
      template:
        metadata:
          labels:
            {{- include "instance.labels" . | nindent 12 }}
            odoosky.io/role: backup
        spec:
          restartPolicy: Never
          # Land on the same node as the running Odoo pod so the backup
          # container can mount the filestore PVC. The PVC is RWO, which
          # K8s enforces per *node*, not per pod — multiple pods on the
          # SAME node can mount the same volume simultaneously, so this
          # is safe and gives the backup direct read access to
          # /var/lib/odoo without disturbing Odoo.
          affinity:
            podAffinity:
              requiredDuringSchedulingIgnoredDuringExecution:
                - labelSelector:
                    matchLabels:
                      app.kubernetes.io/instance: {{ .Values.instance.code | quote }}
                      odoosky.io/role: odoo
                  topologyKey: kubernetes.io/hostname
          volumes:
            - name: filestore
              persistentVolumeClaim:
                claimName: {{ include "instance.fullname" . }}-odoo
          containers:
            - name: pgdump-s3
              # postgres:16-alpine + `apk add aws-cli` — alpine's
              # aws-cli package is ~30 MB and adds a few seconds to
              # every job run: each run starts a fresh container, so
              # the install repeats (only the image itself stays
              # cached on the node). The `command -v aws` guard below
              # lets an image that already ships aws-cli skip it.
              # Using the same image as the cluster's actual database
              # container keeps pg_dump's client version in step with
              # the server, so the client/server protocol always
              # lines up.
image: "{{ .Values.postgres.image }}:{{ .Values.postgres.tag }}"
|
|
imagePullPolicy: IfNotPresent
|
|
volumeMounts:
|
|
- name: filestore
|
|
mountPath: /var/lib/odoo
|
|
readOnly: true
|
|
env:
|
|
- name: PGHOST
|
|
value: {{ include "instance.fullname" . }}-pg
|
|
- name: PGUSER
|
|
valueFrom:
|
|
secretKeyRef:
|
|
name: {{ include "instance.fullname" . }}-pg
|
|
key: POSTGRES_USER
|
|
- name: PGPASSWORD
|
|
valueFrom:
|
|
secretKeyRef:
|
|
name: {{ include "instance.fullname" . }}-pg
|
|
key: POSTGRES_PASSWORD
|
|
- name: PGDATABASE
|
|
valueFrom:
|
|
secretKeyRef:
|
|
name: {{ include "instance.fullname" . }}-pg
|
|
key: POSTGRES_DB
|
|
- name: AWS_ACCESS_KEY_ID
|
|
valueFrom:
|
|
secretKeyRef:
|
|
name: {{ .Values.backups.credentialsSecret }}
|
|
key: AWS_ACCESS_KEY_ID
|
|
- name: AWS_SECRET_ACCESS_KEY
|
|
valueFrom:
|
|
secretKeyRef:
|
|
name: {{ .Values.backups.credentialsSecret }}
|
|
key: AWS_SECRET_ACCESS_KEY
|
|
- name: S3_ENDPOINT
|
|
value: {{ .Values.backups.s3.endpoint | quote }}
|
|
- name: AWS_DEFAULT_REGION
|
|
value: {{ .Values.backups.s3.region | quote }}
|
|
- name: S3_BUCKET
|
|
value: {{ .Values.backups.s3.bucket | quote }}
|
|
- name: S3_PREFIX
|
|
value: {{ .Values.instance.code | quote }}
|
|
- name: RETAIN
|
|
value: {{ .Values.backups.retain | quote }}
|
|
command:
|
|
- /bin/sh
|
|
- -c
|
|
- |
|
|
# `pipefail` is critical: without it, a failed
|
|
# pg_dump piped into `aws s3 cp` would still produce
|
|
# a 0-exit "successful" job (the cp succeeded
|
|
# uploading empty/garbage data). With pipefail any
|
|
# element of the pipe failing fails the whole thing.
|
|
set -euo pipefail
|
|
TS=$(date -u +%Y%m%dT%H%M%SZ)
|
|
SQL_KEY="${S3_PREFIX}/${TS}.sql.gz"
|
|
FS_KEY="${S3_PREFIX}/${TS}.filestore.tar.gz"
|
|
if ! command -v aws >/dev/null 2>&1; then
|
|
apk add --no-cache aws-cli tar >/dev/null
|
|
fi
|
|
echo ">>> dumping DB to s3://${S3_BUCKET}/${SQL_KEY}"
|
|
pg_dump --format=plain --clean --if-exists --no-owner --no-acl \
|
|
| gzip -9 \
|
|
| aws --endpoint-url "$S3_ENDPOINT" s3 cp - "s3://${S3_BUCKET}/${SQL_KEY}"
|
|
echo ">>> archiving filestore to s3://${S3_BUCKET}/${FS_KEY}"
|
|
                  # Tar the filestore tree from /var/lib/odoo. If the
                  # dir is empty (fresh instance) we skip the upload;
                  # restore treats an absent filestore object as "no
                  # filestore captured for this snapshot" (the same
                  # convention older backups already follow).
                  if [ -d /var/lib/odoo ] && [ -n "$(ls -A /var/lib/odoo 2>/dev/null)" ]; then
                    tar -czf - -C /var/lib/odoo . \
                      | aws --endpoint-url "$S3_ENDPOINT" s3 cp - "s3://${S3_BUCKET}/${FS_KEY}"
                  else
                    echo "(filestore empty; skipping archive)"
                  fi
echo ">>> rotating: keep last $RETAIN snapshots under ${S3_PREFIX}/"
|
|
# Group keys by timestamp prefix (everything before
|
|
# the first dot after the date) and prune the oldest
|
|
# groups. Both .sql.gz and .filestore.tar.gz share
|
|
# the same timestamp prefix, so groups stay paired.
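                  # Illustration (hypothetical prefix "p0042"): a run
                  # at 2025-01-02 03:00 UTC uploads the paired keys
                  #   p0042/20250102T030000Z.sql.gz
                  #   p0042/20250102T030000Z.filestore.tar.gz
                  # and rotation later deletes both in the same pass.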
                  aws --endpoint-url "$S3_ENDPOINT" s3api list-objects-v2 \
                    --bucket "$S3_BUCKET" --prefix "${S3_PREFIX}/" \
                    --query 'Contents[].Key' --output text 2>/dev/null \
                    | tr '\t' '\n' \
                    | grep -E '\.sql\.gz$' \
                    | sort -r | tail -n +$((RETAIN + 1)) \
                    | while read -r OLDSQL; do
                        [ -n "$OLDSQL" ] || continue
                        OLDFS="${OLDSQL%.sql.gz}.filestore.tar.gz"
                        echo ">>> deleting: $OLDSQL + $OLDFS (if present)"
                        aws --endpoint-url "$S3_ENDPOINT" s3 rm "s3://${S3_BUCKET}/${OLDSQL}" || true
                        aws --endpoint-url "$S3_ENDPOINT" s3 rm "s3://${S3_BUCKET}/${OLDFS}" 2>/dev/null || true
                      done

                  echo ">>> done"
              resources:
                requests:
                  cpu: 100m
                  memory: 256Mi
                limits:
                  cpu: "1"
                  memory: 1Gi
{{- end }}
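
For orientation, the values shape this template expects. A sketch assembled from the references above; the key names are real, but every value shown is illustrative only:

    backups:
      enabled: true
      schedule: "0 2 * * *"
      retain: 7
      credentialsSecret: s3-backup-creds
      s3:
        endpoint: https://s3.example.com   # placeholder, not a real S4 endpoint
        region: eu-central-1
        bucket: odoosky-backups
    postgres:
      image: postgres
      tag: 16-alpine
    instance:
      code: p0042   # hypothetical, matches the rotation example above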