Backups: pg_dump → S3 (MEGA S4); drop local PVC

git_admin
2026-04-26 21:17:34 +03:00
parent 24f879db01
commit 3a63da9609
3 changed files with 75 additions and 45 deletions

View File

@@ -1,10 +1,16 @@
{{- if .Values.backups.enabled -}}
# Daily dump job. Same image as the Postgres pod, so pg_dump is
# version-matched. Output goes to the dedicated backup PVC; the same
# job script prunes older dumps to honor `backups.retain`.
# Daily dump → S3.
#
# Tower's "Backup Now" feature creates a one-off Job from this same
# template at request time — see backend/cmd/api/backups.go.
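# (Conceptually the same as `kubectl create job backup-now
# --from=cronjob/<cronjob-name>`, though Tower builds the Job via the
# API rather than shelling out; the names here are illustrative.)
#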
# Architecture: pg_dump inside the backup container, piped through
# gzip, then `aws s3 cp -` to push the stream straight to MEGA S4.
# We use a single shell pipeline (no init container) so the dump
# never lands on the customer server's local disk — the instance
# data and the backup destination are deliberately separated.
#
# AWS credentials come from a K8s Secret (default `s3-backup-creds`)
# provisioned out-of-band by Tower's bootstrap. Endpoint, bucket, and
# prefix are non-secret and committed in values.yaml; only the
# access/secret key pair lives in the Secret.
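# The Secret is expected to look roughly like this (illustrative
# shape, not part of this chart):
#
#   apiVersion: v1
#   kind: Secret
#   metadata:
#     name: s3-backup-creds
#   type: Opaque
#   stringData:
#     AWS_ACCESS_KEY_ID: "..."
#     AWS_SECRET_ACCESS_KEY: "..."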
apiVersion: batch/v1
kind: CronJob
metadata:
@@ -32,8 +38,11 @@ spec:
spec:
restartPolicy: Never
containers:
- name: pgdump
image: "{{ .Values.postgres.image }}:{{ .Values.postgres.tag }}"
- name: pgdump-s3
# The image must carry both pg_dump (postgresql-client) and
# aws-cli. Long-term we should bake both into a small dedicated
# image; for now bitnami's prebuilt provides pg_dump, and the
# job script apt-installs aws-cli at startup.
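# That future dedicated image could be as small as (sketch,
# untested; package names assume current Alpine repos):
#   FROM alpine:3.20
#   RUN apk add --no-cache postgresql16-client aws-cli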
image: bitnami/postgresql:16
imagePullPolicy: IfNotPresent
env:
- name: PGHOST
@@ -53,29 +62,55 @@ spec:
secretKeyRef:
name: {{ include "instance.fullname" . }}-pg
key: POSTGRES_DB
- name: AWS_ACCESS_KEY_ID
valueFrom:
secretKeyRef:
name: {{ .Values.backups.credentialsSecret }}
key: AWS_ACCESS_KEY_ID
- name: AWS_SECRET_ACCESS_KEY
valueFrom:
secretKeyRef:
name: {{ .Values.backups.credentialsSecret }}
key: AWS_SECRET_ACCESS_KEY
- name: S3_ENDPOINT
value: {{ .Values.backups.s3.endpoint | quote }}
- name: AWS_DEFAULT_REGION
value: {{ .Values.backups.s3.region | quote }}
- name: S3_BUCKET
value: {{ .Values.backups.s3.bucket | quote }}
- name: S3_PREFIX
value: {{ .Values.instance.code | quote }}
- name: RETAIN
value: {{ .Values.backups.retain | quote }}
command:
- /bin/sh
- /bin/bash
- -c
- |
set -e
set -euo pipefail
TS=$(date -u +%Y%m%dT%H%M%SZ)
OUT=/backups/${TS}.sql.gz
mkdir -p /backups
echo ">>> pg_dump → $OUT"
KEY="${S3_PREFIX}/${TS}.sql.gz"
echo ">>> dumping to s3://${S3_BUCKET}/${KEY}"
# Install aws-cli if missing. Each Job pod starts from a fresh
# image, so this runs on every backup; bitnami/postgresql is
# debian-based, so apt is available and fast. Note apt needs
# root, so the Job must not run as the image's default
# non-root user.
if ! command -v aws >/dev/null 2>&1; then
apt-get update -qq && apt-get install -y -qq awscli >/dev/null
fi
pg_dump --format=plain --clean --if-exists --no-owner --no-acl \
| gzip -9 > "$OUT"
echo ">>> wrote $(du -h "$OUT" | cut -f1)"
# Rotate: keep only the newest $RETAIN dumps.
cd /backups
ls -1t *.sql.gz 2>/dev/null \
| awk -v n=$RETAIN 'NR > n' \
| xargs -r rm -v
ls -lh /backups
volumeMounts:
- name: backups
mountPath: /backups
| gzip -9 \
| aws --endpoint-url "$S3_ENDPOINT" s3 cp - "s3://${S3_BUCKET}/${KEY}"
echo ">>> uploaded"
echo ">>> rotating: keep last $RETAIN under ${S3_PREFIX}/"
# List, sort newest-first, drop the top N, delete the rest.
aws --endpoint-url "$S3_ENDPOINT" s3api list-objects-v2 \
--bucket "$S3_BUCKET" --prefix "${S3_PREFIX}/" \
--query 'Contents[].Key' --output text 2>/dev/null \
| tr '\t' '\n' | sort -r | tail -n +$((RETAIN + 1)) \
| while read -r OLDKEY; do
# `--output text` prints "None" when the prefix is empty; skip it.
[ -n "$OLDKEY" ] && [ "$OLDKEY" != "None" ] || continue
echo ">>> deleting old: $OLDKEY"
aws --endpoint-url "$S3_ENDPOINT" s3 rm "s3://${S3_BUCKET}/${OLDKEY}"
done
echo ">>> done"
resources:
requests:
cpu: 100m
@@ -83,8 +118,4 @@ spec:
limits:
cpu: "1"
memory: 1Gi
volumes:
- name: backups
persistentVolumeClaim:
claimName: {{ include "instance.fullname" . }}-backups
{{- end }}

View File

@@ -1,14 +0,0 @@
{{- if .Values.backups.enabled -}}
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: {{ include "instance.fullname" . }}-backups
labels:
{{- include "instance.labels" . | nindent 4 }}
odoosky.io/role: backups
spec:
accessModes: [ReadWriteOnce]
resources:
requests:
storage: {{ .Values.backups.storage | quote }}
{{- end }}

View File

@@ -78,11 +78,24 @@ backups:
# Cron schedule for the automatic backup job. Default 03:00 UTC
# daily, a quiet hour for most timezones and outside business
# hours in EU/US/Asia.
schedule: "0 3 * * *"
# PVC size for retained dumps. Holds ~7 days of dumps for a small
# instance; scale up via overlay if the instance has a large DB.
storage: 10Gi
# How many dumps to retain. Older ones are pruned by the same Job.
# How many dumps to retain in S3. The backup job prunes older
# objects matching the instance's prefix on every successful run.
retain: 7
# S3-compatible destination. The endpoint + region + bucket are
# NON-secret and live in this committed values.yaml; the AWS
# credentials live in a K8s Secret named by `credentialsSecret`,
# provisioned out-of-band by Tower's bootstrap script (which reads
# from OpenBao). The chart never sees access/secret keys directly.
s3:
endpoint: https://s3.eu-central-1.s4.mega.io
region: eu-central-1
bucket: odoosky-v3-backups
# Per-instance S3 key prefix. Each instance writes under its own
# code/ subdirectory inside the shared bucket. Note that Helm does
# not render templates inside values.yaml itself; the CronJob
# template reads .Values.instance.code directly, and this entry
# documents the convention.
prefix: "{{ .Values.instance.code }}"
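# If another template ever consumes this key, it must be passed
# through `tpl` to expand, e.g. (illustrative):
#   value: {{ tpl .Values.backups.s3.prefix . | quote }}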
# Name of the K8s Secret holding AWS_ACCESS_KEY_ID +
# AWS_SECRET_ACCESS_KEY, referenced via secretKeyRef env vars
# on the backup Job.
credentialsSecret: s3-backup-creds
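# Tower's bootstrap creates it out-of-band, roughly like so (the
# namespace placeholder is illustrative; the real flow reads the
# key pair from OpenBao):
#   kubectl -n <instance-namespace> create secret generic s3-backup-creds \
#     --from-literal=AWS_ACCESS_KEY_ID=... \
#     --from-literal=AWS_SECRET_ACCESS_KEY=...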
ingress:
# Traefik entrypoint name (set on the Traefik install in the