zuzu-system-backup/zuzu-system-backup.py
#!/usr/bin/env python3
"""
zuzu-system-backup
Snapshot + mirror backups from Arch hosts to NAS over SSH/rsync.
- Snapshot sources:
INCLUDE_PATHS
USER_INCLUDE_DIRS
USER_INCLUDE_FILES
-> rsyncd into a local temp tree per category
-> compressed locally (tar + optional zstd)
-> archives uploaded to NAS snapshot dir
- Single-copy mirrors:
SINGLE_COPY_MAPPINGS
-> rsync directly to NAS with --delete
"""
import os
import shlex
import shutil
import subprocess
import sys
import tempfile
from datetime import datetime
from pathlib import Path
import yaml

BASE_DIR = Path(__file__).resolve().parent
CONFIG_PATH = BASE_DIR / "backup.yaml"
SNAPSHOT_FMT = "%Y-%m-%d_%H-%M-%S"
# ---------------------------------------------------------------------------
# Load config (backup.yaml)
# ---------------------------------------------------------------------------
def _fatal(msg: str) -> None:
    print(f"[FATAL] {msg}", file=sys.stderr)
    sys.exit(1)


try:
    with CONFIG_PATH.open("r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f) or {}
except FileNotFoundError:
    _fatal(f"Config file not found: {CONFIG_PATH}")
remote_cfg = cfg.get("remote", {})
SSH_USER = remote_cfg.get("user")
SSH_HOST = remote_cfg.get("host")
SSH_PORT = int(remote_cfg.get("port", 22))
SSH_KEY = remote_cfg.get("key")
REMOTE_BASE = remote_cfg.get("base")
REMOTE_HOST_DIR = remote_cfg.get("host_dir")
if not all([SSH_USER, SSH_HOST, SSH_KEY, REMOTE_BASE, REMOTE_HOST_DIR]):
    _fatal("remote.{user,host,key,base,host_dir} must all be set in backup.yaml")
# retention
RETENTION_DAYS = int(cfg.get("retention", {}).get("snapshots", 7))
# compression
compression_cfg = cfg.get("compression", {})
COMPRESSION_MODE = (compression_cfg.get("mode") or "high").lower()
if COMPRESSION_MODE not in ("high", "light", "none"):
    COMPRESSION_MODE = "high"
COMPRESSION_PATH = compression_cfg.get("path")
# rsync
RSYNC_EXTRA_OPTS = cfg.get("rsync", {}).get("extra_opts", [])
user_cfg = cfg.get("user", {})
USER_HOME = user_cfg.get("home")
def expand_user_path(p: str) -> str:
    if not p:
        return p
    if p.startswith("/"):
        return p
    if USER_HOME:
        return str(Path(USER_HOME) / p)
    return p


def expand_home_var(s: str) -> str:
    if not isinstance(s, str):
        return s
    if USER_HOME:
        return (
            s.replace("${USER_HOME}", USER_HOME)
            .replace("${HOME}", USER_HOME)
        )
    return s
# System trees
INCLUDE_PATHS = cfg.get("system", {}).get("include_paths", [])
# User trees
USER_INCLUDE_DIRS = [expand_user_path(p) for p in user_cfg.get("include_dirs", [])]
USER_INCLUDE_FILES = [expand_user_path(p) for p in user_cfg.get("include_files", [])]
# Exclude patterns
raw_excl = cfg.get("exclude_patterns", [])
EXCLUDE_PATTERNS = [expand_home_var(p) for p in raw_excl]
# Single-copy mappings
SINGLE_COPY_MAPPINGS = cfg.get("single_copy_mappings", [])
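# Each mapping is a "local_src|remote_dest" string (parsed in main()); a
# hypothetical example: "/srv/media|/volume1/backups/this-host/media".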
# Derived remote paths
REMOTE_ROOT = f"{REMOTE_BASE.rstrip('/')}/{REMOTE_HOST_DIR}"
REMOTE_SNAPSHOTS_DIR = f"{REMOTE_ROOT}/snapshots"
# Decide where local temp/compression work happens
if COMPRESSION_PATH:
    compression_root = Path(COMPRESSION_PATH)
    try:
        compression_root.mkdir(parents=True, exist_ok=True)
    except Exception as e:
        print(
            f"[WARN] COMPRESSION_PATH {compression_root} not usable ({e}); "
            "falling back to system temp",
            file=sys.stderr,
        )
        compression_root = Path(tempfile.gettempdir())
else:
    compression_root = Path(tempfile.gettempdir())
COMPRESSION_ROOT = compression_root
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def log(msg: str) -> None:
    ts = datetime.now().strftime(SNAPSHOT_FMT)
    print(f"[{ts}] {msg}", file=sys.stderr)


def run(cmd, check: bool = True, capture_output: bool = False, text: bool = True):
    if isinstance(cmd, list):
        debug_cmd = " ".join(shlex.quote(str(c)) for c in cmd)
    else:
        debug_cmd = cmd
    log(f"run: {debug_cmd}")
    return subprocess.run(
        cmd,
        check=check,
        capture_output=capture_output,
        text=text,
    )


def remote_shell(cmd_str: str, **kwargs):
    ssh_cmd = [
        "ssh",
        "-i", SSH_KEY,
        "-p", str(SSH_PORT),
        "-o", "BatchMode=yes",
        "-o", "StrictHostKeyChecking=accept-new",
        f"{SSH_USER}@{SSH_HOST}",
        cmd_str,
    ]
    return run(ssh_cmd, **kwargs)
def build_excludes_file() -> str:
    fd, path = tempfile.mkstemp(prefix="arch-rsync-backup-excludes-", text=True)
    with os.fdopen(fd, "w") as fh:
        for pat in EXCLUDE_PATTERNS:
            fh.write(str(pat) + "\n")
    return path


def ensure_remote_snapshot_root() -> None:
    remote_shell(f"mkdir -p {shlex.quote(REMOTE_SNAPSHOTS_DIR)}")


def snapshot_name() -> str:
    return datetime.now().strftime(SNAPSHOT_FMT)


def list_remote_snapshots():
    cmd = f"ls -1 {shlex.quote(REMOTE_SNAPSHOTS_DIR)} || true"
    result = remote_shell(cmd, capture_output=True)
    names = []
    if result.stdout:
        for line in result.stdout.splitlines():
            line = line.strip()
            if line:
                names.append(line)
    return names
def prune_old_snapshots() -> None:
    """
    Retention policy: keep at most RETENTION_DAYS snapshots
    (by timestamp order). Older ones are deleted.
    """
    max_keep = RETENTION_DAYS
    if max_keep <= 0:
        log("RETENTION_DAYS <= 0; skipping pruning")
        return
    names = list_remote_snapshots()
    parsed = []
    for name in names:
        try:
            dt = datetime.strptime(name, SNAPSHOT_FMT)
        except ValueError:
            # non-standard dirs, ignore
            continue
        parsed.append((dt, name))
    parsed.sort()
    if len(parsed) <= max_keep:
        log(f"prune: {len(parsed)} snapshots <= {max_keep}; nothing to delete")
        return
    to_delete = [name for _, name in parsed[:-max_keep]]
    base_q = shlex.quote(REMOTE_SNAPSHOTS_DIR)
    del_str = " ".join(shlex.quote(n) for n in to_delete)
    log(f"prune: deleting old snapshots: {', '.join(to_delete)}")
    remote_shell(f"cd {base_q} && rm -rf -- {del_str}")
# ---------------------------------------------------------------------------
# Snapshot: build local tree per category, compress locally, upload archive
# ---------------------------------------------------------------------------
def rsync_to_local_category(category_root: Path, sources, excludes_file: str) -> None:
    category_root.mkdir(parents=True, exist_ok=True)
    for src in sources:
        src = str(src).rstrip()
        if not src:
            continue
        if not os.path.exists(src):
            log(f"skip missing snapshot source: {src}")
            continue
        cmd = [
            "rsync",
            "-aHAXR",
            "--relative",
            "--human-readable",
            f"--exclude-from={excludes_file}",
            src,
            str(category_root) + "/",
        ]
        # insert extra rsync options just before the source path
        cmd[5:5] = RSYNC_EXTRA_OPTS
        run(cmd)
def compress_category_local(category_name: str, category_root: Path, tmp_root: Path,
                            remote_snapshot_dir: str) -> None:
    # If the category tree is missing or empty, there is nothing to archive
    if not category_root.exists():
        log(f"category {category_name}: no data, skipping")
        return
    any_content = False
    for _ in category_root.rglob("*"):
        any_content = True
        break
    if not any_content:
        log(f"category {category_name}: empty tree, skipping")
        return
    mode = COMPRESSION_MODE
    if mode == "none":
        archive_name = f"{category_name}.tar"
    else:
        archive_name = f"{category_name}.tar.zst"
    archive_path = tmp_root / archive_name
    if mode == "none":
        # plain tar, no compression
        cmd = [
            "tar",
            "-cf", str(archive_path),
            "--ignore-failed-read",
            "-C", str(category_root),
            ".",
        ]
        run(cmd)
    else:
        level = 19 if mode == "high" else 3
        # Use a shell for the tar | zstd pipeline
        shell_cmd = (
            f"cd {shlex.quote(str(category_root))} && "
            f"tar -cf - --ignore-failed-read . "
            f"| zstd -T0 -{level} -o {shlex.quote(str(archive_path))}"
        )
        run(["sh", "-c", shell_cmd])
    log(f"category {category_name}: archive created at {archive_path}")
    # Upload the archive to the remote snapshot dir
    rsync_cmd = [
        "rsync",
        "-a",
        "--human-readable",
        "-e",
        f"ssh -i {SSH_KEY} -p {SSH_PORT} -oBatchMode=yes -oStrictHostKeyChecking=accept-new",
        str(archive_path),
        f"{SSH_USER}@{SSH_HOST}:{remote_snapshot_dir}/",
    ]
    # insert extra rsync options before -e
    rsync_cmd[3:3] = RSYNC_EXTRA_OPTS
    run(rsync_cmd)
    log(f"category {category_name}: archive uploaded to {remote_snapshot_dir}")
# ---------------------------------------------------------------------------
# Single-copy mirrors (unchanged)
# ---------------------------------------------------------------------------
def rsync_single_copy(src: str, dest_remote: str, excludes_file: str) -> None:
    src = src.rstrip("/")
    if not os.path.exists(src):
        log(f"skip missing single-copy src: {src}")
        return
    dest_remote = dest_remote.rstrip("/")
    remote_shell(f"mkdir -p {shlex.quote(dest_remote)}")
    cmd = [
        "rsync",
        "-aHAX",
        "--delete",
        "--human-readable",
        f"--exclude-from={excludes_file}",
        "-e",
        f"ssh -i {SSH_KEY} -p {SSH_PORT} -oBatchMode=yes -oStrictHostKeyChecking=accept-new",
        f"{src}/",
        f"{SSH_USER}@{SSH_HOST}:{dest_remote}/",
    ]
    # insert extra rsync options before --exclude-from
    cmd[4:4] = RSYNC_EXTRA_OPTS
    run(cmd)
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main() -> None:
    log("==== arch-rsync-backup (local compression): start ====")
    ensure_remote_snapshot_root()
    snap = snapshot_name()
    remote_snapshot_dir = f"{REMOTE_SNAPSHOTS_DIR}/{snap}"
    remote_shell(f"mkdir -p {shlex.quote(remote_snapshot_dir)}")
    log(f"snapshot: {snap} -> {remote_snapshot_dir}")
    excludes_file = build_excludes_file()
    tmp_root = Path(
        tempfile.mkdtemp(
            prefix=f"arch-rsync-backup-{snap}-",
            dir=str(COMPRESSION_ROOT),
        )
    )
    try:
        # 1) Build local category trees
        system_root = tmp_root / "system"
        user_dirs_root = tmp_root / "user-dirs"
        user_files_root = tmp_root / "user-files"
        rsync_to_local_category(system_root, INCLUDE_PATHS, excludes_file)
        rsync_to_local_category(user_dirs_root, USER_INCLUDE_DIRS, excludes_file)
        rsync_to_local_category(user_files_root, USER_INCLUDE_FILES, excludes_file)
        # 2) Compress locally and upload archives
        compress_category_local("system", system_root, tmp_root, remote_snapshot_dir)
        compress_category_local("user-dirs", user_dirs_root, tmp_root, remote_snapshot_dir)
        compress_category_local("user-files", user_files_root, tmp_root, remote_snapshot_dir)
        # 3) Single-copy mirrors (unchanged)
        for mapping in SINGLE_COPY_MAPPINGS:
            if not mapping:
                continue
            if "|" not in mapping:
                log(f"skip malformed mapping (no '|'): {mapping}")
                continue
            src, dest = mapping.split("|", 1)
            src = src.strip()
            dest = dest.strip()
            if not src or not dest:
                log(f"skip malformed mapping (empty src/dest): {mapping}")
                continue
            rsync_single_copy(src, dest, excludes_file)
        # 4) Retention
        prune_old_snapshots()
        log(f"Backup complete: {snap}")
    finally:
        try:
            os.remove(excludes_file)
        except FileNotFoundError:
            pass
        # clean up local temp tree
        shutil.rmtree(tmp_root, ignore_errors=True)
        log("==== arch-rsync-backup: end ====")


if __name__ == "__main__":
    main()
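# Typical invocation (an assumption, not enforced by the script): run as root so
# rsync can read system paths and preserve ACLs/xattrs (-A/-X), e.g. from a cron
# job or systemd timer:
#   sudo /path/to/zuzu-system-backup.py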