417 lines
12 KiB
Python
417 lines
12 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
zuzu-system-backup
|
||
|
||
Snapshot + mirror backups from Arch hosts to NAS over SSH/rsync.
|
||
|
||
- Snapshot sources:
|
||
INCLUDE_PATHS
|
||
USER_INCLUDE_DIRS
|
||
USER_INCLUDE_FILES
|
||
|
||
-> rsync’d into a local temp tree per category
|
||
-> compressed locally (tar + optional zstd)
|
||
-> archives uploaded to NAS snapshot dir
|
||
|
||
- Single-copy mirrors:
|
||
SINGLE_COPY_MAPPINGS
|
||
|
||
-> rsync directly to NAS with --delete
|
||
"""
|
||
|
||
import os
|
||
import shlex
|
||
import shutil
|
||
import subprocess
|
||
import sys
|
||
import tempfile
|
||
from datetime import datetime
|
||
from pathlib import Path
|
||
|
||
import yaml
|
||
|
||
# Directory that contains this script; backup.yaml is looked up next to it.
BASE_DIR = Path(__file__).resolve().parents[0]
CONFIG_PATH = BASE_DIR.joinpath("backup.yaml")

# strftime/strptime layout used for snapshot names and log timestamps.
SNAPSHOT_FMT = "%Y-%m-%d_%H-%M-%S"
|
||
|
||
|
||
# ---------------------------------------------------------------------------
# Load config (backup.yaml)
# ---------------------------------------------------------------------------
|
||
|
||
def _fatal(msg: str) -> None:
|
||
print(f"[FATAL] {msg}", file=sys.stderr)
|
||
sys.exit(1)
|
||
|
||
# Read backup.yaml once at import time. A missing or unparsable file is fatal
# because every setting below is derived from it.
try:
    with CONFIG_PATH.open("r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f) or {}
except FileNotFoundError:
    _fatal(f"Config file not found: {CONFIG_PATH}")
except yaml.YAMLError as e:
    _fatal(f"Config file is not valid YAML: {CONFIG_PATH} ({e})")

if not isinstance(cfg, dict):
    _fatal(f"Top level of {CONFIG_PATH} must be a mapping")

# NOTE: sections are read with `cfg.get(name) or {}` rather than
# `cfg.get(name, {})`. A YAML key with no value (e.g. a bare `rsync:`) loads
# as None, and the dict.get default only applies when the key is absent.

# remote: SSH endpoint and directory layout on the backup target
remote_cfg = cfg.get("remote") or {}
SSH_USER = remote_cfg.get("user")
SSH_HOST = remote_cfg.get("host")
SSH_PORT = int(remote_cfg.get("port") or 22)
SSH_KEY = remote_cfg.get("key")
REMOTE_BASE = remote_cfg.get("base")
REMOTE_HOST_DIR = remote_cfg.get("host_dir")

if not all([SSH_USER, SSH_HOST, SSH_KEY, REMOTE_BASE, REMOTE_HOST_DIR]):
    _fatal("remote.{user,host,key,base,host_dir} must all be set in backup.yaml")

# retention: number of snapshots to keep. 0 is meaningful (it disables
# pruning), so only a missing/null value falls back to the default of 7.
retention_cfg = cfg.get("retention") or {}
_snapshots_to_keep = retention_cfg.get("snapshots")
RETENTION_DAYS = 7 if _snapshots_to_keep is None else int(_snapshots_to_keep)

# compression: "high", "light" or "none"; anything else falls back to "high"
compression_cfg = cfg.get("compression") or {}
COMPRESSION_MODE = (compression_cfg.get("mode") or "high").lower()
if COMPRESSION_MODE not in ("high", "light", "none"):
    COMPRESSION_MODE = "high"

COMPRESSION_PATH = compression_cfg.get("path")

# rsync: extra options are spliced into rsync argv lists elsewhere, so they
# must be a list of strings. A bare string would otherwise be inserted one
# character at a time; split it shell-style instead.
_extra_opts = (cfg.get("rsync") or {}).get("extra_opts") or []
if isinstance(_extra_opts, str):
    _extra_opts = shlex.split(_extra_opts)
RSYNC_EXTRA_OPTS = [str(opt) for opt in _extra_opts]

# user: home directory used to resolve relative user paths and ${HOME} vars
user_cfg = cfg.get("user") or {}
USER_HOME = user_cfg.get("home")
|
||
|
||
def expand_user_path(p: str) -> str:
    """Resolve a relative path against ``USER_HOME``.

    Empty values and paths starting with ``/`` are returned unchanged, as is
    everything when ``USER_HOME`` is not configured.
    """
    is_relative = bool(p) and not p.startswith("/")
    if is_relative and USER_HOME:
        return str(Path(USER_HOME) / p)
    return p
|
||
|
||
|
||
def expand_home_var(s: str) -> str:
    """Substitute ``${USER_HOME}`` and ``${HOME}`` in *s* with ``USER_HOME``.

    Non-string values are returned untouched, and nothing is substituted
    when ``USER_HOME`` is not configured.
    """
    if not (isinstance(s, str) and USER_HOME):
        return s
    # Same replacement order as before: ${USER_HOME} first, then ${HOME}.
    for placeholder in ("${USER_HOME}", "${HOME}"):
        s = s.replace(placeholder, USER_HOME)
    return s
|
||
|
||
# NOTE: list-valued settings use `... or []` so that a YAML key present with
# no value (which loads as None) behaves like an empty list instead of
# raising TypeError when iterated.

# System trees
INCLUDE_PATHS = (cfg.get("system") or {}).get("include_paths") or []

# User trees (relative entries are resolved against USER_HOME)
USER_INCLUDE_DIRS = [
    expand_user_path(p) for p in (user_cfg.get("include_dirs") or [])
]
USER_INCLUDE_FILES = [
    expand_user_path(p) for p in (user_cfg.get("include_files") or [])
]

# Exclude patterns (with ${HOME}/${USER_HOME} substituted)
raw_excl = cfg.get("exclude_patterns") or []
EXCLUDE_PATTERNS = [expand_home_var(p) for p in raw_excl]

# Single-copy mappings ("src|dest" strings, parsed in main())
SINGLE_COPY_MAPPINGS = cfg.get("single_copy_mappings") or []

# Derived remote paths
REMOTE_ROOT = f"{REMOTE_BASE.rstrip('/')}/{REMOTE_HOST_DIR}"
REMOTE_SNAPSHOTS_DIR = f"{REMOTE_ROOT}/snapshots"

# Decide where local temp/compression work happens: the configured
# compression path if it can be created, otherwise the system temp dir.
compression_root = Path(tempfile.gettempdir())
if COMPRESSION_PATH:
    _candidate_root = Path(COMPRESSION_PATH)
    try:
        _candidate_root.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        print(
            f"[WARN] COMPRESSION_PATH {_candidate_root} not usable ({e}); "
            "falling back to system temp",
            file=sys.stderr,
        )
    else:
        compression_root = _candidate_root

COMPRESSION_ROOT = compression_root
|
||
|
||
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
|
||
|
||
def log(msg: str) -> None:
    """Print *msg* to stderr, prefixed with the current local timestamp."""
    stamp = format(datetime.now(), SNAPSHOT_FMT)
    print(f"[{stamp}] {msg}", file=sys.stderr)
|
||
|
||
|
||
def run(cmd, check: bool = True, capture_output: bool = False, text: bool = True):
    """Log a command and execute it with ``subprocess.run``.

    *cmd* may be an argv list or a plain string. Lists are logged in
    shell-quoted form; strings are logged verbatim. The keyword arguments
    are forwarded unchanged and the ``CompletedProcess`` is returned.
    """
    if isinstance(cmd, list):
        shown = shlex.join(map(str, cmd))
    else:
        shown = cmd
    log(f"run: {shown}")
    return subprocess.run(cmd, check=check, capture_output=capture_output, text=text)
|
||
|
||
def remote_shell(cmd_str: str, **kwargs):
    """Run *cmd_str* on the backup host over ssh and return the result.

    Uses the configured key and port in batch mode (no prompts) and accepts
    previously unseen host keys. Extra keyword arguments go to ``run``.
    """
    target = f"{SSH_USER}@{SSH_HOST}"
    ssh_args = ["-i", SSH_KEY, "-p", str(SSH_PORT)]
    for option in ("BatchMode=yes", "StrictHostKeyChecking=accept-new"):
        ssh_args += ["-o", option]
    return run(["ssh", *ssh_args, target, cmd_str], **kwargs)
|
||
|
||
|
||
def build_excludes_file() -> str:
    """Write ``EXCLUDE_PATTERNS`` one per line to a new temp file.

    Returns the file's path; the caller is responsible for deleting it.
    """
    handle, excludes_path = tempfile.mkstemp(
        prefix="arch-rsync-backup-excludes-", text=True
    )
    with os.fdopen(handle, "w") as out:
        out.writelines(f"{pattern}\n" for pattern in EXCLUDE_PATTERNS)
    return excludes_path
|
||
|
||
|
||
def ensure_remote_snapshot_root() -> None:
    """Create the remote snapshots directory (and parents) if it is missing."""
    quoted_dir = shlex.quote(REMOTE_SNAPSHOTS_DIR)
    remote_shell("mkdir -p " + quoted_dir)
|
||
|
||
|
||
def snapshot_name() -> str:
    """Return the current local time formatted as a snapshot directory name."""
    now = datetime.now()
    return format(now, SNAPSHOT_FMT)
|
||
|
||
|
||
def list_remote_snapshots():
    """Return the non-blank entry names in the remote snapshots directory.

    The remote ``ls`` is followed by ``|| true`` so a missing directory
    yields an empty list rather than a failed command.
    """
    listing = remote_shell(
        f"ls -1 {shlex.quote(REMOTE_SNAPSHOTS_DIR)} || true",
        capture_output=True,
    )
    stripped = (entry.strip() for entry in (listing.stdout or "").splitlines())
    return [entry for entry in stripped if entry]
|
||
|
||
|
||
def prune_old_snapshots() -> None:
    """Delete the oldest remote snapshots beyond the retention limit.

    Despite its name, ``RETENTION_DAYS`` is used as a count: at most that
    many snapshots are kept, ordered by the timestamp in their names.
    Entries whose names do not match ``SNAPSHOT_FMT`` are never touched.
    A limit of zero or less disables pruning.
    """
    keep = RETENTION_DAYS
    if keep <= 0:
        log("RETENTION_DAYS <= 0; skipping pruning")
        return

    dated = []
    for entry in list_remote_snapshots():
        try:
            stamp = datetime.strptime(entry, SNAPSHOT_FMT)
        except ValueError:
            # Not one of our snapshot directories; leave it alone.
            continue
        dated.append((stamp, entry))

    total = len(dated)
    if total <= keep:
        log(f"prune: {total} snapshots <= {keep}; nothing to delete")
        return

    # Oldest first; everything before the last `keep` entries is removed.
    doomed = [entry for _, entry in sorted(dated)[: total - keep]]
    log(f"prune: deleting old snapshots: {', '.join(doomed)}")

    quoted_base = shlex.quote(REMOTE_SNAPSHOTS_DIR)
    quoted_names = " ".join(map(shlex.quote, doomed))
    remote_shell(f"cd {quoted_base} && rm -rf -- {quoted_names}")
|
||
|
||
|
||
# ---------------------------------------------------------------------------
# Snapshot: build local tree per category, compress locally, upload archive
# ---------------------------------------------------------------------------
|
||
|
||
def rsync_to_local_category(category_root: Path, sources, excludes_file: str) -> None:
    """Copy each existing path in *sources* into *category_root* with rsync.

    Paths are copied with ``--relative``, so the full source path is
    recreated beneath *category_root*. Blank entries are ignored and missing
    paths are logged and skipped. *excludes_file* is passed to rsync as
    ``--exclude-from``.
    """
    category_root.mkdir(parents=True, exist_ok=True)
    destination = f"{category_root}/"

    for raw in sources:
        path = str(raw).rstrip()
        if not path:
            continue
        if not os.path.exists(path):
            log(f"skip missing snapshot source: {path}")
            continue

        run([
            "rsync",
            "-aHAXR",
            "--relative",
            "--human-readable",
            f"--exclude-from={excludes_file}",
            # User-supplied options go after the fixed ones, before the paths.
            *RSYNC_EXTRA_OPTS,
            path,
            destination,
        ])
|
||
|
||
def compress_category_local(category_name: str, category_root: Path, tmp_root: Path,
                            remote_snapshot_dir: str) -> None:
    """Archive one category tree locally and upload the archive.

    The tree at *category_root* is packed into ``<category_name>.tar`` (when
    ``COMPRESSION_MODE`` is "none") or ``<category_name>.tar.zst`` inside
    *tmp_root*, then copied with rsync over ssh into *remote_snapshot_dir*.
    A missing or empty tree is logged and skipped.

    Raises:
        subprocess.CalledProcessError: if tar (including its zstd filter)
            or the upload rsync exits non-zero.
    """
    if not category_root.exists():
        log(f"category {category_name}: no data, skipping")
        return

    # An empty tree would only produce an empty archive; skip it.
    if next(category_root.rglob("*"), None) is None:
        log(f"category {category_name}: empty tree, skipping")
        return

    mode = COMPRESSION_MODE
    suffix = ".tar" if mode == "none" else ".tar.zst"
    archive_path = tmp_root / f"{category_name}{suffix}"

    tar_cmd = ["tar"]
    if mode != "none":
        level = 19 if mode == "high" else 3
        # Let tar drive zstd itself instead of `sh -c "tar | zstd"`: a shell
        # pipeline only reports the last command's status, so a failing tar
        # could leave a truncated archive that was still uploaded as a success.
        tar_cmd += ["-I", f"zstd -T0 -{level}"]
    tar_cmd += [
        "-cf", str(archive_path),
        "--ignore-failed-read",
        "-C", str(category_root),
        ".",
    ]
    run(tar_cmd)

    log(f"category {category_name}: archive created at {archive_path}")

    # rsync splits the -e string on spaces but honours quotes, so the key
    # path is quoted to survive spaces or other special characters.
    ssh_transport = (
        f"ssh -i {shlex.quote(str(SSH_KEY))} -p {SSH_PORT} "
        "-oBatchMode=yes -oStrictHostKeyChecking=accept-new"
    )
    run([
        "rsync",
        "-a",
        "--human-readable",
        *RSYNC_EXTRA_OPTS,
        "-e",
        ssh_transport,
        str(archive_path),
        f"{SSH_USER}@{SSH_HOST}:{remote_snapshot_dir}/",
    ])

    log(f"category {category_name}: archive uploaded to {remote_snapshot_dir}")
|
||
|
||
|
||
# ---------------------------------------------------------------------------
# Single-copy mirrors (unchanged)
# ---------------------------------------------------------------------------
|
||
|
||
def rsync_single_copy(src: str, dest_remote: str, excludes_file: str) -> None:
    """Mirror the contents of local *src* into *dest_remote* on the backup host.

    The remote directory is created first, then rsync runs with ``--delete``,
    so files that no longer exist locally are removed from the mirror.
    Trailing slashes on both paths are normalised. A missing *src* is logged
    and skipped.

    Raises:
        subprocess.CalledProcessError: if the remote mkdir or rsync fails.
    """
    src = src.rstrip("/")
    if not os.path.exists(src):
        log(f"skip missing single-copy src: {src}")
        return

    dest_remote = dest_remote.rstrip("/")
    remote_shell(f"mkdir -p {shlex.quote(dest_remote)}")

    # rsync splits the -e string on spaces but honours quotes, so the key
    # path is quoted to survive spaces or other special characters.
    ssh_transport = (
        f"ssh -i {shlex.quote(str(SSH_KEY))} -p {SSH_PORT} "
        "-oBatchMode=yes -oStrictHostKeyChecking=accept-new"
    )
    run([
        "rsync",
        "-aHAX",
        "--delete",
        "--human-readable",
        *RSYNC_EXTRA_OPTS,
        f"--exclude-from={excludes_file}",
        "-e",
        ssh_transport,
        # Trailing slash: copy the directory's contents, not the directory.
        f"{src}/",
        f"{SSH_USER}@{SSH_HOST}:{dest_remote}/",
    ])
|
||
|
||
|
||
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
|
||
|
||
def main() -> None:
    """Run one full backup: snapshot archives, single-copy mirrors, pruning.

    Steps:
      1. Create a timestamped snapshot directory on the remote host.
      2. rsync each source category into a local temp tree.
      3. Archive each category locally and upload it to the snapshot dir.
      4. Mirror every valid ``SINGLE_COPY_MAPPINGS`` entry ("src|dest").
      5. Prune old remote snapshots.

    The local excludes file and temp tree are removed on both success and
    failure. Errors from the underlying commands propagate to the caller.
    """
    log("==== arch-rsync-backup (local compression): start ====")
    ensure_remote_snapshot_root()

    snap = snapshot_name()
    remote_snapshot_dir = f"{REMOTE_SNAPSHOTS_DIR}/{snap}"
    remote_shell(f"mkdir -p {shlex.quote(remote_snapshot_dir)}")
    log(f"snapshot: {snap} -> {remote_snapshot_dir}")

    # Both temp resources are created inside the try block so that a failure
    # while creating the second one still cleans up the first.
    excludes_file = None
    tmp_root = None
    try:
        excludes_file = build_excludes_file()
        tmp_root = Path(
            tempfile.mkdtemp(
                prefix=f"arch-rsync-backup-{snap}-",
                dir=str(COMPRESSION_ROOT),
            )
        )

        categories = (
            ("system", INCLUDE_PATHS),
            ("user-dirs", USER_INCLUDE_DIRS),
            ("user-files", USER_INCLUDE_FILES),
        )

        # 1) Build local category trees
        for name, sources in categories:
            rsync_to_local_category(tmp_root / name, sources, excludes_file)

        # 2) Compress locally and upload archives
        for name, _ in categories:
            compress_category_local(name, tmp_root / name, tmp_root, remote_snapshot_dir)

        # 3) Single-copy mirrors
        for mapping in SINGLE_COPY_MAPPINGS:
            if not mapping:
                continue
            # Non-string entries (e.g. a YAML number) cannot hold "src|dest".
            if not isinstance(mapping, str) or "|" not in mapping:
                log(f"skip malformed mapping (no '|'): {mapping}")
                continue
            src, dest = (part.strip() for part in mapping.split("|", 1))
            if not src or not dest:
                log(f"skip malformed mapping (empty src/dest): {mapping}")
                continue
            rsync_single_copy(src, dest, excludes_file)

        # 4) Retention
        prune_old_snapshots()

        log(f"Backup complete: {snap}")
    finally:
        if excludes_file is not None:
            try:
                os.remove(excludes_file)
            except FileNotFoundError:
                pass
        # clean up local temp tree
        if tmp_root is not None:
            shutil.rmtree(tmp_root, ignore_errors=True)
        log("==== arch-rsync-backup: end ====")
|
||
|
||
|
||
# Run the backup only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|