From 9f88beed48043a112fd6f7b176e2b3858a988e18 Mon Sep 17 00:00:00 2001
From: Peter Knauer
Date: Sat, 6 Dec 2025 09:25:05 -0500
Subject: [PATCH] first release

---
 LICENSE                 |  21 ++
 packaging/PKGBUILD      |  25 +++
 zuzu-system-backup.py   | 417 ++++++++++++++++++++++++++++++++++++++++
 zuzu-system-backup.yaml |  80 ++++++++
 4 files changed, 543 insertions(+)
 create mode 100644 LICENSE
 create mode 100644 packaging/PKGBUILD
 create mode 100644 zuzu-system-backup.py
 create mode 100644 zuzu-system-backup.yaml

diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..3f2cf76
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 Peter Knauer
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/packaging/PKGBUILD b/packaging/PKGBUILD
new file mode 100644
index 0000000..05029a0
--- /dev/null
+++ b/packaging/PKGBUILD
@@ -0,0 +1,25 @@
+# Maintainer: Peter Knauer
+pkgname=zuzu-system-backup
+pkgver=1.0.0
+pkgrel=1
+pkgdesc="SSH/rsync snapshot + mirror backups to a NAS for Linux-based systems (Python-based)"
+arch=(any)
+depends=(python python-yaml rsync openssh coreutils tar zstd)
+source=(
+  'zuzu-system-backup.py'
+  'zuzu-system-backup.yaml'
+  'zuzu-system-backup.service'
+  'zuzu-system-backup.timer'
+)
+sha256sums=('SKIP' 'SKIP' 'SKIP' 'SKIP')
+
+package() {
+  install -d "$pkgdir/usr/local/sbin/zuzu-system-backup"
+  install -m 0755 "zuzu-system-backup.py" "$pkgdir/usr/local/sbin/zuzu-system-backup/zuzu-system-backup.py"
+  # Template config; edit per-host after install
+  install -m 0644 "zuzu-system-backup.yaml" "$pkgdir/usr/local/sbin/zuzu-system-backup/zuzu-system-backup.yaml"
+
+  install -d "$pkgdir/usr/lib/systemd/system"
+  install -m 0644 "zuzu-system-backup.service" "$pkgdir/usr/lib/systemd/system/zuzu-system-backup.service"
+  install -m 0644 "zuzu-system-backup.timer" "$pkgdir/usr/lib/systemd/system/zuzu-system-backup.timer"
+}
diff --git a/zuzu-system-backup.py b/zuzu-system-backup.py
new file mode 100644
index 0000000..9a98861
--- /dev/null
+++ b/zuzu-system-backup.py
@@ -0,0 +1,417 @@
+#!/usr/bin/env python3
+"""
+zuzu-system-backup
+
+Snapshot + mirror backups from Arch hosts to NAS over SSH/rsync.
+
+- Snapshot sources:
+    INCLUDE_PATHS + USER_INCLUDE_DIRS + USER_INCLUDE_FILES
+
+    -> rsync’d into a local temp tree per category
+    -> compressed locally (tar + optional zstd)
+    -> archives uploaded to NAS snapshot dir
+
+- Single-copy mirrors:
+    SINGLE_COPY_MAPPINGS
+
+    -> rsync directly to NAS with --delete
+"""
+
+import os
+import shlex
+import shutil
+import subprocess
+import sys
+import tempfile
+from datetime import datetime
+from pathlib import Path
+
+import yaml
+
+BASE_DIR = Path(__file__).resolve().parent
+CONFIG_PATH = BASE_DIR / "zuzu-system-backup.yaml"
+
+SNAPSHOT_FMT = "%Y-%m-%d_%H-%M-%S"
+
+
+# ---------------------------------------------------------------------------
+# Load config (zuzu-system-backup.yaml)
+# ---------------------------------------------------------------------------
+
+def _fatal(msg: str) -> None:
+    print(f"[FATAL] {msg}", file=sys.stderr)
+    sys.exit(1)
+
+try:
+    with CONFIG_PATH.open("r", encoding="utf-8") as f:
+        cfg = yaml.safe_load(f) or {}
+except FileNotFoundError:
+    _fatal(f"Config file not found: {CONFIG_PATH}")
+
+remote_cfg = cfg.get("remote", {})
+SSH_USER = remote_cfg.get("user")
+SSH_HOST = remote_cfg.get("host")
+SSH_PORT = int(remote_cfg.get("port", 22))
+SSH_KEY = remote_cfg.get("key")
+REMOTE_BASE = remote_cfg.get("base")
+REMOTE_HOST_DIR = remote_cfg.get("host_dir")
+
+if not all([SSH_USER, SSH_HOST, SSH_KEY, REMOTE_BASE, REMOTE_HOST_DIR]):
+    _fatal("remote.{user,host,key,base,host_dir} must all be set in zuzu-system-backup.yaml")
+
+# retention: max number of snapshots to keep on the NAS
+RETENTION_SNAPSHOTS = int(cfg.get("retention", {}).get("snapshots", 7))
+
+# compression
+compression_cfg = cfg.get("compression", {})
+COMPRESSION_MODE = (compression_cfg.get("mode") or "high").lower()
+if COMPRESSION_MODE not in ("high", "light", "none"):
+    COMPRESSION_MODE = "high"
+
+COMPRESSION_PATH = compression_cfg.get("path")
+
+# rsync
+RSYNC_EXTRA_OPTS = cfg.get("rsync", {}).get("extra_opts", [])
+
+user_cfg = cfg.get("user", {})
+USER_HOME = user_cfg.get("home")
+
+
+def expand_user_path(p: str) -> str:
+    if not p:
+        return p
+    if p.startswith("/"):
+        return p
+    if USER_HOME:
+        return str(Path(USER_HOME) / p)
+    return p
+
+
+def expand_home_var(s: str) -> str:
+    if not isinstance(s, str):
+        return s
+    if USER_HOME:
+        return (
+            s.replace("${USER_HOME}", USER_HOME)
+            .replace("${HOME}", USER_HOME)
+        )
+    return s
+
+# System trees
+INCLUDE_PATHS = cfg.get("system", {}).get("include_paths", [])
+
+# User trees
+USER_INCLUDE_DIRS = [expand_user_path(p) for p in user_cfg.get("include_dirs", [])]
+USER_INCLUDE_FILES = [expand_user_path(p) for p in user_cfg.get("include_files", [])]
+
+# Exclude patterns
+raw_excl = cfg.get("exclude_patterns", [])
+EXCLUDE_PATTERNS = [expand_home_var(p) for p in raw_excl]
+
+# Single-copy mappings
+SINGLE_COPY_MAPPINGS = cfg.get("single_copy_mappings", [])
+
+# Derived remote paths
+REMOTE_ROOT = f"{REMOTE_BASE.rstrip('/')}/{REMOTE_HOST_DIR}"
+REMOTE_SNAPSHOTS_DIR = f"{REMOTE_ROOT}/snapshots"
+
+# Decide where local temp/compression work happens
+if COMPRESSION_PATH:
+    compression_root = Path(COMPRESSION_PATH)
+    try:
+        compression_root.mkdir(parents=True, exist_ok=True)
+    except Exception as e:
+        print(
+            f"[WARN] COMPRESSION_PATH {compression_root} not usable ({e}); "
+            "falling back to system temp",
+            file=sys.stderr,
+        )
+        compression_root = Path(tempfile.gettempdir())
+else:
+    compression_root = Path(tempfile.gettempdir())
+
+COMPRESSION_ROOT = compression_root
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def log(msg: str) -> None:
+    ts = datetime.now().strftime(SNAPSHOT_FMT)
+    print(f"[{ts}] {msg}", file=sys.stderr)
+
+
+def run(cmd, check: bool = True, capture_output: bool = False, text: bool = True):
+    if isinstance(cmd, list):
+        debug_cmd = " ".join(shlex.quote(str(c)) for c in cmd)
+    else:
+        debug_cmd = cmd
+    log(f"run: {debug_cmd}")
+    return subprocess.run(
+        cmd,
+        check=check,
+        capture_output=capture_output,
+        text=text,
+    )
+
+def remote_shell(cmd_str: str, **kwargs):
+    ssh_cmd = [
+        "ssh",
+        "-i", SSH_KEY,
+        "-p", str(SSH_PORT),
+        "-o", "BatchMode=yes",
+        "-o", "StrictHostKeyChecking=accept-new",
+        f"{SSH_USER}@{SSH_HOST}",
+        cmd_str,
+    ]
+    return run(ssh_cmd, **kwargs)
+
+
+def build_excludes_file() -> str:
+    fd, path = tempfile.mkstemp(prefix="zuzu-system-backup-excludes-", text=True)
+    with os.fdopen(fd, "w") as fh:
+        for pat in EXCLUDE_PATTERNS:
+            fh.write(str(pat) + "\n")
+    return path
+
+
+def ensure_remote_snapshot_root() -> None:
+    remote_shell(f"mkdir -p {shlex.quote(REMOTE_SNAPSHOTS_DIR)}")
+
+
+def snapshot_name() -> str:
+    return datetime.now().strftime(SNAPSHOT_FMT)
+
+
+def list_remote_snapshots():
+    cmd = f"ls -1 {shlex.quote(REMOTE_SNAPSHOTS_DIR)} || true"
+    result = remote_shell(cmd, capture_output=True)
+    names = []
+    if result.stdout:
+        for line in result.stdout.splitlines():
+            line = line.strip()
+            if line:
+                names.append(line)
+    return names
+
+
+def prune_old_snapshots() -> None:
+    """
+    Retention policy: keep at most RETENTION_SNAPSHOTS snapshots
+    (by timestamp order). Older ones are deleted.
+    """
+    max_keep = RETENTION_SNAPSHOTS
+    if max_keep <= 0:
+        log("RETENTION_SNAPSHOTS <= 0; skipping pruning")
+        return
+
+    names = list_remote_snapshots()
+    parsed = []
+    for name in names:
+        try:
+            dt = datetime.strptime(name, SNAPSHOT_FMT)
+        except ValueError:
+            # non-standard dirs, ignore
+            continue
+        parsed.append((dt, name))
+
+    parsed.sort()
+    if len(parsed) <= max_keep:
+        log(f"prune: {len(parsed)} snapshots <= {max_keep}; nothing to delete")
+        return
+
+    to_delete = [name for _, name in parsed[:-max_keep]]
+    base_q = shlex.quote(REMOTE_SNAPSHOTS_DIR)
+    del_str = " ".join(shlex.quote(n) for n in to_delete)
+    log(f"prune: deleting old snapshots: {', '.join(to_delete)}")
+    remote_shell(f"cd {base_q} && rm -rf -- {del_str}")
+
+
+# ---------------------------------------------------------------------------
+# Snapshot: build local tree per category, compress locally, upload archive
+# ---------------------------------------------------------------------------
+
+def rsync_to_local_category(category_root: Path, sources, excludes_file: str) -> None:
+    category_root.mkdir(parents=True, exist_ok=True)
+
+    for src in sources:
+        src = str(src).rstrip()
+        if not src:
+            continue
+        if not os.path.exists(src):
+            log(f"skip missing snapshot source: {src}")
+            continue
+
+        cmd = [
+            "rsync",
+            "-aHAXR",
+            "--relative",
+            "--human-readable",
+            f"--exclude-from={excludes_file}",
+            src,
+            str(category_root) + "/",
+        ]
+        # insert extra rsync options after the base flags
+        cmd[5:5] = RSYNC_EXTRA_OPTS
+        run(cmd)
+
+def compress_category_local(category_name: str, category_root: Path, tmp_root: Path,
+                            remote_snapshot_dir: str) -> None:
+    # If directory is empty, nothing to do
+    if not category_root.exists():
+        log(f"category {category_name}: no data, skipping")
+        return
+
+    any_content = False
+    for _ in category_root.rglob("*"):
+        any_content = True
+        break
+    if not any_content:
+        log(f"category {category_name}: empty tree, skipping")
+        return
+
+    mode = COMPRESSION_MODE
+    if mode == "none":
+        archive_name = f"{category_name}.tar"
+    else:
+        archive_name = f"{category_name}.tar.zst"
+
+    archive_path = tmp_root / archive_name
+
+    if mode == "none":
+        # plain tar, no compression
+        cmd = [
+            "tar",
+            "-cf", str(archive_path),
+            "--ignore-failed-read",
+            "-C", str(category_root),
+            ".",
+        ]
+        run(cmd)
+    else:
+        level = 19 if mode == "high" else 3
+        # Use shell for tar | zstd pipeline
+        shell_cmd = (
+            f"cd {shlex.quote(str(category_root))} && "
+            f"tar -cf - --ignore-failed-read . "
+            f"| zstd -T0 -{level} -o {shlex.quote(str(archive_path))}"
+        )
+        run(["sh", "-c", shell_cmd])
+
+    log(f"category {category_name}: archive created at {archive_path}")
+
+    # Upload archive to remote snapshot dir
+    rsync_cmd = [
+        "rsync",
+        "-a",
+        "--human-readable",
+        "-e",
+        f"ssh -i {SSH_KEY} -p {SSH_PORT} -oBatchMode=yes -oStrictHostKeyChecking=accept-new",
+        str(archive_path),
+        f"{SSH_USER}@{SSH_HOST}:{remote_snapshot_dir}/",
+    ]
+    # insert extra rsync options after the base flags
+    rsync_cmd[3:3] = RSYNC_EXTRA_OPTS
+    run(rsync_cmd)
+
+    log(f"category {category_name}: archive uploaded to {remote_snapshot_dir}")
+
+
+# ---------------------------------------------------------------------------
+# Single-copy mirrors
+# ---------------------------------------------------------------------------
+
+def rsync_single_copy(src: str, dest_remote: str, excludes_file: str) -> None:
+    src = src.rstrip("/")
+    if not os.path.exists(src):
+        log(f"skip missing single-copy src: {src}")
+        return
+
+    dest_remote = dest_remote.rstrip("/")
+    remote_shell(f"mkdir -p {shlex.quote(dest_remote)}")
+
+    cmd = [
+        "rsync",
+        "-aHAX",
+        "--delete",
+        "--human-readable",
+        f"--exclude-from={excludes_file}",
+        "-e",
+        f"ssh -i {SSH_KEY} -p {SSH_PORT} -oBatchMode=yes -oStrictHostKeyChecking=accept-new",
+        f"{src}/",
+        f"{SSH_USER}@{SSH_HOST}:{dest_remote}/",
+    ]
+    cmd[4:4] = RSYNC_EXTRA_OPTS
+    run(cmd)
+
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+
+def main() -> None:
+    log("==== zuzu-system-backup (local compression): start ====")
+    ensure_remote_snapshot_root()
+
+    snap = snapshot_name()
+    remote_snapshot_dir = f"{REMOTE_SNAPSHOTS_DIR}/{snap}"
+    remote_shell(f"mkdir -p {shlex.quote(remote_snapshot_dir)}")
+    log(f"snapshot: {snap} -> {remote_snapshot_dir}")
+
+    excludes_file = build_excludes_file()
+
+    tmp_root = Path(
+        tempfile.mkdtemp(
+            prefix=f"zuzu-system-backup-{snap}-",
+            dir=str(COMPRESSION_ROOT),
+        )
+    )
+
+    try:
+        # 1) Build local category trees
+        system_root = tmp_root / "system"
+        user_dirs_root = tmp_root / "user-dirs"
+        user_files_root = tmp_root / "user-files"
+
+        rsync_to_local_category(system_root, INCLUDE_PATHS, excludes_file)
+        rsync_to_local_category(user_dirs_root, USER_INCLUDE_DIRS, excludes_file)
+        rsync_to_local_category(user_files_root, USER_INCLUDE_FILES, excludes_file)
+
+        # 2) Compress locally and upload archives
+        compress_category_local("system", system_root, tmp_root, remote_snapshot_dir)
+        compress_category_local("user-dirs", user_dirs_root, tmp_root, remote_snapshot_dir)
+        compress_category_local("user-files", user_files_root, tmp_root, remote_snapshot_dir)
+
+        # 3) Single-copy mirrors
+        for mapping in SINGLE_COPY_MAPPINGS:
+            if not mapping:
+                continue
+            if "|" not in mapping:
+                log(f"skip malformed mapping (no '|'): {mapping}")
+                continue
+            src, dest = mapping.split("|", 1)
+            src = src.strip()
+            dest = dest.strip()
+            if not src or not dest:
+                log(f"skip malformed mapping (empty src/dest): {mapping}")
+                continue
+            rsync_single_copy(src, dest, excludes_file)
+
+        # 4) Retention
+        prune_old_snapshots()
+
+        log(f"Backup complete: {snap}")
+    finally:
+        try:
+            os.remove(excludes_file)
+        except FileNotFoundError:
+            pass
+        # clean up local temp tree
+        shutil.rmtree(tmp_root, ignore_errors=True)
+        log("==== zuzu-system-backup: end ====")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/zuzu-system-backup.yaml b/zuzu-system-backup.yaml
new file mode 100644
index 0000000..6a18feb
--- /dev/null
+++ b/zuzu-system-backup.yaml
@@ -0,0 +1,80 @@
+remote:
+  user: backupuser
+  host: backup-nas.local
+  port: 22
+  key: /home/backupuser/.ssh/id_ed25519-orion
+  base: /srv/backup/automated
+  host_dir: system-orion
+
+retention:
+  # Max number of snapshot directories to keep on NAS
+  snapshots: 7
+
+compression:
+  # high | light | none
+  mode: high
+  # Optional: where local temp trees and archives live
+  path: /srv/tmp/backups
+
+rsync:
+  extra_opts:
+    - --numeric-ids
+    - --info=progress2
+    - --protect-args
+
+system:
+  include_paths:
+    - /etc/nftables.conf
+    - /etc/snapper/configs
+    - /etc/NetworkManager/system-connections
+    - /etc/chromium/policies/managed
+    - /etc/fstab
+    # whole directory: the script copies literal paths and does not expand globs
+    - /etc/systemd/system
+    - /etc/nut/nut.conf
+    - /etc/nut/upsmon.conf
+
+user:
+  home: /home/devuser
+
+  include_dirs:
+    - .ssh
+    - .gnupg
+    - .local/share/wallpapers
+    - projects
+    - pkgbuilds
+    - venvs
+
+  include_files:
+    - .config/chromium/Default/Preferences
+    - .config/chromium/Default/Bookmarks
+    - .config/vlc/vlcrc
+    - .gitconfig
+    - .bashrc
+    - .bash_profile
+    - .local/share/user-places.xbel
+
+exclude_patterns:
+  # Caches (generic)
+  - "**/Cache/**"
+  - "**/GPUCache/**"
+  - "**/shadercache/**"
+  - "**/ShaderCache/**"
+  - "**/Code Cache/**"
+
+  # SSH ControlMaster sockets
+  - "${USER_HOME}/.ssh/ctl-*"
+  - "**/.ssh/ctl-*"
+
+  # JetBrains bulk (plugins + Toolbox app bundles)
+  - "${USER_HOME}/.local/share/JetBrains/**/plugins/**"
+  - "${USER_HOME}/.local/share/JetBrains/Toolbox/apps/**"
+  - "${USER_HOME}/.cache/JetBrains/**"
+
+  # Chromium bulk (we include only specific files above)
+  - "${USER_HOME}/.config/chromium/**"
+
+single_copy_mappings:
+  # Example mirrors:
+  - "/srv/data/postgres|/srv/backup/automated/sync/system-orion-postgres"
+  - "/srv/data/models|/srv/backup/automated/sync/system-orion-models"
\ No newline at end of file
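
--
The PKGBUILD's source array also lists zuzu-system-backup.service and
zuzu-system-backup.timer, which are not included in this patch. A minimal
sketch of what those units could look like, assuming the
/usr/local/sbin/zuzu-system-backup install path used in package() and a daily
schedule (both are assumptions, not part of the release):

# zuzu-system-backup.service (sketch)
[Unit]
Description=zuzu-system-backup snapshot + mirror backup
Wants=network-online.target
After=network-online.target

[Service]
# one-shot run of the backup script installed by package()
Type=oneshot
ExecStart=/usr/bin/python3 /usr/local/sbin/zuzu-system-backup/zuzu-system-backup.py

# zuzu-system-backup.timer (sketch)
[Unit]
Description=Run zuzu-system-backup daily

[Timer]
OnCalendar=daily
Persistent=true
RandomizedDelaySec=15min

[Install]
WantedBy=timers.target

With units like these in place, the schedule would be enabled via
"systemctl enable --now zuzu-system-backup.timer".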