From 254f915051711abb2885a8e93930264a9bc4473c Mon Sep 17 00:00:00 2001 From: Peter van Arkel Date: Tue, 19 May 2026 11:24:48 +0200 Subject: [PATCH] Plan Django retention from snapshot records --- README.md | 2 + src/pobsync/commands/retention_plan.py | 59 +----- src/pobsync/retention.py | 47 +++++ .../commands/run_pobsync_retention.py | 50 +++-- src/pobsync_backend/retention.py | 182 ++++++++++++++++++ .../tests/test_sql_retention.py | 149 ++++++++++++++ 6 files changed, 405 insertions(+), 84 deletions(-) create mode 100644 src/pobsync_backend/retention.py create mode 100644 src/pobsync_backend/tests/test_sql_retention.py diff --git a/README.md b/README.md index 348b3bf..75d7242 100644 --- a/README.md +++ b/README.md @@ -140,6 +140,7 @@ SQLite remains the default because it is enough for a single backup server and k The public command surface is Django-first. The old YAML/cron CLI has been retired from the `pobsync` entrypoint. Discovered snapshots are stored in `SnapshotRecord`, including the base snapshot metadata and a nullable SQL link to the base record when it is known. +The Django retention command plans from `SnapshotRecord` instead of rediscovering snapshots from the filesystem. The remaining internal engine code still contains reusable backup primitives: @@ -151,6 +152,7 @@ The remaining internal engine code still contains reusable backup primitives: Next refactor targets: - Surface `SnapshotRecord` data through API/admin views instead of filesystem inspection. +- Move post-backup pruning onto the SQL retention service. - Move more snapshot lifecycle details into typed domain objects. - Replace remaining dictionary-shaped config at engine boundaries. - Remove legacy YAML import/export once production migration no longer needs it. diff --git a/src/pobsync/commands/retention_plan.py b/src/pobsync/commands/retention_plan.py index 278f3e6..a2fb5f9 100644 --- a/src/pobsync/commands/retention_plan.py +++ b/src/pobsync/commands/retention_plan.py @@ -2,12 +2,12 @@ from __future__ import annotations from datetime import datetime, timezone from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, List from ..config.source import ConfigSource, FileConfigSource from ..errors import ConfigError from ..paths import PobsyncPaths -from ..retention import Snapshot, build_retention_plan +from ..retention import Snapshot, apply_base_protection, build_retention_plan from ..snapshot_meta import iter_snapshot_dirs, read_snapshot_meta, resolve_host_root from ..util import sanitize_host @@ -28,59 +28,6 @@ def _parse_snapshot_dt(dirname: str, meta: dict) -> datetime: return datetime.fromtimestamp(0, tz=timezone.utc) -def _apply_base_protection( - snapshots: List[Snapshot], - keep: set[str], - reasons: Dict[str, List[str]], -) -> Tuple[set[str], Dict[str, List[str]]]: - """ - Optional policy: if a kept snapshot has a base (kind+dirname), also keep that base snapshot. - This is NOT required for hardlink snapshots to remain readable, but can be useful - for performance (better base selection) or "chain" readability. - - Adds reason: "base-of:" - """ - # Index snapshots by (kind, dirname) - idx: Dict[Tuple[str, str], Snapshot] = {(s.kind, s.dirname): s for s in snapshots} - - changed = True - while changed: - changed = False - - # Iterate over a stable list of current keep items - for child_dirname in list(keep): - # Find the child snapshot (may exist in multiple kinds; check both) - child: Optional[Snapshot] = None - for k in ("scheduled", "manual"): - child = idx.get((k, child_dirname)) - if child is not None: - break - - if child is None: - continue - - base = child.base - if not isinstance(base, dict): - continue - - base_kind = base.get("kind") - base_dirname = base.get("dirname") - if not isinstance(base_kind, str) or not isinstance(base_dirname, str): - continue - - base_snap = idx.get((base_kind, base_dirname)) - if base_snap is None: - # Base might have been pruned already or never existed; ignore. - continue - - if base_dirname not in keep: - keep.add(base_dirname) - reasons.setdefault(base_dirname, []).append(f"base-of:{child_dirname}") - changed = True - - return keep, reasons - - def run_retention_plan( prefix: Path, host: str, @@ -142,7 +89,7 @@ def run_retention_plan( reasons = dict(plan.reasons) if protect_bases: - keep, reasons = _apply_base_protection(snapshots=snapshots, keep=keep, reasons=reasons) + keep, reasons = apply_base_protection(snapshots=snapshots, keep=keep, reasons=reasons) delete = [s for s in snapshots if s.dirname not in keep] diff --git a/src/pobsync/retention.py b/src/pobsync/retention.py index cd9a22a..6ae748f 100644 --- a/src/pobsync/retention.py +++ b/src/pobsync/retention.py @@ -123,3 +123,50 @@ def build_retention_plan( return RetentionResult(keep=keep, reasons=reasons) + +def apply_base_protection( + *, + snapshots: Iterable[Snapshot], + keep: Set[str], + reasons: Dict[str, List[str]], +) -> Tuple[Set[str], Dict[str, List[str]]]: + """ + If a kept snapshot has a base (kind+dirname), also keep that base snapshot. + + Hardlink snapshots remain readable without this, but keeping bases can make + future base selection and chain inspection easier. + """ + snapshot_list = list(snapshots) + index: Dict[Tuple[str, str], Snapshot] = {(snapshot.kind, snapshot.dirname): snapshot for snapshot in snapshot_list} + + changed = True + while changed: + changed = False + + for child_dirname in list(keep): + child = _find_snapshot_by_dirname(snapshot_list, child_dirname) + if child is None or not isinstance(child.base, dict): + continue + + base_kind = child.base.get("kind") + base_dirname = child.base.get("dirname") + if not isinstance(base_kind, str) or not isinstance(base_dirname, str): + continue + + base_snapshot = index.get((base_kind, base_dirname)) + if base_snapshot is None or base_dirname in keep: + continue + + keep.add(base_dirname) + reasons.setdefault(base_dirname, []).append(f"base-of:{child_dirname}") + changed = True + + return keep, reasons + + +def _find_snapshot_by_dirname(snapshots: Iterable[Snapshot], dirname: str) -> Snapshot | None: + for kind in ("scheduled", "manual"): + for snapshot in snapshots: + if snapshot.kind == kind and snapshot.dirname == dirname: + return snapshot + return None diff --git a/src/pobsync_backend/management/commands/run_pobsync_retention.py b/src/pobsync_backend/management/commands/run_pobsync_retention.py index 620f477..63eca5b 100644 --- a/src/pobsync_backend/management/commands/run_pobsync_retention.py +++ b/src/pobsync_backend/management/commands/run_pobsync_retention.py @@ -7,10 +7,8 @@ from typing import Any from django.conf import settings from django.core.management.base import BaseCommand, CommandError -from pobsync.commands.retention_apply import run_retention_apply -from pobsync.commands.retention_plan import run_retention_plan -from pobsync_backend.config_source import DjangoConfigSource -from pobsync_backend.models import HostConfig +from pobsync.errors import ConfigError +from pobsync_backend.retention import run_sql_retention_apply, run_sql_retention_plan class Command(BaseCommand): @@ -27,29 +25,25 @@ class Command(BaseCommand): def handle(self, *args: Any, **options: Any) -> None: host = options["host"] - if not HostConfig.objects.filter(host=host, enabled=True).exists(): - raise CommandError(f"Missing enabled HostConfig {host!r}") - - config_source = DjangoConfigSource() - if options["apply"]: - if not options["yes"]: - raise CommandError("--yes is required with --apply") - result = run_retention_apply( - prefix=Path(options["prefix"]), - host=host, - kind=options["kind"], - protect_bases=bool(options["protect_bases"]), - yes=True, - max_delete=int(options["max_delete"]), - config_source=config_source, - ) - else: - result = run_retention_plan( - prefix=Path(options["prefix"]), - host=host, - kind=options["kind"], - protect_bases=bool(options["protect_bases"]), - config_source=config_source, - ) + try: + if options["apply"]: + if not options["yes"]: + raise CommandError("--yes is required with --apply") + result = run_sql_retention_apply( + prefix=Path(options["prefix"]), + host=host, + kind=options["kind"], + protect_bases=bool(options["protect_bases"]), + yes=True, + max_delete=int(options["max_delete"]), + ) + else: + result = run_sql_retention_plan( + host=host, + kind=options["kind"], + protect_bases=bool(options["protect_bases"]), + ) + except ConfigError as exc: + raise CommandError(str(exc)) from exc self.stdout.write(json.dumps(result, indent=2, sort_keys=False)) diff --git a/src/pobsync_backend/retention.py b/src/pobsync_backend/retention.py new file mode 100644 index 0000000..5136224 --- /dev/null +++ b/src/pobsync_backend/retention.py @@ -0,0 +1,182 @@ +from __future__ import annotations + +import shutil +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +from pobsync.errors import ConfigError +from pobsync.lock import acquire_host_lock +from pobsync.paths import PobsyncPaths +from pobsync.retention import Snapshot, apply_base_protection, build_retention_plan +from pobsync.util import sanitize_host + +from .models import HostConfig, SnapshotRecord + + +def run_sql_retention_plan(*, host: str, kind: str, protect_bases: bool) -> dict[str, Any]: + host = sanitize_host(host) + if kind not in {"scheduled", "manual", "all"}: + raise ConfigError("kind must be scheduled, manual, or all") + + host_config = _enabled_host_config(host) + retention = _retention_for_host(host_config) + snapshots = _snapshots_for_retention(host_config=host_config, kind=kind) + + plan = build_retention_plan( + snapshots=snapshots, + retention=retention, + now=datetime.now(timezone.utc), + ) + + keep = set(plan.keep) + reasons = dict(plan.reasons) + if protect_bases: + keep, reasons = apply_base_protection(snapshots=snapshots, keep=keep, reasons=reasons) + + delete = [snapshot for snapshot in snapshots if snapshot.dirname not in keep] + + return { + "ok": True, + "host": host, + "kind": kind, + "protect_bases": bool(protect_bases), + "retention": retention, + "source": "sql", + "keep": sorted(keep), + "delete": [_snapshot_to_delete_item(snapshot) for snapshot in delete], + "reasons": reasons, + } + + +def run_sql_retention_apply( + *, + prefix: Path, + host: str, + kind: str, + protect_bases: bool, + yes: bool, + max_delete: int, + acquire_lock: bool = True, +) -> dict[str, Any]: + host = sanitize_host(host) + if not yes: + raise ConfigError("Refusing to delete snapshots without --yes") + if max_delete < 0: + raise ConfigError("--max-delete must be >= 0") + + paths = PobsyncPaths(home=prefix) + + def _do_apply() -> dict[str, Any]: + plan = run_sql_retention_plan(host=host, kind=kind, protect_bases=bool(protect_bases)) + delete_list = plan.get("delete") or [] + if not isinstance(delete_list, list): + raise ConfigError("Invalid retention plan output: delete is not a list") + if max_delete == 0 and len(delete_list) > 0: + raise ConfigError("Deletion blocked by --max-delete=0") + if len(delete_list) > max_delete: + raise ConfigError(f"Refusing to delete {len(delete_list)} snapshots (exceeds --max-delete={max_delete})") + + actions: list[str] = [] + deleted: list[dict[str, Any]] = [] + + for item in delete_list: + dirname = item.get("dirname") if isinstance(item, dict) else None + snap_kind = item.get("kind") if isinstance(item, dict) else None + snap_path = item.get("path") if isinstance(item, dict) else None + if not isinstance(dirname, str) or not isinstance(snap_kind, str) or not isinstance(snap_path, str): + continue + if snap_kind not in {"scheduled", "manual"}: + raise ConfigError(f"Refusing to delete unsupported snapshot kind: {snap_kind!r}") + + path = Path(snap_path) + if not path.exists(): + actions.append(f"skip missing {snap_kind}/{dirname}") + continue + if not path.is_dir(): + raise ConfigError(f"Refusing to delete non-directory path: {snap_path}") + + shutil.rmtree(path) + SnapshotRecord.objects.filter(host__host=host, kind=snap_kind, dirname=dirname).delete() + actions.append(f"deleted {snap_kind} {dirname}") + deleted.append({"dirname": dirname, "kind": snap_kind, "path": snap_path}) + + return { + "ok": True, + "host": host, + "kind": kind, + "protect_bases": bool(protect_bases), + "max_delete": max_delete, + "source": "sql", + "deleted": deleted, + "actions": actions, + } + + if acquire_lock: + with acquire_host_lock(paths.locks_dir, host, command="retention-apply"): + return _do_apply() + return _do_apply() + + +def _enabled_host_config(host: str) -> HostConfig: + try: + return HostConfig.objects.get(host=host, enabled=True) + except HostConfig.DoesNotExist as exc: + raise ConfigError(f"Missing enabled HostConfig {host!r}") from exc + + +def _retention_for_host(host_config: HostConfig) -> dict[str, int]: + return { + "daily": host_config.retention_daily, + "weekly": host_config.retention_weekly, + "monthly": host_config.retention_monthly, + "yearly": host_config.retention_yearly, + } + + +def _snapshots_for_retention(*, host_config: HostConfig, kind: str) -> list[Snapshot]: + kinds = ["scheduled", "manual"] if kind == "all" else [kind] + records = ( + SnapshotRecord.objects.filter(host=host_config, kind__in=kinds) + .exclude(kind=SnapshotRecord.Kind.INCOMPLETE) + .select_related("base") + .order_by("-started_at", "dirname") + ) + return [_snapshot_from_record(record) for record in records] + + +def _snapshot_from_record(record: SnapshotRecord) -> Snapshot: + return Snapshot( + kind=record.kind, + dirname=record.dirname, + path=record.path, + dt=record.started_at or datetime.fromtimestamp(0, tz=timezone.utc), + status=record.status or None, + base=_base_meta_from_record(record), + ) + + +def _base_meta_from_record(record: SnapshotRecord) -> dict[str, str] | None: + if record.base is not None: + return { + "kind": record.base.kind, + "dirname": record.base.dirname, + "path": record.base.path, + } + if record.base_kind and record.base_dirname: + return { + "kind": record.base_kind, + "dirname": record.base_dirname, + "path": record.base_path, + } + return None + + +def _snapshot_to_delete_item(snapshot: Snapshot) -> dict[str, Any]: + return { + "dirname": snapshot.dirname, + "kind": snapshot.kind, + "path": snapshot.path, + "dt": snapshot.dt.isoformat(), + "status": snapshot.status, + } diff --git a/src/pobsync_backend/tests/test_sql_retention.py b/src/pobsync_backend/tests/test_sql_retention.py new file mode 100644 index 0000000..846c38d --- /dev/null +++ b/src/pobsync_backend/tests/test_sql_retention.py @@ -0,0 +1,149 @@ +from __future__ import annotations + +import json +from datetime import datetime, timezone +from io import StringIO +from pathlib import Path +from tempfile import TemporaryDirectory + +from django.core.management import call_command +from django.test import TestCase + +from pobsync.errors import ConfigError +from pobsync_backend.models import HostConfig, SnapshotRecord +from pobsync_backend.retention import run_sql_retention_apply, run_sql_retention_plan + + +class SqlRetentionTests(TestCase): + def test_plan_uses_snapshot_records(self) -> None: + host = HostConfig.objects.create( + host="web-01", + address="web-01.example.test", + retention_daily=0, + retention_weekly=0, + retention_monthly=0, + retention_yearly=0, + ) + old = self._snapshot(host, "20260518-021500Z__OLD") + new = self._snapshot(host, "20260519-021500Z__NEW") + + plan = run_sql_retention_plan(host=host.host, kind="scheduled", protect_bases=False) + + self.assertEqual(plan["source"], "sql") + self.assertEqual(plan["keep"], [new.dirname]) + self.assertEqual([item["dirname"] for item in plan["delete"]], [old.dirname]) + + def test_plan_can_protect_base_snapshot_from_sql_relation(self) -> None: + host = HostConfig.objects.create( + host="web-01", + address="web-01.example.test", + retention_daily=0, + retention_weekly=0, + retention_monthly=0, + retention_yearly=0, + ) + base = self._snapshot(host, "20260518-021500Z__BASE") + child = self._snapshot(host, "20260519-021500Z__CHILD", base=base) + + plan = run_sql_retention_plan(host=host.host, kind="scheduled", protect_bases=True) + + self.assertEqual(plan["keep"], [base.dirname, child.dirname]) + self.assertEqual(plan["delete"], []) + self.assertEqual(plan["reasons"][base.dirname], [f"base-of:{child.dirname}"]) + + def test_apply_deletes_snapshot_directory_and_record(self) -> None: + with TemporaryDirectory() as tmp: + prefix = Path(tmp) / "home" + host = HostConfig.objects.create( + host="web-01", + address="web-01.example.test", + retention_daily=0, + retention_weekly=0, + retention_monthly=0, + retention_yearly=0, + ) + old_dir = Path(tmp) / "backups" / host.host / "scheduled" / "20260518-021500Z__OLD" + new_dir = Path(tmp) / "backups" / host.host / "scheduled" / "20260519-021500Z__NEW" + old_dir.mkdir(parents=True) + new_dir.mkdir(parents=True) + old = self._snapshot(host, old_dir.name, path=str(old_dir)) + new = self._snapshot(host, new_dir.name, path=str(new_dir)) + + result = run_sql_retention_apply( + prefix=prefix, + host=host.host, + kind="scheduled", + protect_bases=False, + yes=True, + max_delete=1, + acquire_lock=False, + ) + + self.assertFalse(old_dir.exists()) + self.assertTrue(new_dir.exists()) + self.assertTrue(SnapshotRecord.objects.filter(pk=new.pk).exists()) + self.assertFalse(SnapshotRecord.objects.filter(pk=old.pk).exists()) + self.assertEqual(result["deleted"], [{"dirname": old.dirname, "kind": "scheduled", "path": str(old_dir)}]) + + def test_apply_respects_max_delete(self) -> None: + host = HostConfig.objects.create( + host="web-01", + address="web-01.example.test", + retention_daily=0, + retention_weekly=0, + retention_monthly=0, + retention_yearly=0, + ) + self._snapshot(host, "20260517-021500Z__OLDER") + self._snapshot(host, "20260518-021500Z__OLD") + self._snapshot(host, "20260519-021500Z__NEW") + + with self.assertRaisesRegex(ConfigError, "exceeds --max-delete=1"): + run_sql_retention_apply( + prefix=Path("/tmp/pobsync-test"), + host=host.host, + kind="scheduled", + protect_bases=False, + yes=True, + max_delete=1, + acquire_lock=False, + ) + + def test_management_command_plans_from_sql(self) -> None: + host = HostConfig.objects.create( + host="web-01", + address="web-01.example.test", + retention_daily=0, + retention_weekly=0, + retention_monthly=0, + retention_yearly=0, + ) + old = self._snapshot(host, "20260518-021500Z__OLD") + new = self._snapshot(host, "20260519-021500Z__NEW") + stdout = StringIO() + + call_command("run_pobsync_retention", host.host, stdout=stdout) + + result = json.loads(stdout.getvalue()) + self.assertEqual(result["source"], "sql") + self.assertEqual(result["keep"], [new.dirname]) + self.assertEqual([item["dirname"] for item in result["delete"]], [old.dirname]) + + def _snapshot( + self, + host: HostConfig, + dirname: str, + *, + path: str | None = None, + base: SnapshotRecord | None = None, + ) -> SnapshotRecord: + started_at = datetime.strptime(dirname.split("__", 1)[0], "%Y%m%d-%H%M%SZ").replace(tzinfo=timezone.utc) + return SnapshotRecord.objects.create( + host=host, + kind="scheduled", + dirname=dirname, + path=path or f"/backups/{host.host}/scheduled/{dirname}", + base=base, + status="success", + started_at=started_at, + )