Plan Django retention from snapshot records

This commit is contained in:
2026-05-19 11:24:48 +02:00
parent 659377d894
commit 254f915051
6 changed files with 405 additions and 84 deletions

View File

@@ -140,6 +140,7 @@ SQLite remains the default because it is enough for a single backup server and k
The public command surface is Django-first. The old YAML/cron CLI has been retired from the `pobsync` entrypoint. The public command surface is Django-first. The old YAML/cron CLI has been retired from the `pobsync` entrypoint.
Discovered snapshots are stored in `SnapshotRecord`, including the base snapshot metadata and a nullable SQL link to the Discovered snapshots are stored in `SnapshotRecord`, including the base snapshot metadata and a nullable SQL link to the
base record when it is known. base record when it is known.
The Django retention command plans from `SnapshotRecord` instead of rediscovering snapshots from the filesystem.
The remaining internal engine code still contains reusable backup primitives: The remaining internal engine code still contains reusable backup primitives:
@@ -151,6 +152,7 @@ The remaining internal engine code still contains reusable backup primitives:
Next refactor targets: Next refactor targets:
- Surface `SnapshotRecord` data through API/admin views instead of filesystem inspection. - Surface `SnapshotRecord` data through API/admin views instead of filesystem inspection.
- Move post-backup pruning onto the SQL retention service.
- Move more snapshot lifecycle details into typed domain objects. - Move more snapshot lifecycle details into typed domain objects.
- Replace remaining dictionary-shaped config at engine boundaries. - Replace remaining dictionary-shaped config at engine boundaries.
- Remove legacy YAML import/export once production migration no longer needs it. - Remove legacy YAML import/export once production migration no longer needs it.

View File

@@ -2,12 +2,12 @@ from __future__ import annotations
from datetime import datetime, timezone from datetime import datetime, timezone
from pathlib import Path from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple from typing import Any, List
from ..config.source import ConfigSource, FileConfigSource from ..config.source import ConfigSource, FileConfigSource
from ..errors import ConfigError from ..errors import ConfigError
from ..paths import PobsyncPaths from ..paths import PobsyncPaths
from ..retention import Snapshot, build_retention_plan from ..retention import Snapshot, apply_base_protection, build_retention_plan
from ..snapshot_meta import iter_snapshot_dirs, read_snapshot_meta, resolve_host_root from ..snapshot_meta import iter_snapshot_dirs, read_snapshot_meta, resolve_host_root
from ..util import sanitize_host from ..util import sanitize_host
@@ -28,59 +28,6 @@ def _parse_snapshot_dt(dirname: str, meta: dict) -> datetime:
return datetime.fromtimestamp(0, tz=timezone.utc) return datetime.fromtimestamp(0, tz=timezone.utc)
def _apply_base_protection(
snapshots: List[Snapshot],
keep: set[str],
reasons: Dict[str, List[str]],
) -> Tuple[set[str], Dict[str, List[str]]]:
"""
Optional policy: if a kept snapshot has a base (kind+dirname), also keep that base snapshot.
This is NOT required for hardlink snapshots to remain readable, but can be useful
for performance (better base selection) or "chain" readability.
Adds reason: "base-of:<child_dirname>"
"""
# Index snapshots by (kind, dirname)
idx: Dict[Tuple[str, str], Snapshot] = {(s.kind, s.dirname): s for s in snapshots}
changed = True
while changed:
changed = False
# Iterate over a stable list of current keep items
for child_dirname in list(keep):
# Find the child snapshot (may exist in multiple kinds; check both)
child: Optional[Snapshot] = None
for k in ("scheduled", "manual"):
child = idx.get((k, child_dirname))
if child is not None:
break
if child is None:
continue
base = child.base
if not isinstance(base, dict):
continue
base_kind = base.get("kind")
base_dirname = base.get("dirname")
if not isinstance(base_kind, str) or not isinstance(base_dirname, str):
continue
base_snap = idx.get((base_kind, base_dirname))
if base_snap is None:
# Base might have been pruned already or never existed; ignore.
continue
if base_dirname not in keep:
keep.add(base_dirname)
reasons.setdefault(base_dirname, []).append(f"base-of:{child_dirname}")
changed = True
return keep, reasons
def run_retention_plan( def run_retention_plan(
prefix: Path, prefix: Path,
host: str, host: str,
@@ -142,7 +89,7 @@ def run_retention_plan(
reasons = dict(plan.reasons) reasons = dict(plan.reasons)
if protect_bases: if protect_bases:
keep, reasons = _apply_base_protection(snapshots=snapshots, keep=keep, reasons=reasons) keep, reasons = apply_base_protection(snapshots=snapshots, keep=keep, reasons=reasons)
delete = [s for s in snapshots if s.dirname not in keep] delete = [s for s in snapshots if s.dirname not in keep]

View File

@@ -123,3 +123,50 @@ def build_retention_plan(
return RetentionResult(keep=keep, reasons=reasons) return RetentionResult(keep=keep, reasons=reasons)
def apply_base_protection(
*,
snapshots: Iterable[Snapshot],
keep: Set[str],
reasons: Dict[str, List[str]],
) -> Tuple[Set[str], Dict[str, List[str]]]:
"""
If a kept snapshot has a base (kind+dirname), also keep that base snapshot.
Hardlink snapshots remain readable without this, but keeping bases can make
future base selection and chain inspection easier.
"""
snapshot_list = list(snapshots)
index: Dict[Tuple[str, str], Snapshot] = {(snapshot.kind, snapshot.dirname): snapshot for snapshot in snapshot_list}
changed = True
while changed:
changed = False
for child_dirname in list(keep):
child = _find_snapshot_by_dirname(snapshot_list, child_dirname)
if child is None or not isinstance(child.base, dict):
continue
base_kind = child.base.get("kind")
base_dirname = child.base.get("dirname")
if not isinstance(base_kind, str) or not isinstance(base_dirname, str):
continue
base_snapshot = index.get((base_kind, base_dirname))
if base_snapshot is None or base_dirname in keep:
continue
keep.add(base_dirname)
reasons.setdefault(base_dirname, []).append(f"base-of:{child_dirname}")
changed = True
return keep, reasons
def _find_snapshot_by_dirname(snapshots: Iterable[Snapshot], dirname: str) -> Snapshot | None:
for kind in ("scheduled", "manual"):
for snapshot in snapshots:
if snapshot.kind == kind and snapshot.dirname == dirname:
return snapshot
return None

View File

@@ -7,10 +7,8 @@ from typing import Any
from django.conf import settings from django.conf import settings
from django.core.management.base import BaseCommand, CommandError from django.core.management.base import BaseCommand, CommandError
from pobsync.commands.retention_apply import run_retention_apply from pobsync.errors import ConfigError
from pobsync.commands.retention_plan import run_retention_plan from pobsync_backend.retention import run_sql_retention_apply, run_sql_retention_plan
from pobsync_backend.config_source import DjangoConfigSource
from pobsync_backend.models import HostConfig
class Command(BaseCommand): class Command(BaseCommand):
@@ -27,29 +25,25 @@ class Command(BaseCommand):
def handle(self, *args: Any, **options: Any) -> None: def handle(self, *args: Any, **options: Any) -> None:
host = options["host"] host = options["host"]
if not HostConfig.objects.filter(host=host, enabled=True).exists(): try:
raise CommandError(f"Missing enabled HostConfig {host!r}") if options["apply"]:
if not options["yes"]:
config_source = DjangoConfigSource() raise CommandError("--yes is required with --apply")
if options["apply"]: result = run_sql_retention_apply(
if not options["yes"]: prefix=Path(options["prefix"]),
raise CommandError("--yes is required with --apply") host=host,
result = run_retention_apply( kind=options["kind"],
prefix=Path(options["prefix"]), protect_bases=bool(options["protect_bases"]),
host=host, yes=True,
kind=options["kind"], max_delete=int(options["max_delete"]),
protect_bases=bool(options["protect_bases"]), )
yes=True, else:
max_delete=int(options["max_delete"]), result = run_sql_retention_plan(
config_source=config_source, host=host,
) kind=options["kind"],
else: protect_bases=bool(options["protect_bases"]),
result = run_retention_plan( )
prefix=Path(options["prefix"]), except ConfigError as exc:
host=host, raise CommandError(str(exc)) from exc
kind=options["kind"],
protect_bases=bool(options["protect_bases"]),
config_source=config_source,
)
self.stdout.write(json.dumps(result, indent=2, sort_keys=False)) self.stdout.write(json.dumps(result, indent=2, sort_keys=False))

View File

@@ -0,0 +1,182 @@
from __future__ import annotations
import shutil
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from pobsync.errors import ConfigError
from pobsync.lock import acquire_host_lock
from pobsync.paths import PobsyncPaths
from pobsync.retention import Snapshot, apply_base_protection, build_retention_plan
from pobsync.util import sanitize_host
from .models import HostConfig, SnapshotRecord
def run_sql_retention_plan(*, host: str, kind: str, protect_bases: bool) -> dict[str, Any]:
host = sanitize_host(host)
if kind not in {"scheduled", "manual", "all"}:
raise ConfigError("kind must be scheduled, manual, or all")
host_config = _enabled_host_config(host)
retention = _retention_for_host(host_config)
snapshots = _snapshots_for_retention(host_config=host_config, kind=kind)
plan = build_retention_plan(
snapshots=snapshots,
retention=retention,
now=datetime.now(timezone.utc),
)
keep = set(plan.keep)
reasons = dict(plan.reasons)
if protect_bases:
keep, reasons = apply_base_protection(snapshots=snapshots, keep=keep, reasons=reasons)
delete = [snapshot for snapshot in snapshots if snapshot.dirname not in keep]
return {
"ok": True,
"host": host,
"kind": kind,
"protect_bases": bool(protect_bases),
"retention": retention,
"source": "sql",
"keep": sorted(keep),
"delete": [_snapshot_to_delete_item(snapshot) for snapshot in delete],
"reasons": reasons,
}
def run_sql_retention_apply(
*,
prefix: Path,
host: str,
kind: str,
protect_bases: bool,
yes: bool,
max_delete: int,
acquire_lock: bool = True,
) -> dict[str, Any]:
host = sanitize_host(host)
if not yes:
raise ConfigError("Refusing to delete snapshots without --yes")
if max_delete < 0:
raise ConfigError("--max-delete must be >= 0")
paths = PobsyncPaths(home=prefix)
def _do_apply() -> dict[str, Any]:
plan = run_sql_retention_plan(host=host, kind=kind, protect_bases=bool(protect_bases))
delete_list = plan.get("delete") or []
if not isinstance(delete_list, list):
raise ConfigError("Invalid retention plan output: delete is not a list")
if max_delete == 0 and len(delete_list) > 0:
raise ConfigError("Deletion blocked by --max-delete=0")
if len(delete_list) > max_delete:
raise ConfigError(f"Refusing to delete {len(delete_list)} snapshots (exceeds --max-delete={max_delete})")
actions: list[str] = []
deleted: list[dict[str, Any]] = []
for item in delete_list:
dirname = item.get("dirname") if isinstance(item, dict) else None
snap_kind = item.get("kind") if isinstance(item, dict) else None
snap_path = item.get("path") if isinstance(item, dict) else None
if not isinstance(dirname, str) or not isinstance(snap_kind, str) or not isinstance(snap_path, str):
continue
if snap_kind not in {"scheduled", "manual"}:
raise ConfigError(f"Refusing to delete unsupported snapshot kind: {snap_kind!r}")
path = Path(snap_path)
if not path.exists():
actions.append(f"skip missing {snap_kind}/{dirname}")
continue
if not path.is_dir():
raise ConfigError(f"Refusing to delete non-directory path: {snap_path}")
shutil.rmtree(path)
SnapshotRecord.objects.filter(host__host=host, kind=snap_kind, dirname=dirname).delete()
actions.append(f"deleted {snap_kind} {dirname}")
deleted.append({"dirname": dirname, "kind": snap_kind, "path": snap_path})
return {
"ok": True,
"host": host,
"kind": kind,
"protect_bases": bool(protect_bases),
"max_delete": max_delete,
"source": "sql",
"deleted": deleted,
"actions": actions,
}
if acquire_lock:
with acquire_host_lock(paths.locks_dir, host, command="retention-apply"):
return _do_apply()
return _do_apply()
def _enabled_host_config(host: str) -> HostConfig:
try:
return HostConfig.objects.get(host=host, enabled=True)
except HostConfig.DoesNotExist as exc:
raise ConfigError(f"Missing enabled HostConfig {host!r}") from exc
def _retention_for_host(host_config: HostConfig) -> dict[str, int]:
return {
"daily": host_config.retention_daily,
"weekly": host_config.retention_weekly,
"monthly": host_config.retention_monthly,
"yearly": host_config.retention_yearly,
}
def _snapshots_for_retention(*, host_config: HostConfig, kind: str) -> list[Snapshot]:
kinds = ["scheduled", "manual"] if kind == "all" else [kind]
records = (
SnapshotRecord.objects.filter(host=host_config, kind__in=kinds)
.exclude(kind=SnapshotRecord.Kind.INCOMPLETE)
.select_related("base")
.order_by("-started_at", "dirname")
)
return [_snapshot_from_record(record) for record in records]
def _snapshot_from_record(record: SnapshotRecord) -> Snapshot:
return Snapshot(
kind=record.kind,
dirname=record.dirname,
path=record.path,
dt=record.started_at or datetime.fromtimestamp(0, tz=timezone.utc),
status=record.status or None,
base=_base_meta_from_record(record),
)
def _base_meta_from_record(record: SnapshotRecord) -> dict[str, str] | None:
if record.base is not None:
return {
"kind": record.base.kind,
"dirname": record.base.dirname,
"path": record.base.path,
}
if record.base_kind and record.base_dirname:
return {
"kind": record.base_kind,
"dirname": record.base_dirname,
"path": record.base_path,
}
return None
def _snapshot_to_delete_item(snapshot: Snapshot) -> dict[str, Any]:
return {
"dirname": snapshot.dirname,
"kind": snapshot.kind,
"path": snapshot.path,
"dt": snapshot.dt.isoformat(),
"status": snapshot.status,
}

View File

@@ -0,0 +1,149 @@
from __future__ import annotations
import json
from datetime import datetime, timezone
from io import StringIO
from pathlib import Path
from tempfile import TemporaryDirectory
from django.core.management import call_command
from django.test import TestCase
from pobsync.errors import ConfigError
from pobsync_backend.models import HostConfig, SnapshotRecord
from pobsync_backend.retention import run_sql_retention_apply, run_sql_retention_plan
class SqlRetentionTests(TestCase):
def test_plan_uses_snapshot_records(self) -> None:
host = HostConfig.objects.create(
host="web-01",
address="web-01.example.test",
retention_daily=0,
retention_weekly=0,
retention_monthly=0,
retention_yearly=0,
)
old = self._snapshot(host, "20260518-021500Z__OLD")
new = self._snapshot(host, "20260519-021500Z__NEW")
plan = run_sql_retention_plan(host=host.host, kind="scheduled", protect_bases=False)
self.assertEqual(plan["source"], "sql")
self.assertEqual(plan["keep"], [new.dirname])
self.assertEqual([item["dirname"] for item in plan["delete"]], [old.dirname])
def test_plan_can_protect_base_snapshot_from_sql_relation(self) -> None:
host = HostConfig.objects.create(
host="web-01",
address="web-01.example.test",
retention_daily=0,
retention_weekly=0,
retention_monthly=0,
retention_yearly=0,
)
base = self._snapshot(host, "20260518-021500Z__BASE")
child = self._snapshot(host, "20260519-021500Z__CHILD", base=base)
plan = run_sql_retention_plan(host=host.host, kind="scheduled", protect_bases=True)
self.assertEqual(plan["keep"], [base.dirname, child.dirname])
self.assertEqual(plan["delete"], [])
self.assertEqual(plan["reasons"][base.dirname], [f"base-of:{child.dirname}"])
def test_apply_deletes_snapshot_directory_and_record(self) -> None:
with TemporaryDirectory() as tmp:
prefix = Path(tmp) / "home"
host = HostConfig.objects.create(
host="web-01",
address="web-01.example.test",
retention_daily=0,
retention_weekly=0,
retention_monthly=0,
retention_yearly=0,
)
old_dir = Path(tmp) / "backups" / host.host / "scheduled" / "20260518-021500Z__OLD"
new_dir = Path(tmp) / "backups" / host.host / "scheduled" / "20260519-021500Z__NEW"
old_dir.mkdir(parents=True)
new_dir.mkdir(parents=True)
old = self._snapshot(host, old_dir.name, path=str(old_dir))
new = self._snapshot(host, new_dir.name, path=str(new_dir))
result = run_sql_retention_apply(
prefix=prefix,
host=host.host,
kind="scheduled",
protect_bases=False,
yes=True,
max_delete=1,
acquire_lock=False,
)
self.assertFalse(old_dir.exists())
self.assertTrue(new_dir.exists())
self.assertTrue(SnapshotRecord.objects.filter(pk=new.pk).exists())
self.assertFalse(SnapshotRecord.objects.filter(pk=old.pk).exists())
self.assertEqual(result["deleted"], [{"dirname": old.dirname, "kind": "scheduled", "path": str(old_dir)}])
def test_apply_respects_max_delete(self) -> None:
host = HostConfig.objects.create(
host="web-01",
address="web-01.example.test",
retention_daily=0,
retention_weekly=0,
retention_monthly=0,
retention_yearly=0,
)
self._snapshot(host, "20260517-021500Z__OLDER")
self._snapshot(host, "20260518-021500Z__OLD")
self._snapshot(host, "20260519-021500Z__NEW")
with self.assertRaisesRegex(ConfigError, "exceeds --max-delete=1"):
run_sql_retention_apply(
prefix=Path("/tmp/pobsync-test"),
host=host.host,
kind="scheduled",
protect_bases=False,
yes=True,
max_delete=1,
acquire_lock=False,
)
def test_management_command_plans_from_sql(self) -> None:
host = HostConfig.objects.create(
host="web-01",
address="web-01.example.test",
retention_daily=0,
retention_weekly=0,
retention_monthly=0,
retention_yearly=0,
)
old = self._snapshot(host, "20260518-021500Z__OLD")
new = self._snapshot(host, "20260519-021500Z__NEW")
stdout = StringIO()
call_command("run_pobsync_retention", host.host, stdout=stdout)
result = json.loads(stdout.getvalue())
self.assertEqual(result["source"], "sql")
self.assertEqual(result["keep"], [new.dirname])
self.assertEqual([item["dirname"] for item in result["delete"]], [old.dirname])
def _snapshot(
self,
host: HostConfig,
dirname: str,
*,
path: str | None = None,
base: SnapshotRecord | None = None,
) -> SnapshotRecord:
started_at = datetime.strptime(dirname.split("__", 1)[0], "%Y%m%d-%H%M%SZ").replace(tzinfo=timezone.utc)
return SnapshotRecord.objects.create(
host=host,
kind="scheduled",
dirname=dirname,
path=path or f"/backups/{host.host}/scheduled/{dirname}",
base=base,
status="success",
started_at=started_at,
)