feat: discover snapshots into Django records

Add a Django-native snapshot discovery service and management command
that scans backup directories, reads snapshot metadata, and idempotently
upserts SnapshotRecord rows. Expose it through the pobsync command
wrapper, update admin/docs, and cover discovery behavior with tests.
This commit is contained in:
2026-05-19 05:18:01 +02:00
parent e564262c72
commit 336fb1a5be
7 changed files with 214 additions and 2 deletions

View File

@@ -74,6 +74,12 @@ pobsync retention <host>
pobsync retention <host> --apply --yes --max-delete 10
```
Discover snapshots already present on disk:
```
pobsync discover-snapshots --host <host>
```
The `pobsync` executable is a thin wrapper around Django management commands. Direct Django access is also available:
```
@@ -143,6 +149,7 @@ The remaining internal engine code still contains reusable backup primitives:
Next refactor targets:
- Record discovered snapshots into `SnapshotRecord`.
- Surface `SnapshotRecord` data through API/admin views instead of filesystem inspection.
- Move more snapshot lifecycle details into typed domain objects.
- Replace remaining dictionary-shaped config at engine boundaries.
- Remove legacy YAML import/export once production migration no longer needs it.

View File

@@ -13,6 +13,7 @@ COMMAND_ALIASES = {
"schedule": "configure_pobsync_schedule", "schedule": "configure_pobsync_schedule",
"backup": "run_pobsync_backup", "backup": "run_pobsync_backup",
"retention": "run_pobsync_retention", "retention": "run_pobsync_retention",
"discover-snapshots": "discover_pobsync_snapshots",
"scheduler": "run_pobsync_scheduler", "scheduler": "run_pobsync_scheduler",
} }

View File

@@ -57,9 +57,10 @@ class BackupRunAdmin(admin.ModelAdmin):
@admin.register(SnapshotRecord)
class SnapshotRecordAdmin(admin.ModelAdmin):
    """Admin configuration for ``SnapshotRecord`` rows."""

    # Columns shown in the change list; ``discovered_at`` is listed last.
    list_display = ("host", "kind", "dirname", "status", "started_at", "discovered_at")
    # Sidebar filters offered on the change list.
    list_filter = ("kind", "status", "started_at", "discovered_at")
    # ``host__host`` searches through the related host's ``host`` field.
    search_fields = ("host__host", "dirname", "path")
    # Displayed on the change form but not editable there.
    readonly_fields = ("discovered_at",)
@admin.register(ScheduleConfig) @admin.register(ScheduleConfig)

View File

@@ -0,0 +1,39 @@
from __future__ import annotations
from typing import Any
from django.core.management.base import BaseCommand, CommandError
from pobsync.snapshot_meta import normalize_kind
from pobsync_backend.models import GlobalConfig, HostConfig
from pobsync_backend.snapshot_discovery import discover_snapshots
class Command(BaseCommand):
    """Management command that scans snapshot directories and upserts ``SnapshotRecord`` rows."""

    help = "Discover snapshot metadata on disk and upsert SnapshotRecord rows."

    def add_arguments(self, parser) -> None:
        """Register the optional ``--host`` and ``--kind`` filters."""
        parser.add_argument("--host", default=None)
        parser.add_argument("--kind", default="all", help="scheduled|manual|incomplete|all")

    def handle(self, *args: Any, **options: Any) -> None:
        """Run discovery and write a one-line summary to stdout.

        Raises:
            CommandError: if the ``default`` GlobalConfig row does not exist,
                or ``--host`` does not match an enabled HostConfig.
        """
        global_config = self._default_global_config()
        host = self._enabled_host_or_none(options)

        kind = normalize_kind(options["kind"])
        # "all" expands to every snapshot kind; anything else is scanned alone.
        kinds = [kind] if kind != "all" else ["scheduled", "manual", "incomplete"]

        result = discover_snapshots(host=host, global_config=global_config, kinds=kinds)
        summary = f"Scanned {result['scanned']} snapshot(s), created {result['created']}, updated {result['updated']}."
        self.stdout.write(self.style.SUCCESS(summary))

    def _default_global_config(self):
        """Fetch the GlobalConfig named ``default`` or fail with a CommandError."""
        try:
            return GlobalConfig.objects.get(name="default")
        except GlobalConfig.DoesNotExist as exc:
            raise CommandError("Missing GlobalConfig 'default'") from exc

    def _enabled_host_or_none(self, options):
        """Return the enabled HostConfig named by ``--host``, or ``None`` when it was not given."""
        if not options["host"]:
            return None
        try:
            return HostConfig.objects.get(host=options["host"], enabled=True)
        except HostConfig.DoesNotExist as exc:
            raise CommandError(f"Missing enabled HostConfig {options['host']!r}") from exc

View File

@@ -0,0 +1,85 @@
from __future__ import annotations
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from pobsync.snapshot_meta import iter_snapshot_dirs, read_snapshot_meta, resolve_host_root
from .models import GlobalConfig, HostConfig, SnapshotRecord
def parse_snapshot_datetime(dirname: str, meta: dict[str, Any], key: str) -> datetime | None:
value = meta.get(key)
if isinstance(value, str):
parsed = _parse_iso_z(value)
if parsed is not None:
return parsed
if key == "started_at":
try:
prefix = dirname.split("__", 1)[0]
return datetime.strptime(prefix, "%Y%m%d-%H%M%SZ").replace(tzinfo=timezone.utc)
except ValueError:
return None
return None
def discover_snapshots(
    *,
    host: HostConfig | None = None,
    global_config: GlobalConfig | None = None,
    kinds: list[str] | None = None,
) -> dict[str, Any]:
    """Scan snapshot directories and upsert one ``SnapshotRecord`` per snapshot found.

    Args:
        host: When given, only this host is scanned. The lookup still filters
            on ``enabled=True``, so a disabled host yields no records.
        global_config: Configuration supplying ``backup_root``; the row named
            ``default`` is loaded when omitted.
        kinds: Snapshot kinds to scan; defaults to scheduled, manual and
            incomplete.

    Returns:
        A dict with ``ok`` (always ``True``) and the ``scanned``, ``created``
        and ``updated`` counts.
    """
    if not global_config:
        global_config = GlobalConfig.objects.get(name="default")

    enabled_hosts = HostConfig.objects.filter(enabled=True).order_by("host")
    if host is not None:
        enabled_hosts = enabled_hosts.filter(pk=host.pk)

    if not kinds:
        kinds = ["scheduled", "manual", "incomplete"]

    scanned = 0
    created = 0
    for host_config in enabled_hosts:
        host_root = resolve_host_root(global_config.backup_root, host_config.host)
        for kind in kinds:
            for snapshot_dir in iter_snapshot_dirs(host_root, kind):
                meta = read_snapshot_meta(snapshot_dir)
                dirname = snapshot_dir.name
                # (host, kind, dirname) identifies the row; everything else is
                # refreshed from disk on every run.
                _record, was_created = SnapshotRecord.objects.update_or_create(
                    host=host_config,
                    kind=kind,
                    dirname=dirname,
                    defaults={
                        "path": str(snapshot_dir),
                        "status": str(meta.get("status") or ""),
                        "started_at": parse_snapshot_datetime(dirname, meta, "started_at"),
                        "ended_at": parse_snapshot_datetime(dirname, meta, "ended_at"),
                        "metadata": meta,
                    },
                )
                scanned += 1
                if was_created:
                    created += 1

    # Every scanned snapshot is either created or updated.
    return {
        "ok": True,
        "scanned": scanned,
        "created": created,
        "updated": scanned - created,
    }
def _parse_iso_z(value: str) -> datetime | None:
try:
if value.endswith("Z"):
return datetime.fromisoformat(value.removesuffix("Z") + "+00:00")
parsed = datetime.fromisoformat(value)
if parsed.tzinfo is None:
return parsed.replace(tzinfo=timezone.utc)
return parsed
except ValueError:
return None

View File

@@ -39,3 +39,10 @@ class ConsoleEntrypointTests(SimpleTestCase):
execute.assert_called_once_with( execute.assert_called_once_with(
["pobsync", "configure_pobsync_schedule", "web-01", "--cron", "15 2 * * *"] ["pobsync", "configure_pobsync_schedule", "web-01", "--cron", "15 2 * * *"]
) )
def test_maps_discover_snapshots_alias_to_django_command(self) -> None:
    # The hyphenated CLI alias must be forwarded under the Django command
    # name, with the remaining arguments passed through unchanged.
    cli_args = ["discover-snapshots", "--host", "web-01"]
    forwarded_argv = ["pobsync", "discover_pobsync_snapshots", "--host", "web-01"]
    with patch("pobsync.cli.execute_from_command_line") as execute_mock:
        status = main(cli_args)
        self.assertEqual(status, 0)
        execute_mock.assert_called_once_with(forwarded_argv)

View File

@@ -0,0 +1,72 @@
from __future__ import annotations
from datetime import datetime, timezone
from io import StringIO
from pathlib import Path
from tempfile import TemporaryDirectory
from django.core.management import call_command
from django.test import TestCase
from pobsync.util import write_yaml_atomic
from pobsync_backend.models import GlobalConfig, HostConfig, SnapshotRecord
from pobsync_backend.snapshot_discovery import discover_snapshots, parse_snapshot_datetime
class SnapshotDiscoveryTests(TestCase):
    """Cover timestamp parsing and SnapshotRecord upserts from on-disk snapshots."""

    SNAPSHOT_DIRNAME = "20260519-021500Z__ABCDEFGH"

    def _create_default_config(self, backup_root: Path) -> HostConfig:
        # Shared fixture: the "default" GlobalConfig plus a single host.
        GlobalConfig.objects.create(name="default", backup_root=str(backup_root))
        return HostConfig.objects.create(host="web-01", address="web-01.example.test")

    def test_parse_snapshot_datetime_prefers_metadata(self) -> None:
        # Metadata says 20 May while the dirname says 19 May; metadata wins.
        meta = {"started_at": "2026-05-20T03:16:00Z"}
        expected = datetime(2026, 5, 20, 3, 16, tzinfo=timezone.utc)

        actual = parse_snapshot_datetime(self.SNAPSHOT_DIRNAME, meta, "started_at")

        self.assertEqual(actual, expected)

    def test_parse_snapshot_datetime_falls_back_to_dirname(self) -> None:
        # With no metadata at all, the dirname prefix supplies the start time.
        expected = datetime(2026, 5, 19, 2, 15, tzinfo=timezone.utc)

        actual = parse_snapshot_datetime(self.SNAPSHOT_DIRNAME, {}, "started_at")

        self.assertEqual(actual, expected)

    def test_discovery_upserts_snapshot_records_idempotently(self) -> None:
        with TemporaryDirectory() as tmp:
            backup_root = Path(tmp) / "backups"
            host = self._create_default_config(backup_root)
            meta_dir = backup_root / host.host / "scheduled" / self.SNAPSHOT_DIRNAME / "meta"
            meta_dir.mkdir(parents=True)
            write_yaml_atomic(
                meta_dir / "meta.yaml",
                {
                    "status": "success",
                    "started_at": "2026-05-19T02:15:00Z",
                    "ended_at": "2026-05-19T02:16:00Z",
                },
            )

            # The second pass must update the existing row, not add another.
            first_run = discover_snapshots(host=host)
            second_run = discover_snapshots(host=host)

            self.assertEqual(first_run["created"], 1)
            self.assertEqual(first_run["updated"], 0)
            self.assertEqual(second_run["created"], 0)
            self.assertEqual(second_run["updated"], 1)
            self.assertEqual(SnapshotRecord.objects.count(), 1)

            stored = SnapshotRecord.objects.get()
            self.assertEqual(stored.status, "success")
            self.assertEqual(stored.kind, "scheduled")
            self.assertEqual(stored.started_at, datetime(2026, 5, 19, 2, 15, tzinfo=timezone.utc))

    def test_command_discovers_snapshots_for_host(self) -> None:
        with TemporaryDirectory() as tmp:
            backup_root = Path(tmp) / "backups"
            host = self._create_default_config(backup_root)
            # Incomplete snapshots live under ".incomplete"; no meta.yaml is written.
            incomplete_meta_dir = backup_root / host.host / ".incomplete" / self.SNAPSHOT_DIRNAME / "meta"
            incomplete_meta_dir.mkdir(parents=True)

            call_command("discover_pobsync_snapshots", host=host.host, kind="incomplete", stdout=StringIO())

            self.assertEqual(SnapshotRecord.objects.count(), 1)
            self.assertEqual(SnapshotRecord.objects.get().kind, "incomplete")