diff --git a/README.md b/README.md
index 210db80..97610ea 100644
--- a/README.md
+++ b/README.md
@@ -74,6 +74,12 @@ pobsync retention
 pobsync retention --apply --yes --max-delete 10
 ```
 
+Discover snapshots already present on disk:
+
+```
+pobsync discover-snapshots --host web-01
+```
+
 The `pobsync` executable is a thin wrapper around Django management commands. Direct Django access is also available:
 
 ```
@@ -143,6 +149,7 @@ The remaining internal engine code still contains reusable backup primitives:
 Next refactor targets:
 
 - Record discovered snapshots into `SnapshotRecord`.
+- Surface `SnapshotRecord` data through API/admin views instead of filesystem inspection.
 - Move more snapshot lifecycle details into typed domain objects.
 - Replace remaining dictionary-shaped config at engine boundaries.
 - Remove legacy YAML import/export once production migration no longer needs it.
diff --git a/src/pobsync/cli.py b/src/pobsync/cli.py
index d64d73e..4595c25 100644
--- a/src/pobsync/cli.py
+++ b/src/pobsync/cli.py
@@ -13,6 +13,7 @@ COMMAND_ALIASES = {
     "schedule": "configure_pobsync_schedule",
     "backup": "run_pobsync_backup",
     "retention": "run_pobsync_retention",
+    "discover-snapshots": "discover_pobsync_snapshots",
     "scheduler": "run_pobsync_scheduler",
 }
 
diff --git a/src/pobsync_backend/admin.py b/src/pobsync_backend/admin.py
index 9fcca30..917c169 100644
--- a/src/pobsync_backend/admin.py
+++ b/src/pobsync_backend/admin.py
@@ -57,9 +57,10 @@ class BackupRunAdmin(admin.ModelAdmin):
 
 @admin.register(SnapshotRecord)
 class SnapshotRecordAdmin(admin.ModelAdmin):
-    list_display = ("host", "kind", "dirname", "status", "started_at")
-    list_filter = ("kind", "status", "started_at")
+    list_display = ("host", "kind", "dirname", "status", "started_at", "discovered_at")
+    list_filter = ("kind", "status", "started_at", "discovered_at")
     search_fields = ("host__host", "dirname", "path")
+    readonly_fields = ("discovered_at",)
 
 
 @admin.register(ScheduleConfig)
diff --git a/src/pobsync_backend/management/commands/discover_pobsync_snapshots.py b/src/pobsync_backend/management/commands/discover_pobsync_snapshots.py
new file mode 100644
index 0000000..1e5ca32
--- /dev/null
+++ b/src/pobsync_backend/management/commands/discover_pobsync_snapshots.py
@@ -0,0 +1,48 @@
+from __future__ import annotations
+
+from typing import Any
+
+from django.core.management.base import BaseCommand, CommandError
+
+from pobsync.snapshot_meta import normalize_kind
+from pobsync_backend.models import GlobalConfig, HostConfig
+from pobsync_backend.snapshot_discovery import discover_snapshots
+
+
+class Command(BaseCommand):
+    """Scan the configured backup root and record the snapshots found there."""
+
+    help = "Discover snapshot metadata on disk and upsert SnapshotRecord rows."
+
+    def add_arguments(self, parser) -> None:
+        """Register the optional ``--host`` and ``--kind`` filters."""
+        parser.add_argument("--host", default=None)
+        parser.add_argument("--kind", default="all", help="scheduled|manual|incomplete|all")
+
+    def handle(self, *args: Any, **options: Any) -> None:
+        """Run discovery and print a one-line summary of the result.
+
+        Raises:
+            CommandError: if the ``default`` GlobalConfig is missing, or if
+                ``--host`` does not name an enabled HostConfig.
+        """
+        try:
+            global_config = GlobalConfig.objects.get(name="default")
+        except GlobalConfig.DoesNotExist as exc:
+            raise CommandError("Missing GlobalConfig 'default'") from exc
+
+        host = None
+        if options["host"]:
+            try:
+                host = HostConfig.objects.get(host=options["host"], enabled=True)
+            except HostConfig.DoesNotExist as exc:
+                raise CommandError(f"Missing enabled HostConfig {options['host']!r}") from exc
+
+        kind = normalize_kind(options["kind"])
+        kinds = ["scheduled", "manual", "incomplete"] if kind == "all" else [kind]
+        result = discover_snapshots(host=host, global_config=global_config, kinds=kinds)
+        self.stdout.write(
+            self.style.SUCCESS(
+                f"Scanned {result['scanned']} snapshot(s), created {result['created']}, updated {result['updated']}."
+            )
+        )
diff --git a/src/pobsync_backend/snapshot_discovery.py b/src/pobsync_backend/snapshot_discovery.py
new file mode 100644
index 0000000..288cf7f
--- /dev/null
+++ b/src/pobsync_backend/snapshot_discovery.py
@@ -0,0 +1,116 @@
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+from pobsync.snapshot_meta import iter_snapshot_dirs, read_snapshot_meta, resolve_host_root
+
+from .models import GlobalConfig, HostConfig, SnapshotRecord
+
+
+def parse_snapshot_datetime(dirname: str, meta: dict[str, Any], key: str) -> datetime | None:
+    """Return the timestamp stored under ``key`` in ``meta`` as an aware datetime.
+
+    String values are parsed as ISO 8601 and ``datetime`` values are used
+    directly; naive values are treated as UTC. For ``started_at`` only, a
+    missing or unparseable value falls back to the ``%Y%m%d-%H%M%SZ`` prefix
+    of ``dirname`` (the part before ``__``). Returns ``None`` when nothing
+    usable is found.
+    """
+    value = meta.get(key)
+    if isinstance(value, datetime):
+        # YAML loaders can yield datetime objects for unquoted timestamps; use
+        # them rather than dropping to the dirname fallback (or to None).
+        return value if value.tzinfo is not None else value.replace(tzinfo=timezone.utc)
+    if isinstance(value, str):
+        parsed = _parse_iso_z(value)
+        if parsed is not None:
+            return parsed
+
+    if key == "started_at":
+        try:
+            prefix = dirname.split("__", 1)[0]
+            return datetime.strptime(prefix, "%Y%m%d-%H%M%SZ").replace(tzinfo=timezone.utc)
+        except ValueError:
+            return None
+    return None
+
+
+def discover_snapshots(
+    *,
+    host: HostConfig | None = None,
+    global_config: GlobalConfig | None = None,
+    kinds: list[str] | None = None,
+) -> dict[str, Any]:
+    """Upsert a ``SnapshotRecord`` for every snapshot directory found on disk.
+
+    Args:
+        host: Restrict the scan to this host; otherwise every enabled
+            ``HostConfig`` is scanned. A disabled host yields no records.
+        global_config: Config supplying ``backup_root``; defaults to the
+            ``GlobalConfig`` named ``default``.
+        kinds: Snapshot kinds to scan; defaults to scheduled, manual and
+            incomplete.
+
+    Returns:
+        A dict with ``ok`` plus the ``scanned``, ``created`` and ``updated``
+        counts. Re-discovered snapshots always count as updated.
+    """
+    global_config = global_config or GlobalConfig.objects.get(name="default")
+    host_qs = HostConfig.objects.filter(enabled=True).order_by("host")
+    if host is not None:
+        host_qs = host_qs.filter(pk=host.pk)
+
+    kinds = kinds or ["scheduled", "manual", "incomplete"]
+    scanned = 0
+    created = 0
+    updated = 0
+
+    for host_config in host_qs:
+        host_root = resolve_host_root(global_config.backup_root, host_config.host)
+        for kind in kinds:
+            for snapshot_dir in iter_snapshot_dirs(host_root, kind):
+                meta = read_snapshot_meta(snapshot_dir)
+                defaults = {
+                    "path": str(snapshot_dir),
+                    "status": str(meta.get("status") or ""),
+                    "started_at": parse_snapshot_datetime(snapshot_dir.name, meta, "started_at"),
+                    "ended_at": parse_snapshot_datetime(snapshot_dir.name, meta, "ended_at"),
+                    "metadata": meta,
+                }
+                # (host, kind, dirname) is the lookup key; every other field is refreshed.
+                _record, was_created = SnapshotRecord.objects.update_or_create(
+                    host=host_config,
+                    kind=kind,
+                    dirname=snapshot_dir.name,
+                    defaults=defaults,
+                )
+                scanned += 1
+                if was_created:
+                    created += 1
+                else:
+                    updated += 1
+
+    return {
+        "ok": True,
+        "scanned": scanned,
+        "created": created,
+        "updated": updated,
+    }
+
+
+def _parse_iso_z(value: str) -> datetime | None:
+    """Parse an ISO 8601 string, accepting a trailing ``Z``; naive results become UTC.
+
+    Returns ``None`` if ``value`` is not a valid ISO 8601 timestamp.
+    """
+    try:
+        if value.endswith("Z"):
+            return datetime.fromisoformat(value.removesuffix("Z") + "+00:00")
+        parsed = datetime.fromisoformat(value)
+        if parsed.tzinfo is None:
+            return parsed.replace(tzinfo=timezone.utc)
+        return parsed
+    except ValueError:
+        return None
diff --git a/src/pobsync_backend/tests/test_console_entrypoint.py b/src/pobsync_backend/tests/test_console_entrypoint.py
index b7ef819..434cf5b 100644
--- a/src/pobsync_backend/tests/test_console_entrypoint.py
+++ b/src/pobsync_backend/tests/test_console_entrypoint.py
@@ -39,3 +39,10 @@ class ConsoleEntrypointTests(SimpleTestCase):
         execute.assert_called_once_with(
             ["pobsync", "configure_pobsync_schedule", "web-01", "--cron", "15 2 * * *"]
         )
+
+    def test_maps_discover_snapshots_alias_to_django_command(self) -> None:
+        with patch("pobsync.cli.execute_from_command_line") as execute:
+            exit_code = main(["discover-snapshots", "--host", "web-01"])
+
+        self.assertEqual(exit_code, 0)
+        execute.assert_called_once_with(["pobsync", "discover_pobsync_snapshots", "--host", "web-01"])
diff --git a/src/pobsync_backend/tests/test_snapshot_discovery.py b/src/pobsync_backend/tests/test_snapshot_discovery.py
new file mode 100644
index 0000000..895139b
--- /dev/null
+++ b/src/pobsync_backend/tests/test_snapshot_discovery.py
@@ -0,0 +1,81 @@
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from io import StringIO
+from pathlib import Path
+from tempfile import TemporaryDirectory
+
+from django.core.management import call_command
+from django.test import TestCase
+
+from pobsync.util import write_yaml_atomic
+from pobsync_backend.models import GlobalConfig, HostConfig, SnapshotRecord
+from pobsync_backend.snapshot_discovery import discover_snapshots, parse_snapshot_datetime
+
+
+class SnapshotDiscoveryTests(TestCase):
+    def test_parse_snapshot_datetime_prefers_metadata(self) -> None:
+        parsed = parse_snapshot_datetime(
+            "20260519-021500Z__ABCDEFGH",
+            {"started_at": "2026-05-20T03:16:00Z"},
+            "started_at",
+        )
+
+        self.assertEqual(parsed, datetime(2026, 5, 20, 3, 16, tzinfo=timezone.utc))
+
+    def test_parse_snapshot_datetime_falls_back_to_dirname(self) -> None:
+        parsed = parse_snapshot_datetime("20260519-021500Z__ABCDEFGH", {}, "started_at")
+
+        self.assertEqual(parsed, datetime(2026, 5, 19, 2, 15, tzinfo=timezone.utc))
+
+    def test_parse_snapshot_datetime_accepts_datetime_values(self) -> None:
+        parsed = parse_snapshot_datetime(
+            "20260519-021500Z__ABCDEFGH",
+            {"ended_at": datetime(2026, 5, 19, 2, 16)},
+            "ended_at",
+        )
+
+        self.assertEqual(parsed, datetime(2026, 5, 19, 2, 16, tzinfo=timezone.utc))
+
+    def test_discovery_upserts_snapshot_records_idempotently(self) -> None:
+        with TemporaryDirectory() as tmp:
+            backup_root = Path(tmp) / "backups"
+            GlobalConfig.objects.create(name="default", backup_root=str(backup_root))
+            host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
+            snapshot_dir = backup_root / host.host / "scheduled" / "20260519-021500Z__ABCDEFGH"
+            meta_dir = snapshot_dir / "meta"
+            meta_dir.mkdir(parents=True)
+            write_yaml_atomic(
+                meta_dir / "meta.yaml",
+                {
+                    "status": "success",
+                    "started_at": "2026-05-19T02:15:00Z",
+                    "ended_at": "2026-05-19T02:16:00Z",
+                },
+            )
+
+            first = discover_snapshots(host=host)
+            second = discover_snapshots(host=host)
+
+            self.assertEqual(first["created"], 1)
+            self.assertEqual(first["updated"], 0)
+            self.assertEqual(second["created"], 0)
+            self.assertEqual(second["updated"], 1)
+            self.assertEqual(SnapshotRecord.objects.count(), 1)
+            record = SnapshotRecord.objects.get()
+            self.assertEqual(record.status, "success")
+            self.assertEqual(record.kind, "scheduled")
+            self.assertEqual(record.started_at, datetime(2026, 5, 19, 2, 15, tzinfo=timezone.utc))
+
+    def test_command_discovers_snapshots_for_host(self) -> None:
+        with TemporaryDirectory() as tmp:
+            backup_root = Path(tmp) / "backups"
+            GlobalConfig.objects.create(name="default", backup_root=str(backup_root))
+            host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
+            snapshot_dir = backup_root / host.host / ".incomplete" / "20260519-021500Z__ABCDEFGH"
+            (snapshot_dir / "meta").mkdir(parents=True)
+
+            call_command("discover_pobsync_snapshots", host=host.host, kind="incomplete", stdout=StringIO())
+
+            self.assertEqual(SnapshotRecord.objects.count(), 1)
+            self.assertEqual(SnapshotRecord.objects.get().kind, "incomplete")