2 Commits

Author SHA1 Message Date
659377d894 Track snapshot base lineage in Django 2026-05-19 11:19:22 +02:00
5808800981 feat: link backup runs to snapshot records
Add a nullable SnapshotRecord foreign key to BackupRun and populate it
when run_pobsync_backup records a completed or failed snapshot. Keep the
existing snapshot_path for audit compatibility while making run-to-snapshot
navigation explicit in the database and admin.
2026-05-19 11:13:06 +02:00
9 changed files with 249 additions and 14 deletions

View File

@@ -138,6 +138,8 @@ SQLite remains the default because it is enough for a single backup server and k
## Current Architecture ## Current Architecture
The public command surface is Django-first. The old YAML/cron CLI has been retired from the `pobsync` entrypoint. The public command surface is Django-first. The old YAML/cron CLI has been retired from the `pobsync` entrypoint.
Discovered snapshots are stored in `SnapshotRecord`, including the base snapshot metadata and a nullable SQL link to the
base record when it is known.
The remaining internal engine code still contains reusable backup primitives: The remaining internal engine code still contains reusable backup primitives:
@@ -148,7 +150,6 @@ The remaining internal engine code still contains reusable backup primitives:
Next refactor targets: Next refactor targets:
- Record discovered snapshots into `SnapshotRecord`.
- Surface `SnapshotRecord` data through API/admin views instead of filesystem inspection. - Surface `SnapshotRecord` data through API/admin views instead of filesystem inspection.
- Move more snapshot lifecycle details into typed domain objects. - Move more snapshot lifecycle details into typed domain objects.
- Replace remaining dictionary-shaped config at engine boundaries. - Replace remaining dictionary-shaped config at engine boundaries.

View File

@@ -50,16 +50,25 @@ class HostConfigAdmin(admin.ModelAdmin):
@admin.register(BackupRun) @admin.register(BackupRun)
class BackupRunAdmin(admin.ModelAdmin): class BackupRunAdmin(admin.ModelAdmin):
list_display = ("host", "run_type", "status", "started_at", "ended_at", "snapshot_path") list_display = ("host", "run_type", "status", "started_at", "ended_at", "snapshot")
list_filter = ("run_type", "status", "started_at") list_filter = ("run_type", "status", "started_at")
search_fields = ("host__host", "snapshot_path") search_fields = ("host__host", "snapshot_path", "snapshot__dirname", "snapshot__path")
autocomplete_fields = ("snapshot",)
@admin.register(SnapshotRecord) @admin.register(SnapshotRecord)
class SnapshotRecordAdmin(admin.ModelAdmin): class SnapshotRecordAdmin(admin.ModelAdmin):
list_display = ("host", "kind", "dirname", "status", "started_at", "discovered_at") list_display = ("host", "kind", "dirname", "status", "base", "started_at", "discovered_at")
list_filter = ("kind", "status", "started_at", "discovered_at") list_filter = ("kind", "status", "base_kind", "started_at", "discovered_at")
search_fields = ("host__host", "dirname", "path") search_fields = (
"host__host",
"dirname",
"path",
"base__dirname",
"base_path",
"base_snapshot_id",
)
autocomplete_fields = ("base",)
readonly_fields = ("discovered_at",) readonly_fields = ("discovered_at",)

View File

@@ -64,23 +64,26 @@ class Command(BaseCommand):
rsync = result.get("rsync") if isinstance(result.get("rsync"), dict) else {} rsync = result.get("rsync") if isinstance(result.get("rsync"), dict) else {}
run.rsync_exit_code = rsync.get("exit_code") run.rsync_exit_code = rsync.get("exit_code")
run.result = result run.result = result
snapshot_record = None
if run.snapshot_path:
snapshot_path = Path(run.snapshot_path)
try:
kind = infer_snapshot_kind(snapshot_path)
snapshot_record, _created = upsert_snapshot_record(host=host, kind=kind, snapshot_dir=snapshot_path)
except ValueError:
snapshot_record = None
run.snapshot = snapshot_record
run.save( run.save(
update_fields=[ update_fields=[
"status", "status",
"ended_at", "ended_at",
"snapshot_path", "snapshot_path",
"snapshot",
"base_path", "base_path",
"rsync_exit_code", "rsync_exit_code",
"result", "result",
], ],
) )
if run.snapshot_path:
snapshot_path = Path(run.snapshot_path)
try:
kind = infer_snapshot_kind(snapshot_path)
upsert_snapshot_record(host=host, kind=kind, snapshot_dir=snapshot_path)
except ValueError:
pass
if result.get("ok"): if result.get("ok"):
self.stdout.write(self.style.SUCCESS(f"Backup completed for {host.host}.")) self.stdout.write(self.style.SUCCESS(f"Backup completed for {host.host}."))

View File

@@ -0,0 +1,24 @@
from __future__ import annotations
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the nullable ``BackupRun.snapshot`` foreign key to ``SnapshotRecord``."""

    dependencies = [("pobsync_backend", "0003_structured_config_fields")]

    operations = [
        migrations.AddField(
            model_name="backuprun",
            name="snapshot",
            # Nullable with SET_NULL: the column may be empty, and deleting the
            # referenced snapshot record clears the link instead of the run.
            field=models.ForeignKey(
                to="pobsync_backend.snapshotrecord",
                related_name="backup_runs",
                on_delete=models.SET_NULL,
                null=True,
                blank=True,
            ),
        ),
    ]

View File

@@ -0,0 +1,44 @@
from __future__ import annotations
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add base-snapshot lineage columns to ``SnapshotRecord``.

    Adds a nullable self-referencing ``base`` foreign key plus four optional
    text columns (``base_dirname``, ``base_snapshot_id``, ``base_kind``,
    ``base_path``).
    """

    dependencies = [("pobsync_backend", "0004_backuprun_snapshot")]

    operations = [
        migrations.AddField(
            model_name="snapshotrecord",
            name="base",
            field=models.ForeignKey(
                to="pobsync_backend.snapshotrecord",
                related_name="derived_snapshots",
                on_delete=models.SET_NULL,
                null=True,
                blank=True,
            ),
        ),
        # The remaining columns differ only in name and width; the order below
        # is the order in which the operations are applied.
        *(
            migrations.AddField(
                model_name="snapshotrecord",
                name=column,
                field=models.CharField(blank=True, max_length=width),
            )
            for column, width in (
                ("base_dirname", 255),
                ("base_snapshot_id", 64),
                ("base_kind", 16),
                ("base_path", 1024),
            )
        ),
    ]

View File

@@ -82,6 +82,13 @@ class BackupRun(models.Model):
started_at = models.DateTimeField(null=True, blank=True) started_at = models.DateTimeField(null=True, blank=True)
ended_at = models.DateTimeField(null=True, blank=True) ended_at = models.DateTimeField(null=True, blank=True)
snapshot_path = models.CharField(max_length=1024, blank=True) snapshot_path = models.CharField(max_length=1024, blank=True)
snapshot = models.ForeignKey(
"SnapshotRecord",
on_delete=models.SET_NULL,
null=True,
blank=True,
related_name="backup_runs",
)
base_path = models.CharField(max_length=1024, blank=True) base_path = models.CharField(max_length=1024, blank=True)
rsync_exit_code = models.IntegerField(null=True, blank=True) rsync_exit_code = models.IntegerField(null=True, blank=True)
result = models.JSONField(default=dict, blank=True) result = models.JSONField(default=dict, blank=True)
@@ -104,6 +111,17 @@ class SnapshotRecord(models.Model):
kind = models.CharField(max_length=16, choices=Kind.choices) kind = models.CharField(max_length=16, choices=Kind.choices)
dirname = models.CharField(max_length=255) dirname = models.CharField(max_length=255)
path = models.CharField(max_length=1024) path = models.CharField(max_length=1024)
base = models.ForeignKey(
"self",
on_delete=models.SET_NULL,
null=True,
blank=True,
related_name="derived_snapshots",
)
base_kind = models.CharField(max_length=16, blank=True)
base_dirname = models.CharField(max_length=255, blank=True)
base_path = models.CharField(max_length=1024, blank=True)
base_snapshot_id = models.CharField(max_length=64, blank=True)
status = models.CharField(max_length=32, blank=True) status = models.CharField(max_length=32, blank=True)
started_at = models.DateTimeField(null=True, blank=True) started_at = models.DateTimeField(null=True, blank=True)
ended_at = models.DateTimeField(null=True, blank=True) ended_at = models.DateTimeField(null=True, blank=True)

View File

@@ -55,6 +55,7 @@ def discover_snapshots(
created += 1 created += 1
else: else:
updated += 1 updated += 1
resolve_base_links(host=host_config)
return { return {
"ok": True, "ok": True,
@@ -66,8 +67,15 @@ def discover_snapshots(
def upsert_snapshot_record(*, host: HostConfig, kind: str, snapshot_dir: Path) -> tuple[SnapshotRecord, bool]: def upsert_snapshot_record(*, host: HostConfig, kind: str, snapshot_dir: Path) -> tuple[SnapshotRecord, bool]:
meta = read_snapshot_meta(snapshot_dir) meta = read_snapshot_meta(snapshot_dir)
base_defaults = _base_defaults_from_meta(meta)
defaults = { defaults = {
"path": str(snapshot_dir), "path": str(snapshot_dir),
**base_defaults,
"base": _resolve_base_record(
host=host,
kind=base_defaults["base_kind"],
dirname=base_defaults["base_dirname"],
),
"status": str(meta.get("status") or ""), "status": str(meta.get("status") or ""),
"started_at": parse_snapshot_datetime(snapshot_dir.name, meta, "started_at"), "started_at": parse_snapshot_datetime(snapshot_dir.name, meta, "started_at"),
"ended_at": parse_snapshot_datetime(snapshot_dir.name, meta, "ended_at"), "ended_at": parse_snapshot_datetime(snapshot_dir.name, meta, "ended_at"),
@@ -81,6 +89,26 @@ def upsert_snapshot_record(*, host: HostConfig, kind: str, snapshot_dir: Path) -
) )
def resolve_base_links(*, host: HostConfig | None = None) -> int:
    """Fill in missing ``base`` links on snapshot records.

    Looks at records that have a non-empty ``base_dirname`` but no ``base``
    foreign key yet, optionally restricted to *host*. Each candidate is looked
    up through ``_resolve_base_record`` using the record's own host,
    ``base_kind`` and ``base_dirname``.

    Returns:
        The number of records whose ``base`` link was set and saved.
    """
    pending = SnapshotRecord.objects.filter(base__isnull=True).exclude(base_dirname="")
    if host is not None:
        pending = pending.filter(host=host)

    linked = 0
    for record in pending.select_related("host"):
        base_record = _resolve_base_record(
            host=record.host,
            kind=record.base_kind,
            dirname=record.base_dirname,
        )
        if base_record is not None:
            record.base = base_record
            # Persist only the foreign key; other columns are left untouched.
            record.save(update_fields=["base"])
            linked += 1
    return linked
def infer_snapshot_kind(snapshot_path: Path) -> str: def infer_snapshot_kind(snapshot_path: Path) -> str:
parent = snapshot_path.parent.name parent = snapshot_path.parent.name
if parent == "scheduled": if parent == "scheduled":
@@ -92,6 +120,29 @@ def infer_snapshot_kind(snapshot_path: Path) -> str:
raise ValueError(f"Cannot infer snapshot kind from path: {snapshot_path}") raise ValueError(f"Cannot infer snapshot kind from path: {snapshot_path}")
def _base_defaults_from_meta(meta: dict[str, Any]) -> dict[str, Any]:
    """Translate the ``base`` entry of snapshot metadata into ``base_*`` values.

    A ``base`` entry that is missing or is not a dict is treated as empty.
    Every value is passed through ``_base_value``.

    Returns:
        A dict with the keys ``base_kind``, ``base_dirname``, ``base_path``
        and ``base_snapshot_id``.
    """
    raw_base = meta.get("base")
    base_meta = raw_base if isinstance(raw_base, dict) else {}
    # (model field name, key inside the metadata's "base" mapping)
    field_sources = (
        ("base_kind", "kind"),
        ("base_dirname", "dirname"),
        ("base_path", "path"),
        ("base_snapshot_id", "id"),
    )
    return {field: _base_value(base_meta.get(key)) for field, key in field_sources}
def _base_value(value: Any) -> str:
return value if isinstance(value, str) else ""
def _resolve_base_record(*, host: HostConfig, kind: str, dirname: str) -> SnapshotRecord | None:
if not kind or not dirname:
return None
return SnapshotRecord.objects.filter(host=host, kind=kind, dirname=dirname).first()
def _parse_iso_z(value: str) -> datetime | None: def _parse_iso_z(value: str) -> datetime | None:
try: try:
if value.endswith("Z"): if value.endswith("Z"):

View File

@@ -43,8 +43,10 @@ class RunBackupRecordsSnapshotTests(TestCase):
call_command("run_pobsync_backup", host.host, prefix=str(Path(tmp) / "home"), stdout=StringIO()) call_command("run_pobsync_backup", host.host, prefix=str(Path(tmp) / "home"), stdout=StringIO())
self.assertEqual(BackupRun.objects.count(), 1) self.assertEqual(BackupRun.objects.count(), 1)
run = BackupRun.objects.get()
self.assertEqual(SnapshotRecord.objects.count(), 1) self.assertEqual(SnapshotRecord.objects.count(), 1)
record = SnapshotRecord.objects.get() record = SnapshotRecord.objects.get()
self.assertEqual(run.snapshot, record)
self.assertEqual(record.host, host) self.assertEqual(record.host, host)
self.assertEqual(record.kind, "scheduled") self.assertEqual(record.kind, "scheduled")
self.assertEqual(record.status, "success") self.assertEqual(record.status, "success")
@@ -74,6 +76,7 @@ class RunBackupRecordsSnapshotTests(TestCase):
run = BackupRun.objects.get() run = BackupRun.objects.get()
self.assertEqual(run.status, BackupRun.Status.FAILED) self.assertEqual(run.status, BackupRun.Status.FAILED)
record = SnapshotRecord.objects.get() record = SnapshotRecord.objects.get()
self.assertEqual(run.snapshot, record)
self.assertEqual(record.kind, "incomplete") self.assertEqual(record.kind, "incomplete")
self.assertEqual(record.status, "failed") self.assertEqual(record.status, "failed")
@@ -99,4 +102,5 @@ class RunBackupRecordsSnapshotTests(TestCase):
) )
self.assertEqual(BackupRun.objects.count(), 1) self.assertEqual(BackupRun.objects.count(), 1)
self.assertIsNone(BackupRun.objects.get().snapshot)
self.assertEqual(SnapshotRecord.objects.count(), 0) self.assertEqual(SnapshotRecord.objects.count(), 0)

View File

@@ -10,7 +10,12 @@ from django.test import TestCase
from pobsync.util import write_yaml_atomic from pobsync.util import write_yaml_atomic
from pobsync_backend.models import GlobalConfig, HostConfig, SnapshotRecord from pobsync_backend.models import GlobalConfig, HostConfig, SnapshotRecord
from pobsync_backend.snapshot_discovery import discover_snapshots, parse_snapshot_datetime from pobsync_backend.snapshot_discovery import (
discover_snapshots,
parse_snapshot_datetime,
resolve_base_links,
upsert_snapshot_record,
)
class SnapshotDiscoveryTests(TestCase): class SnapshotDiscoveryTests(TestCase):
@@ -58,6 +63,82 @@ class SnapshotDiscoveryTests(TestCase):
self.assertEqual(record.kind, "scheduled") self.assertEqual(record.kind, "scheduled")
self.assertEqual(record.started_at, datetime(2026, 5, 19, 2, 15, tzinfo=timezone.utc)) self.assertEqual(record.started_at, datetime(2026, 5, 19, 2, 15, tzinfo=timezone.utc))
def test_discovery_links_snapshot_to_base_record(self) -> None:
    """Discovery records the base metadata and links the child to its base."""
    with TemporaryDirectory() as tmp:
        backup_root = Path(tmp) / "backups"
        GlobalConfig.objects.create(name="default", backup_root=str(backup_root))
        host = HostConfig.objects.create(host="web-01", address="web-01.example.test")

        scheduled_root = backup_root / host.host / "scheduled"
        base_dir = scheduled_root / "20260518-021500Z__BASESNAP"
        child_dir = scheduled_root / "20260519-021500Z__CHILDSNP"
        for snapshot_dir in (base_dir, child_dir):
            (snapshot_dir / "meta").mkdir(parents=True)

        expected_base_path = str(base_dir / "data")
        base_meta = {
            "id": "base-id",
            "status": "success",
            "started_at": "2026-05-18T02:15:00Z",
            "base": None,
        }
        child_meta = {
            "id": "child-id",
            "status": "success",
            "started_at": "2026-05-19T02:15:00Z",
            "base": {
                "kind": "scheduled",
                "dirname": base_dir.name,
                "id": "base-id",
                "path": expected_base_path,
            },
        }
        write_yaml_atomic(base_dir / "meta" / "meta.yaml", base_meta)
        write_yaml_atomic(child_dir / "meta" / "meta.yaml", child_meta)

        result = discover_snapshots(host=host)

        self.assertEqual(result["created"], 2)
        child_record = SnapshotRecord.objects.get(dirname=child_dir.name)
        base_record = SnapshotRecord.objects.get(dirname=base_dir.name)
        # The SQL link and the denormalised metadata columns must both be set.
        self.assertEqual(child_record.base, base_record)
        self.assertEqual(child_record.base_kind, "scheduled")
        self.assertEqual(child_record.base_dirname, base_dir.name)
        self.assertEqual(child_record.base_snapshot_id, "base-id")
        self.assertEqual(child_record.base_path, expected_base_path)
def test_base_link_can_be_resolved_after_base_record_exists(self) -> None:
    """A child upserted before its base is linked by ``resolve_base_links``."""
    with TemporaryDirectory() as tmp:
        backup_root = Path(tmp) / "backups"
        host = HostConfig.objects.create(host="web-01", address="web-01.example.test")

        scheduled_root = backup_root / host.host / "scheduled"
        base_dir = scheduled_root / "20260518-021500Z__BASESNAP"
        child_dir = scheduled_root / "20260519-021500Z__CHILDSNP"
        for snapshot_dir in (base_dir, child_dir):
            (snapshot_dir / "meta").mkdir(parents=True)

        base_reference = {
            "kind": "scheduled",
            "dirname": base_dir.name,
            "id": "base-id",
            "path": str(base_dir / "data"),
        }
        write_yaml_atomic(base_dir / "meta" / "meta.yaml", {"status": "success"})
        write_yaml_atomic(
            child_dir / "meta" / "meta.yaml",
            {"status": "success", "base": base_reference},
        )

        # Record the child first, so its base record does not exist yet.
        child, _created = upsert_snapshot_record(host=host, kind="scheduled", snapshot_dir=child_dir)
        upsert_snapshot_record(host=host, kind="scheduled", snapshot_dir=base_dir)

        linked = resolve_base_links(host=host)
        child.refresh_from_db()

        self.assertEqual(linked, 1)
        self.assertIsNotNone(child.base)
        self.assertEqual(child.base.dirname, base_dir.name)
        self.assertEqual(child.base_dirname, base_dir.name)
def test_command_discovers_snapshots_for_host(self) -> None: def test_command_discovers_snapshots_for_host(self) -> None:
with TemporaryDirectory() as tmp: with TemporaryDirectory() as tmp:
backup_root = Path(tmp) / "backups" backup_root = Path(tmp) / "backups"