Track snapshot base lineage in Django

This commit is contained in:
2026-05-19 11:19:22 +02:00
parent 5808800981
commit 659377d894
6 changed files with 201 additions and 5 deletions

View File

@@ -138,6 +138,8 @@ SQLite remains the default because it is enough for a single backup server and k
## Current Architecture ## Current Architecture
The public command surface is Django-first. The old YAML/cron CLI has been retired from the `pobsync` entrypoint. The public command surface is Django-first. The old YAML/cron CLI has been retired from the `pobsync` entrypoint.
Discovered snapshots are stored in `SnapshotRecord`, including the base snapshot metadata and a nullable SQL link to the
base record when it is known.
The remaining internal engine code still contains reusable backup primitives: The remaining internal engine code still contains reusable backup primitives:
@@ -148,7 +150,6 @@ The remaining internal engine code still contains reusable backup primitives:
Next refactor targets: Next refactor targets:
- Record discovered snapshots into `SnapshotRecord`.
- Surface `SnapshotRecord` data through API/admin views instead of filesystem inspection. - Surface `SnapshotRecord` data through API/admin views instead of filesystem inspection.
- Move more snapshot lifecycle details into typed domain objects. - Move more snapshot lifecycle details into typed domain objects.
- Replace remaining dictionary-shaped config at engine boundaries. - Replace remaining dictionary-shaped config at engine boundaries.

View File

@@ -58,9 +58,17 @@ class BackupRunAdmin(admin.ModelAdmin):
@admin.register(SnapshotRecord)
class SnapshotRecordAdmin(admin.ModelAdmin):
    """Admin configuration for discovered snapshot records.

    The changelist shows each snapshot together with its linked base
    snapshot, allows filtering by the recorded base kind, and searches both
    the snapshot's own identifiers and the stored base metadata.
    """

    list_display = ("host", "kind", "dirname", "status", "base", "started_at", "discovered_at")
    # ``base`` is a nullable foreign key. When ``list_select_related`` is left
    # at its default, the changelist falls back to a no-argument
    # ``select_related()``, which does not follow nullable relations, so every
    # row with a base would trigger its own query. Naming the relations
    # explicitly keeps the changelist at a single query.
    list_select_related = ("host", "base")
    list_filter = ("kind", "status", "base_kind", "started_at", "discovered_at")
    search_fields = (
        "host__host",
        "dirname",
        "path",
        "base__dirname",
        "base_path",
        "base_snapshot_id",
    )
    # The autocomplete widget for the self-referential ``base`` link is served
    # by this same admin, using the ``search_fields`` declared above.
    autocomplete_fields = ("base",)
    readonly_fields = ("discovered_at",)

View File

@@ -0,0 +1,44 @@
from __future__ import annotations
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add base-snapshot lineage columns to ``snapshotrecord``.

    One nullable self-referential foreign key (``base``) plus four blank-able
    text columns that keep the base metadata as plain strings.
    """

    dependencies = [
        ("pobsync_backend", "0004_backuprun_snapshot"),
    ]

    operations = [
        # Link to the base snapshot row; set to NULL if that row is deleted.
        migrations.AddField(
            model_name="snapshotrecord",
            name="base",
            field=models.ForeignKey(
                to="pobsync_backend.snapshotrecord",
                related_name="derived_snapshots",
                on_delete=django.db.models.deletion.SET_NULL,
                null=True,
                blank=True,
            ),
        ),
        # Plain-text base metadata columns; all may be left blank.
        migrations.AddField(
            model_name="snapshotrecord",
            name="base_dirname",
            field=models.CharField(max_length=255, blank=True),
        ),
        migrations.AddField(
            model_name="snapshotrecord",
            name="base_snapshot_id",
            field=models.CharField(max_length=64, blank=True),
        ),
        migrations.AddField(
            model_name="snapshotrecord",
            name="base_kind",
            field=models.CharField(max_length=16, blank=True),
        ),
        migrations.AddField(
            model_name="snapshotrecord",
            name="base_path",
            field=models.CharField(max_length=1024, blank=True),
        ),
    ]

View File

@@ -111,6 +111,17 @@ class SnapshotRecord(models.Model):
kind = models.CharField(max_length=16, choices=Kind.choices) kind = models.CharField(max_length=16, choices=Kind.choices)
dirname = models.CharField(max_length=255) dirname = models.CharField(max_length=255)
path = models.CharField(max_length=1024) path = models.CharField(max_length=1024)
base = models.ForeignKey(
"self",
on_delete=models.SET_NULL,
null=True,
blank=True,
related_name="derived_snapshots",
)
base_kind = models.CharField(max_length=16, blank=True)
base_dirname = models.CharField(max_length=255, blank=True)
base_path = models.CharField(max_length=1024, blank=True)
base_snapshot_id = models.CharField(max_length=64, blank=True)
status = models.CharField(max_length=32, blank=True) status = models.CharField(max_length=32, blank=True)
started_at = models.DateTimeField(null=True, blank=True) started_at = models.DateTimeField(null=True, blank=True)
ended_at = models.DateTimeField(null=True, blank=True) ended_at = models.DateTimeField(null=True, blank=True)

View File

@@ -55,6 +55,7 @@ def discover_snapshots(
created += 1 created += 1
else: else:
updated += 1 updated += 1
resolve_base_links(host=host_config)
return { return {
"ok": True, "ok": True,
@@ -66,8 +67,15 @@ def discover_snapshots(
def upsert_snapshot_record(*, host: HostConfig, kind: str, snapshot_dir: Path) -> tuple[SnapshotRecord, bool]: def upsert_snapshot_record(*, host: HostConfig, kind: str, snapshot_dir: Path) -> tuple[SnapshotRecord, bool]:
meta = read_snapshot_meta(snapshot_dir) meta = read_snapshot_meta(snapshot_dir)
base_defaults = _base_defaults_from_meta(meta)
defaults = { defaults = {
"path": str(snapshot_dir), "path": str(snapshot_dir),
**base_defaults,
"base": _resolve_base_record(
host=host,
kind=base_defaults["base_kind"],
dirname=base_defaults["base_dirname"],
),
"status": str(meta.get("status") or ""), "status": str(meta.get("status") or ""),
"started_at": parse_snapshot_datetime(snapshot_dir.name, meta, "started_at"), "started_at": parse_snapshot_datetime(snapshot_dir.name, meta, "started_at"),
"ended_at": parse_snapshot_datetime(snapshot_dir.name, meta, "ended_at"), "ended_at": parse_snapshot_datetime(snapshot_dir.name, meta, "ended_at"),
@@ -81,6 +89,26 @@ def upsert_snapshot_record(*, host: HostConfig, kind: str, snapshot_dir: Path) -
) )
def resolve_base_links(*, host: HostConfig | None = None) -> int:
    """Fill in missing ``base`` links for snapshots that name a base.

    Considers records that have a non-empty ``base_dirname`` but no ``base``
    row linked yet, optionally restricted to a single host. For each one the
    base record is looked up by host, ``base_kind`` and ``base_dirname``;
    records whose base cannot be found are left untouched.

    Args:
        host: When given, only snapshots of this host are processed.

    Returns:
        The number of snapshot records that received a ``base`` link.
    """
    pending = SnapshotRecord.objects.exclude(base_dirname="").filter(base__isnull=True)
    if host is not None:
        pending = pending.filter(host=host)

    linked = 0
    for record in pending.select_related("host"):
        candidate = _resolve_base_record(
            host=record.host,
            kind=record.base_kind,
            dirname=record.base_dirname,
        )
        if candidate is not None:
            record.base = candidate
            # Only the link column changes, so write just that field.
            record.save(update_fields=["base"])
            linked += 1
    return linked
def infer_snapshot_kind(snapshot_path: Path) -> str: def infer_snapshot_kind(snapshot_path: Path) -> str:
parent = snapshot_path.parent.name parent = snapshot_path.parent.name
if parent == "scheduled": if parent == "scheduled":
@@ -92,6 +120,29 @@ def infer_snapshot_kind(snapshot_path: Path) -> str:
raise ValueError(f"Cannot infer snapshot kind from path: {snapshot_path}") raise ValueError(f"Cannot infer snapshot kind from path: {snapshot_path}")
def _base_defaults_from_meta(meta: dict[str, Any]) -> dict[str, Any]:
    """Extract the ``base_*`` field values from snapshot metadata.

    Reads the ``"base"`` entry of *meta*; anything other than a dict is
    treated as "no base". Each value is passed through ``_base_value``.

    Returns:
        A dict with the keys ``base_kind``, ``base_dirname``, ``base_path``
        and ``base_snapshot_id``.
    """
    raw_base = meta.get("base")
    base_meta = raw_base if isinstance(raw_base, dict) else {}
    # (model field name, key inside the metadata "base" mapping)
    field_sources = (
        ("base_kind", "kind"),
        ("base_dirname", "dirname"),
        ("base_path", "path"),
        ("base_snapshot_id", "id"),
    )
    return {field: _base_value(base_meta.get(key)) for field, key in field_sources}
def _base_value(value: Any) -> str:
return value if isinstance(value, str) else ""
def _resolve_base_record(*, host: HostConfig, kind: str, dirname: str) -> SnapshotRecord | None:
if not kind or not dirname:
return None
return SnapshotRecord.objects.filter(host=host, kind=kind, dirname=dirname).first()
def _parse_iso_z(value: str) -> datetime | None: def _parse_iso_z(value: str) -> datetime | None:
try: try:
if value.endswith("Z"): if value.endswith("Z"):

View File

@@ -10,7 +10,12 @@ from django.test import TestCase
from pobsync.util import write_yaml_atomic from pobsync.util import write_yaml_atomic
from pobsync_backend.models import GlobalConfig, HostConfig, SnapshotRecord from pobsync_backend.models import GlobalConfig, HostConfig, SnapshotRecord
from pobsync_backend.snapshot_discovery import discover_snapshots, parse_snapshot_datetime from pobsync_backend.snapshot_discovery import (
discover_snapshots,
parse_snapshot_datetime,
resolve_base_links,
upsert_snapshot_record,
)
class SnapshotDiscoveryTests(TestCase): class SnapshotDiscoveryTests(TestCase):
@@ -58,6 +63,82 @@ class SnapshotDiscoveryTests(TestCase):
self.assertEqual(record.kind, "scheduled") self.assertEqual(record.kind, "scheduled")
self.assertEqual(record.started_at, datetime(2026, 5, 19, 2, 15, tzinfo=timezone.utc)) self.assertEqual(record.started_at, datetime(2026, 5, 19, 2, 15, tzinfo=timezone.utc))
def test_discovery_links_snapshot_to_base_record(self) -> None:
    """Discovering a base/child pair stores the base metadata and links the rows."""
    with TemporaryDirectory() as tmp:
        backup_root = Path(tmp) / "backups"
        GlobalConfig.objects.create(name="default", backup_root=str(backup_root))
        host = HostConfig.objects.create(host="web-01", address="web-01.example.test")

        scheduled_root = backup_root / host.host / "scheduled"
        base_dir = scheduled_root / "20260518-021500Z__BASESNAP"
        child_dir = scheduled_root / "20260519-021500Z__CHILDSNP"
        for snapshot_dir in (base_dir, child_dir):
            (snapshot_dir / "meta").mkdir(parents=True)

        # The base snapshot has no base of its own.
        base_meta = {
            "id": "base-id",
            "status": "success",
            "started_at": "2026-05-18T02:15:00Z",
            "base": None,
        }
        # The child snapshot points at the base snapshot above.
        expected_base_path = str(base_dir / "data")
        child_meta = {
            "id": "child-id",
            "status": "success",
            "started_at": "2026-05-19T02:15:00Z",
            "base": {
                "kind": "scheduled",
                "dirname": base_dir.name,
                "id": "base-id",
                "path": expected_base_path,
            },
        }
        write_yaml_atomic(base_dir / "meta" / "meta.yaml", base_meta)
        write_yaml_atomic(child_dir / "meta" / "meta.yaml", child_meta)

        result = discover_snapshots(host=host)

        self.assertEqual(result["created"], 2)
        child = SnapshotRecord.objects.get(dirname=child_dir.name)
        base = SnapshotRecord.objects.get(dirname=base_dir.name)
        self.assertEqual(child.base, base)
        self.assertEqual(child.base_kind, "scheduled")
        self.assertEqual(child.base_dirname, base_dir.name)
        self.assertEqual(child.base_snapshot_id, "base-id")
        self.assertEqual(child.base_path, expected_base_path)
def test_base_link_can_be_resolved_after_base_record_exists(self) -> None:
    """A child recorded before its base gets linked by a later resolve pass."""
    with TemporaryDirectory() as tmp:
        backup_root = Path(tmp) / "backups"
        host = HostConfig.objects.create(host="web-01", address="web-01.example.test")

        scheduled_root = backup_root / host.host / "scheduled"
        base_dir = scheduled_root / "20260518-021500Z__BASESNAP"
        child_dir = scheduled_root / "20260519-021500Z__CHILDSNP"
        for snapshot_dir in (base_dir, child_dir):
            (snapshot_dir / "meta").mkdir(parents=True)

        write_yaml_atomic(base_dir / "meta" / "meta.yaml", {"status": "success"})
        child_meta = {
            "status": "success",
            "base": {
                "kind": "scheduled",
                "dirname": base_dir.name,
                "id": "base-id",
                "path": str(base_dir / "data"),
            },
        }
        write_yaml_atomic(child_dir / "meta" / "meta.yaml", child_meta)

        # Record the child first, so its base row does not exist yet at upsert time.
        child, _created = upsert_snapshot_record(host=host, kind="scheduled", snapshot_dir=child_dir)
        upsert_snapshot_record(host=host, kind="scheduled", snapshot_dir=base_dir)

        linked = resolve_base_links(host=host)
        child.refresh_from_db()

        self.assertEqual(linked, 1)
        self.assertIsNotNone(child.base)
        self.assertEqual(child.base.dirname, base_dir.name)
        self.assertEqual(child.base_dirname, base_dir.name)
def test_command_discovers_snapshots_for_host(self) -> None: def test_command_discovers_snapshots_for_host(self) -> None:
with TemporaryDirectory() as tmp: with TemporaryDirectory() as tmp:
backup_root = Path(tmp) / "backups" backup_root = Path(tmp) / "backups"