(ui) Make retention planning warnings explicit

Show keep/delete reasons in the retention plan, surface scheduled prune
limit warnings, and explain base snapshot protection before retention is
applied.

Also surface incomplete snapshots from the retention views without deleting
them automatically, so interrupted backups are visible on the dashboard,
host detail, and retention plan.
This commit is contained in:
2026-05-21 01:10:45 +02:00
parent 26265be440
commit 50eb7cf2f3
8 changed files with 297 additions and 9 deletions

View File

@@ -23,6 +23,7 @@ def run_sql_retention_plan(*, host: str, kind: str, protect_bases: bool) -> dict
host_config = _enabled_host_config(host) host_config = _enabled_host_config(host)
retention = _retention_for_host(host_config) retention = _retention_for_host(host_config)
snapshots = _snapshots_for_retention(host_config=host_config, kind=kind) snapshots = _snapshots_for_retention(host_config=host_config, kind=kind)
incomplete_snapshots = _incomplete_snapshots_for_host(host_config)
plan = build_retention_plan( plan = build_retention_plan(
snapshots=snapshots, snapshots=snapshots,
@@ -36,6 +37,7 @@ def run_sql_retention_plan(*, host: str, kind: str, protect_bases: bool) -> dict
keep, reasons = apply_base_protection(snapshots=snapshots, keep=keep, reasons=reasons) keep, reasons = apply_base_protection(snapshots=snapshots, keep=keep, reasons=reasons)
delete = [snapshot for snapshot in snapshots if snapshot.dirname not in keep] delete = [snapshot for snapshot in snapshots if snapshot.dirname not in keep]
keep_items = [snapshot for snapshot in snapshots if snapshot.dirname in keep]
return { return {
"ok": True, "ok": True,
@@ -45,7 +47,12 @@ def run_sql_retention_plan(*, host: str, kind: str, protect_bases: bool) -> dict
"retention": retention, "retention": retention,
"source": "sql", "source": "sql",
"keep": sorted(keep), "keep": sorted(keep),
"delete": [_snapshot_to_delete_item(snapshot) for snapshot in delete], "keep_items": [_snapshot_to_item(snapshot, reasons=reasons.get(snapshot.dirname, [])) for snapshot in keep_items],
"delete": [_snapshot_to_item(snapshot, reasons=["outside retention policy"]) for snapshot in delete],
"incomplete": [
_snapshot_to_item(snapshot, reasons=["incomplete snapshot; excluded from retention cleanup"])
for snapshot in incomplete_snapshots
],
"reasons": reasons, "reasons": reasons,
} }
@@ -146,6 +153,15 @@ def _snapshots_for_retention(*, host_config: HostConfig, kind: str) -> list[Snap
return [_snapshot_from_record(record) for record in records] return [_snapshot_from_record(record) for record in records]
def _incomplete_snapshots_for_host(host_config: HostConfig) -> list[Snapshot]:
records = (
SnapshotRecord.objects.filter(host=host_config, kind=SnapshotRecord.Kind.INCOMPLETE)
.select_related("base")
.order_by("-started_at", "dirname")
)
return [_snapshot_from_record(record) for record in records]
def _snapshot_from_record(record: SnapshotRecord) -> Snapshot: def _snapshot_from_record(record: SnapshotRecord) -> Snapshot:
return Snapshot( return Snapshot(
kind=record.kind, kind=record.kind,
@@ -173,13 +189,15 @@ def _base_meta_from_record(record: SnapshotRecord) -> dict[str, str] | None:
return None return None
def _snapshot_to_delete_item(snapshot: Snapshot) -> dict[str, Any]: def _snapshot_to_item(snapshot: Snapshot, *, reasons: list[str]) -> dict[str, Any]:
return { return {
"dirname": snapshot.dirname, "dirname": snapshot.dirname,
"kind": snapshot.kind, "kind": snapshot.kind,
"path": snapshot.path, "path": snapshot.path,
"dt": snapshot.dt.isoformat(), "dt": snapshot.dt.isoformat(),
"status": snapshot.status, "status": snapshot.status,
"reasons": reasons,
"reason": ", ".join(reasons),
} }

View File

@@ -231,6 +231,17 @@
.host-card-stat.wide { .host-card-stat.wide {
grid-column: 1 / -1; grid-column: 1 / -1;
} }
.host-card-warning {
background: #fffaf0;
border: 1px solid #e7cf8a;
border-radius: 6px;
color: var(--running);
display: flex;
flex-wrap: wrap;
gap: 8px;
margin-top: 14px;
padding: 10px;
}
.messages { display: grid; gap: 8px; margin-bottom: 18px; } .messages { display: grid; gap: 8px; margin-bottom: 18px; }
.message { .message {
background: var(--panel); background: var(--panel);

View File

@@ -118,6 +118,20 @@
</div> </div>
</div> </div>
</div> </div>
{% if host.retention_warning.has_warning %}
<div class="host-card-warning">
<span class="status warning">retention</span>
{% if host.retention_warning.prune_exceeded %}
Scheduled prune would delete {{ host.retention_warning.delete_count }} snapshot(s), above max {{ host.retention_warning.max_delete }}.
{% endif %}
{% if host.retention_warning.incomplete_count %}
{{ host.retention_warning.incomplete_count }} incomplete snapshot(s) need review.
{% endif %}
{% if host.retention_warning.error %}
{{ host.retention_warning.error }}
{% endif %}
</div>
{% endif %}
</article> </article>
{% empty %} {% empty %}
<p class="muted">No hosts configured yet.</p> <p class="muted">No hosts configured yet.</p>

View File

@@ -68,6 +68,30 @@
</section> </section>
</div> </div>
{% if retention_warning.has_warning %}
<section class="panel highlight warning">
<h2>Retention Warnings</h2>
<div class="stack">
{% if retention_warning.prune_exceeded %}
<div>
Scheduled pruning would delete {{ retention_warning.delete_count }} snapshot(s), above max delete
{{ retention_warning.max_delete }}. Scheduled pruning will refuse this plan until the limit or retention
selection is adjusted.
</div>
{% endif %}
{% if retention_warning.incomplete_count %}
<div>
{{ retention_warning.incomplete_count }} incomplete snapshot(s) exist. Retention does not delete incomplete
snapshots automatically; inspect them before cleanup.
</div>
{% endif %}
{% if retention_warning.error %}
<div>{{ retention_warning.error }}</div>
{% endif %}
</div>
</section>
{% endif %}
{% if effective_config %} {% if effective_config %}
<section class="panel"> <section class="panel">
<h2>Effective Config</h2> <h2>Effective Config</h2>

View File

@@ -18,8 +18,31 @@
<div class="metric"><div class="label">Kind</div><div class="value">{{ plan.kind }}</div></div> <div class="metric"><div class="label">Kind</div><div class="value">{{ plan.kind }}</div></div>
<div class="metric"><div class="label">Keep</div><div class="value">{{ plan.keep|length }}</div></div> <div class="metric"><div class="label">Keep</div><div class="value">{{ plan.keep|length }}</div></div>
<div class="metric"><div class="label">Would Delete</div><div class="value">{{ plan.delete|length }}</div></div> <div class="metric"><div class="label">Would Delete</div><div class="value">{{ plan.delete|length }}</div></div>
<div class="metric"><div class="label">Scheduled Limit</div><div class="value">{{ scheduled_prune_limit|default:"none" }}</div></div>
<div class="metric"><div class="label">Incomplete</div><div class="value">{{ plan.incomplete|length }}</div></div>
</section> </section>
{% if scheduled_prune_exceeded %}
<section class="panel highlight warning">
<h2>Scheduled Prune Limit</h2>
<p>
This plan would delete {{ plan.delete|length }} snapshot(s), which exceeds the scheduled prune limit of
{{ scheduled_prune_limit }}. Scheduled pruning will refuse to apply this plan until the limit or retention
selection is adjusted.
</p>
</section>
{% endif %}
{% if plan.incomplete %}
<section class="panel highlight warning">
<h2>Incomplete Snapshots</h2>
<p>
{{ plan.incomplete|length }} incomplete snapshot(s) exist for this host. Retention does not delete incomplete
snapshots automatically because they can indicate an interrupted backup that should be inspected first.
</p>
</section>
{% endif %}
<section class="panel"> <section class="panel">
<h2>Policy</h2> <h2>Policy</h2>
<div class="stack"> <div class="stack">
@@ -28,6 +51,17 @@
<div><strong>Monthly:</strong> {{ plan.retention.monthly }}</div> <div><strong>Monthly:</strong> {{ plan.retention.monthly }}</div>
<div><strong>Yearly:</strong> {{ plan.retention.yearly }}</div> <div><strong>Yearly:</strong> {{ plan.retention.yearly }}</div>
<div><strong>Protect bases:</strong> {{ protect_bases|yesno:"yes,no" }}</div> <div><strong>Protect bases:</strong> {{ protect_bases|yesno:"yes,no" }}</div>
<div class="muted">
{% if protect_bases %}
Base snapshots referenced by kept snapshots are also kept and marked with a base-of reason.
{% else %}
Base snapshots are only kept when they match the regular retention policy.
{% endif %}
</div>
{% if schedule %}
<div><strong>Schedule pruning:</strong> {{ schedule.prune|yesno:"enabled,disabled" }}</div>
<div><strong>Schedule max delete:</strong> {{ schedule.prune_max_delete }}</div>
{% endif %}
</div> </div>
</section> </section>
@@ -40,6 +74,7 @@
<th>Dirname</th> <th>Dirname</th>
<th>Started</th> <th>Started</th>
<th>Status</th> <th>Status</th>
<th>Reason</th>
<th>Path</th> <th>Path</th>
</tr> </tr>
</thead> </thead>
@@ -50,10 +85,11 @@
<td>{{ snapshot.dirname }}</td> <td>{{ snapshot.dirname }}</td>
<td>{{ snapshot.dt }}</td> <td>{{ snapshot.dt }}</td>
<td>{{ snapshot.status|default:"" }}</td> <td>{{ snapshot.status|default:"" }}</td>
<td>{{ snapshot.reason }}</td>
<td class="muted">{{ snapshot.path }}</td> <td class="muted">{{ snapshot.path }}</td>
</tr> </tr>
{% empty %} {% empty %}
<tr><td colspan="5" class="muted">Retention would not delete snapshots for this selection.</td></tr> <tr><td colspan="6" class="muted">Retention would not delete snapshots for this selection.</td></tr>
{% endfor %} {% endfor %}
</tbody> </tbody>
</table> </table>
@@ -71,7 +107,7 @@
{{ apply_form.max_delete.errors }} {{ apply_form.max_delete.errors }}
<label for="{{ apply_form.max_delete.id_for_label }}">Max delete</label> <label for="{{ apply_form.max_delete.id_for_label }}">Max delete</label>
{{ apply_form.max_delete }} {{ apply_form.max_delete }}
<div class="helptext">Must be at least the number of snapshots shown in Would Delete.</div> <div class="helptext">Must be at least {{ plan.delete|length }} for the snapshots shown in Would Delete.</div>
</div> </div>
<div class="field"> <div class="field">
@@ -99,20 +135,54 @@
<table> <table>
<thead> <thead>
<tr> <tr>
<th>Kind</th>
<th>Dirname</th> <th>Dirname</th>
<th>Started</th>
<th>Status</th>
<th>Reasons</th> <th>Reasons</th>
</tr> </tr>
</thead> </thead>
<tbody> <tbody>
{% for dirname, reasons in plan.reasons.items %} {% for snapshot in plan.keep_items %}
<tr> <tr>
<td>{{ dirname }}</td> <td>{{ snapshot.kind }}</td>
<td>{{ reasons|join:", " }}</td> <td>{{ snapshot.dirname }}</td>
<td>{{ snapshot.dt }}</td>
<td>{{ snapshot.status|default:"" }}</td>
<td>{{ snapshot.reason }}</td>
</tr> </tr>
{% empty %} {% empty %}
<tr><td colspan="2" class="muted">No snapshots matched this retention selection.</td></tr> <tr><td colspan="5" class="muted">No snapshots matched this retention selection.</td></tr>
{% endfor %} {% endfor %}
</tbody> </tbody>
</table> </table>
</section> </section>
{% if plan.incomplete %}
<section class="panel">
<h2>Incomplete Snapshots</h2>
<table>
<thead>
<tr>
<th>Dirname</th>
<th>Started</th>
<th>Status</th>
<th>Reason</th>
<th>Path</th>
</tr>
</thead>
<tbody>
{% for snapshot in plan.incomplete %}
<tr>
<td>{{ snapshot.dirname }}</td>
<td>{{ snapshot.dt }}</td>
<td>{{ snapshot.status|default:"" }}</td>
<td>{{ snapshot.reason }}</td>
<td class="muted">{{ snapshot.path }}</td>
</tr>
{% endfor %}
</tbody>
</table>
</section>
{% endif %}
{% endblock %} {% endblock %}

View File

@@ -32,7 +32,10 @@ class SqlRetentionTests(TestCase):
self.assertEqual(plan["source"], "sql") self.assertEqual(plan["source"], "sql")
self.assertEqual(plan["keep"], [new.dirname]) self.assertEqual(plan["keep"], [new.dirname])
self.assertEqual([item["dirname"] for item in plan["keep_items"]], [new.dirname])
self.assertEqual([item["dirname"] for item in plan["delete"]], [old.dirname]) self.assertEqual([item["dirname"] for item in plan["delete"]], [old.dirname])
self.assertEqual(plan["delete"][0]["reason"], "outside retention policy")
self.assertEqual(plan["incomplete"], [])
def test_plan_can_protect_base_snapshot_from_sql_relation(self) -> None: def test_plan_can_protect_base_snapshot_from_sql_relation(self) -> None:
host = HostConfig.objects.create( host = HostConfig.objects.create(

View File

@@ -99,6 +99,35 @@ class ViewTests(TestCase):
self.assertContains(response, "manual") self.assertContains(response, "manual")
self.assertContains(response, "1000") self.assertContains(response, "1000")
def test_dashboard_surfaces_retention_warnings(self) -> None:
self.client.force_login(self.staff_user)
host = HostConfig.objects.create(
host="web-01",
address="web-01.example.test",
retention_daily=0,
retention_weekly=0,
retention_monthly=0,
retention_yearly=0,
)
ScheduleConfig.objects.create(host=host, cron_expr="15 2 * * *", enabled=True, prune=True, prune_max_delete=1)
self._snapshot(host, "20260517-021500Z__OLDSNP1")
self._snapshot(host, "20260518-021500Z__OLDSNP2")
self._snapshot(host, "20260519-021500Z__NEWSNAP")
SnapshotRecord.objects.create(
host=host,
kind=SnapshotRecord.Kind.INCOMPLETE,
dirname="20260519-031500Z__BROKEN01",
path=f"/backups/{host.host}/.incomplete/20260519-031500Z__BROKEN01",
status="failed",
started_at=datetime(2026, 5, 19, 3, 15, tzinfo=timezone.utc),
)
response = self.client.get(reverse("dashboard"))
self.assertEqual(response.status_code, 200)
self.assertContains(response, "Scheduled prune would delete 2 snapshot(s), above max 1.")
self.assertContains(response, "1 incomplete snapshot(s) need review.")
def test_dashboard_links_latest_snapshot_for_each_host(self) -> None: def test_dashboard_links_latest_snapshot_for_each_host(self) -> None:
self.client.force_login(self.staff_user) self.client.force_login(self.staff_user)
host = HostConfig.objects.create(host="web-01", address="web-01.example.test") host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
@@ -1351,6 +1380,30 @@ class ViewTests(TestCase):
self.assertContains(response, new_snapshot.dirname) self.assertContains(response, new_snapshot.dirname)
self.assertContains(response, "newest") self.assertContains(response, "newest")
self.assertContains(response, "Would Delete") self.assertContains(response, "Would Delete")
self.assertContains(response, "outside retention policy")
def test_retention_plan_warns_when_scheduled_prune_limit_is_exceeded(self) -> None:
self.client.force_login(self.staff_user)
host = HostConfig.objects.create(
host="web-01",
address="web-01.example.test",
retention_daily=0,
retention_weekly=0,
retention_monthly=0,
retention_yearly=0,
)
ScheduleConfig.objects.create(host=host, cron_expr="15 2 * * *", enabled=True, prune=True, prune_max_delete=1)
self._snapshot(host, "20260517-021500Z__OLDSNP1")
self._snapshot(host, "20260518-021500Z__OLDSNP2")
self._snapshot(host, "20260519-021500Z__NEWSNAP")
response = self.client.get(reverse("host_retention_plan", args=[host.host]))
self.assertEqual(response.status_code, 200)
self.assertContains(response, "Scheduled Prune Limit")
self.assertContains(response, "would delete 2 snapshot(s)")
self.assertContains(response, "scheduled prune limit of")
self.assertContains(response, "Schedule max delete:</strong> 1")
def test_retention_plan_can_enable_base_protection(self) -> None: def test_retention_plan_can_enable_base_protection(self) -> None:
self.client.force_login(self.staff_user) self.client.force_login(self.staff_user)
@@ -1371,8 +1424,58 @@ class ViewTests(TestCase):
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
self.assertContains(response, "Protect bases:</strong> yes") self.assertContains(response, "Protect bases:</strong> yes")
self.assertContains(response, "Base snapshots referenced by kept snapshots")
self.assertContains(response, f"base-of:{child.dirname}") self.assertContains(response, f"base-of:{child.dirname}")
def test_retention_plan_surfaces_incomplete_snapshots_without_deleting_them(self) -> None:
self.client.force_login(self.staff_user)
host = HostConfig.objects.create(
host="web-01",
address="web-01.example.test",
retention_daily=0,
retention_weekly=0,
retention_monthly=0,
retention_yearly=0,
)
self._snapshot(host, "20260518-021500Z__OLDSNAP")
self._snapshot(host, "20260519-021500Z__NEWSNAP")
SnapshotRecord.objects.create(
host=host,
kind=SnapshotRecord.Kind.INCOMPLETE,
dirname="20260519-031500Z__BROKEN01",
path=f"/backups/{host.host}/.incomplete/20260519-031500Z__BROKEN01",
status="failed",
started_at=datetime(2026, 5, 19, 3, 15, tzinfo=timezone.utc),
)
response = self.client.get(reverse("host_retention_plan", args=[host.host]))
self.assertEqual(response.status_code, 200)
self.assertContains(response, "Incomplete Snapshots")
self.assertContains(response, "20260519-031500Z__BROKEN01")
self.assertContains(response, "excluded from retention cleanup")
def test_host_detail_surfaces_retention_warnings(self) -> None:
self.client.force_login(self.staff_user)
host = HostConfig.objects.create(
host="web-01",
address="web-01.example.test",
retention_daily=0,
retention_weekly=0,
retention_monthly=0,
retention_yearly=0,
)
ScheduleConfig.objects.create(host=host, cron_expr="15 2 * * *", enabled=True, prune=True, prune_max_delete=1)
self._snapshot(host, "20260517-021500Z__OLDSNP1")
self._snapshot(host, "20260518-021500Z__OLDSNP2")
self._snapshot(host, "20260519-021500Z__NEWSNAP")
response = self.client.get(reverse("host_detail", args=[host.host]))
self.assertEqual(response.status_code, 200)
self.assertContains(response, "Retention Warnings")
self.assertContains(response, "Scheduled pruning would delete 2 snapshot(s), above max delete")
def test_retention_plan_rejects_invalid_kind(self) -> None: def test_retention_plan_rejects_invalid_kind(self) -> None:
self.client.force_login(self.staff_user) self.client.force_login(self.staff_user)
host = HostConfig.objects.create(host="web-01", address="web-01.example.test") host = HostConfig.objects.create(host="web-01", address="web-01.example.test")

View File

@@ -55,6 +55,7 @@ def dashboard(request):
.first() .first()
) )
host_config.next_run_at = _next_run_for_host(host_config) host_config.next_run_at = _next_run_for_host(host_config)
host_config.retention_warning = _retention_warning_for_host(host_config, _schedule_for_host(host_config))
stats_summary = collect_dashboard_stats(hosts=hosts, global_config=global_config) stats_summary = collect_dashboard_stats(hosts=hosts, global_config=global_config)
context = { context = {
"hosts": hosts, "hosts": hosts,
@@ -274,6 +275,7 @@ def host_detail(request, host: str):
context = { context = {
"host": host_config, "host": host_config,
"schedule": schedule, "schedule": schedule,
"retention_warning": _retention_warning_for_host(host_config, schedule),
"next_run_at": _next_run_for_schedule(schedule, host_config), "next_run_at": _next_run_for_schedule(schedule, host_config),
"scheduler_timezone": timezone.get_current_timezone_name(), "scheduler_timezone": timezone.get_current_timezone_name(),
"discovery": inspect_snapshot_discovery(host=host_config), "discovery": inspect_snapshot_discovery(host=host_config),
@@ -526,17 +528,23 @@ def host_retention_plan(request, host: str):
except PobsyncError as exc: except PobsyncError as exc:
messages.error(request, str(exc)) messages.error(request, str(exc))
return redirect("host_detail", host=host_config.host) return redirect("host_detail", host=host_config.host)
schedule = _schedule_for_host(host_config)
scheduled_prune_limit = schedule.prune_max_delete if schedule and schedule.prune else None
delete_count = len(plan["delete"])
context = { context = {
"host": host_config, "host": host_config,
"kind": kind, "kind": kind,
"protect_bases": protect_bases, "protect_bases": protect_bases,
"plan": plan, "plan": plan,
"schedule": schedule,
"scheduled_prune_limit": scheduled_prune_limit,
"scheduled_prune_exceeded": scheduled_prune_limit is not None and delete_count > scheduled_prune_limit,
"apply_form": RetentionApplyForm( "apply_form": RetentionApplyForm(
host_name=host_config.host, host_name=host_config.host,
initial={ initial={
"kind": kind, "kind": kind,
"protect_bases": protect_bases, "protect_bases": protect_bases,
"max_delete": len(plan["delete"]), "max_delete": delete_count,
}, },
), ),
} }
@@ -652,6 +660,43 @@ def _next_run_for_schedule(schedule: ScheduleConfig | None, host_config: HostCon
return None return None
def _retention_warning_for_host(host_config: HostConfig, schedule: ScheduleConfig | None) -> dict[str, object]:
incomplete_count = host_config.snapshots.filter(kind=SnapshotRecord.Kind.INCOMPLETE).count()
warning: dict[str, object] = {
"has_warning": incomplete_count > 0,
"incomplete_count": incomplete_count,
}
if schedule is None or not schedule.prune or not host_config.enabled:
return warning
try:
plan = run_sql_retention_plan(
host=host_config.host,
kind="scheduled",
protect_bases=bool(schedule.prune_protect_bases),
)
except PobsyncError as exc:
warning.update(
{
"has_warning": True,
"error": str(exc),
}
)
return warning
delete_count = len(plan.get("delete") or [])
warning.update(
{
"delete_count": delete_count,
"max_delete": schedule.prune_max_delete,
"protect_bases": bool(schedule.prune_protect_bases),
"prune_exceeded": delete_count > schedule.prune_max_delete,
}
)
if warning["prune_exceeded"]:
warning["has_warning"] = True
return warning
def _default_schedule_initial() -> dict[str, object]: def _default_schedule_initial() -> dict[str, object]:
return { return {
"cron_expr": "15 2 * * *", "cron_expr": "15 2 * * *",