(feature) Improve retention UX and prune safety #14

Merged
parkel merged 5 commits from issue-4-retention-ux-and-safety into master 2026-05-21 01:27:20 +02:00
12 changed files with 506 additions and 18 deletions

View File

@@ -249,12 +249,18 @@ class RetentionApplyForm(forms.Form):
kind = forms.ChoiceField(choices=(("scheduled", "Scheduled"), ("manual", "Manual"), ("all", "All")))
protect_bases = forms.BooleanField(required=False)
max_delete = forms.IntegerField(min_value=0, initial=10)
confirm_delete_count = forms.IntegerField(min_value=0)
confirm_host = forms.CharField()
def __init__(self, *args, host_name: str, **kwargs) -> None:
def __init__(self, *args, host_name: str, expected_delete_count: int | None = None, **kwargs) -> None:
self.host_name = host_name
self.expected_delete_count = expected_delete_count
super().__init__(*args, **kwargs)
self.fields["confirm_host"].help_text = f"Type {host_name} to confirm deletion."
if expected_delete_count is not None:
self.fields["confirm_delete_count"].help_text = (
f"Type {expected_delete_count} to confirm the current number of planned deletions."
)
def clean_confirm_host(self) -> str:
value = self.cleaned_data["confirm_host"].strip()
@@ -262,6 +268,12 @@ class RetentionApplyForm(forms.Form):
raise forms.ValidationError(f"Type {self.host_name} to confirm.")
return value
def clean_confirm_delete_count(self) -> int:
value = self.cleaned_data["confirm_delete_count"]
if self.expected_delete_count is not None and value != self.expected_delete_count:
raise forms.ValidationError(f"Type {self.expected_delete_count} to confirm the delete count.")
return value
class ScheduleConfigForm(forms.ModelForm):
cron_expr = forms.CharField(

View File

@@ -10,7 +10,7 @@ from django.core.management.base import BaseCommand
from django.db import transaction
from django.utils import timezone
from pobsync_backend.models import ScheduleConfig
from pobsync_backend.models import BackupRun, ScheduleConfig
from pobsync_backend.scheduler import due_key, is_due
@@ -52,12 +52,13 @@ class Command(BaseCommand):
if not is_due(schedule.cron_expr, now):
continue
schedule_started_at = timezone.now()
with transaction.atomic():
locked = ScheduleConfig.objects.select_for_update().get(pk=schedule.pk)
if locked.last_due_key == current_due_key:
continue
locked.last_due_key = current_due_key
locked.last_started_at = timezone.now()
locked.last_started_at = schedule_started_at
locked.last_status = "running"
locked.save(update_fields=["last_due_key", "last_started_at", "last_status", "updated_at"])
@@ -72,6 +73,7 @@ class Command(BaseCommand):
prune_max_delete=schedule.prune_max_delete,
prune_protect_bases=schedule.prune_protect_bases,
)
status = _latest_scheduled_run_status(host_id=schedule.host_id, started_at=schedule_started_at) or status
except Exception as exc:
status = "failed"
self.stderr.write(f"{schedule.host.host}: {type(exc).__name__}: {exc}")
@@ -83,3 +85,16 @@ class Command(BaseCommand):
ran += 1
return ran
def _latest_scheduled_run_status(*, host_id: int, started_at) -> str | None:
run = (
BackupRun.objects.filter(
host_id=host_id,
run_type=BackupRun.RunType.SCHEDULED,
created_at__gte=started_at,
)
.order_by("-created_at", "-id")
.first()
)
return run.status if run is not None else None

View File

@@ -23,6 +23,7 @@ def run_sql_retention_plan(*, host: str, kind: str, protect_bases: bool) -> dict
host_config = _enabled_host_config(host)
retention = _retention_for_host(host_config)
snapshots = _snapshots_for_retention(host_config=host_config, kind=kind)
incomplete_snapshots = _incomplete_snapshots_for_host(host_config)
plan = build_retention_plan(
snapshots=snapshots,
@@ -36,6 +37,7 @@ def run_sql_retention_plan(*, host: str, kind: str, protect_bases: bool) -> dict
keep, reasons = apply_base_protection(snapshots=snapshots, keep=keep, reasons=reasons)
delete = [snapshot for snapshot in snapshots if snapshot.dirname not in keep]
keep_items = [snapshot for snapshot in snapshots if snapshot.dirname in keep]
return {
"ok": True,
@@ -45,7 +47,12 @@ def run_sql_retention_plan(*, host: str, kind: str, protect_bases: bool) -> dict
"retention": retention,
"source": "sql",
"keep": sorted(keep),
"delete": [_snapshot_to_delete_item(snapshot) for snapshot in delete],
"keep_items": [_snapshot_to_item(snapshot, reasons=reasons.get(snapshot.dirname, [])) for snapshot in keep_items],
"delete": [_snapshot_to_item(snapshot, reasons=["outside retention policy"]) for snapshot in delete],
"incomplete": [
_snapshot_to_item(snapshot, reasons=["incomplete snapshot; excluded from retention cleanup"])
for snapshot in incomplete_snapshots
],
"reasons": reasons,
}
@@ -71,8 +78,11 @@ def run_sql_retention_apply(
def _do_apply() -> dict[str, Any]:
plan = run_sql_retention_plan(host=host, kind=kind, protect_bases=bool(protect_bases))
delete_list = plan.get("delete") or []
incomplete_list = plan.get("incomplete") or []
if not isinstance(delete_list, list):
raise ConfigError("Invalid retention plan output: delete is not a list")
if not isinstance(incomplete_list, list):
raise ConfigError("Invalid retention plan output: incomplete is not a list")
if max_delete == 0 and len(delete_list) > 0:
raise ConfigError("Deletion blocked by --max-delete=0")
if len(delete_list) > max_delete:
@@ -109,6 +119,8 @@ def run_sql_retention_apply(
"protect_bases": bool(protect_bases),
"max_delete": max_delete,
"source": "sql",
"planned_delete_count": len(delete_list),
"incomplete_ignored_count": len(incomplete_list),
"deleted": deleted,
"actions": actions,
}
@@ -146,6 +158,15 @@ def _snapshots_for_retention(*, host_config: HostConfig, kind: str) -> list[Snap
return [_snapshot_from_record(record) for record in records]
def _incomplete_snapshots_for_host(host_config: HostConfig) -> list[Snapshot]:
records = (
SnapshotRecord.objects.filter(host=host_config, kind=SnapshotRecord.Kind.INCOMPLETE)
.select_related("base")
.order_by("-started_at", "dirname")
)
return [_snapshot_from_record(record) for record in records]
def _snapshot_from_record(record: SnapshotRecord) -> Snapshot:
return Snapshot(
kind=record.kind,
@@ -173,13 +194,15 @@ def _base_meta_from_record(record: SnapshotRecord) -> dict[str, str] | None:
return None
def _snapshot_to_delete_item(snapshot: Snapshot) -> dict[str, Any]:
def _snapshot_to_item(snapshot: Snapshot, *, reasons: list[str]) -> dict[str, Any]:
return {
"dirname": snapshot.dirname,
"kind": snapshot.kind,
"path": snapshot.path,
"dt": snapshot.dt.isoformat(),
"status": snapshot.status,
"reasons": reasons,
"reason": ", ".join(reasons),
}

View File

@@ -231,6 +231,17 @@
.host-card-stat.wide {
grid-column: 1 / -1;
}
.host-card-warning {
background: #fffaf0;
border: 1px solid #e7cf8a;
border-radius: 6px;
color: var(--running);
display: flex;
flex-wrap: wrap;
gap: 8px;
margin-top: 14px;
padding: 10px;
}
.messages { display: grid; gap: 8px; margin-bottom: 18px; }
.message {
background: var(--panel);

View File

@@ -118,6 +118,20 @@
</div>
</div>
</div>
{% if host.retention_warning.has_warning %}
<div class="host-card-warning">
<span class="status warning">retention</span>
{% if host.retention_warning.prune_exceeded %}
Scheduled prune would delete {{ host.retention_warning.delete_count }} snapshot(s), above max {{ host.retention_warning.max_delete }}.
{% endif %}
{% if host.retention_warning.incomplete_count %}
{{ host.retention_warning.incomplete_count }} incomplete snapshot(s) need review.
{% endif %}
{% if host.retention_warning.error %}
{{ host.retention_warning.error }}
{% endif %}
</div>
{% endif %}
</article>
{% empty %}
<p class="muted">No hosts configured yet.</p>

View File

@@ -68,6 +68,30 @@
</section>
</div>
{% if retention_warning.has_warning %}
<section class="panel highlight warning">
<h2>Retention Warnings</h2>
<div class="stack">
{% if retention_warning.prune_exceeded %}
<div>
Scheduled pruning would delete {{ retention_warning.delete_count }} snapshot(s), above max delete
{{ retention_warning.max_delete }}. Scheduled pruning will refuse this plan until the limit or retention
selection is adjusted.
</div>
{% endif %}
{% if retention_warning.incomplete_count %}
<div>
{{ retention_warning.incomplete_count }} incomplete snapshot(s) exist. Retention does not delete incomplete
snapshots automatically; inspect them before cleanup.
</div>
{% endif %}
{% if retention_warning.error %}
<div>{{ retention_warning.error }}</div>
{% endif %}
</div>
</section>
{% endif %}
{% if effective_config %}
<section class="panel">
<h2>Effective Config</h2>

View File

@@ -18,8 +18,31 @@
<div class="metric"><div class="label">Kind</div><div class="value">{{ plan.kind }}</div></div>
<div class="metric"><div class="label">Keep</div><div class="value">{{ plan.keep|length }}</div></div>
<div class="metric"><div class="label">Would Delete</div><div class="value">{{ plan.delete|length }}</div></div>
<div class="metric"><div class="label">Scheduled Limit</div><div class="value">{{ scheduled_prune_limit|default:"none" }}</div></div>
<div class="metric"><div class="label">Incomplete</div><div class="value">{{ plan.incomplete|length }}</div></div>
</section>
{% if scheduled_prune_exceeded %}
<section class="panel highlight warning">
<h2>Scheduled Prune Limit</h2>
<p>
This plan would delete {{ plan.delete|length }} snapshot(s), which exceeds the scheduled prune limit of
{{ scheduled_prune_limit }}. Scheduled pruning will refuse to apply this plan until the limit or retention
selection is adjusted.
</p>
</section>
{% endif %}
{% if plan.incomplete %}
<section class="panel highlight warning">
<h2>Incomplete Snapshots</h2>
<p>
{{ plan.incomplete|length }} incomplete snapshot(s) exist for this host. Retention does not delete incomplete
snapshots automatically because they can indicate an interrupted backup that should be inspected first.
</p>
</section>
{% endif %}
<section class="panel">
<h2>Policy</h2>
<div class="stack">
@@ -28,6 +51,17 @@
<div><strong>Monthly:</strong> {{ plan.retention.monthly }}</div>
<div><strong>Yearly:</strong> {{ plan.retention.yearly }}</div>
<div><strong>Protect bases:</strong> {{ protect_bases|yesno:"yes,no" }}</div>
<div class="muted">
{% if protect_bases %}
Base snapshots referenced by kept snapshots are also kept and marked with a base-of reason.
{% else %}
Base snapshots are only kept when they match the regular retention policy.
{% endif %}
</div>
{% if schedule %}
<div><strong>Schedule pruning:</strong> {{ schedule.prune|yesno:"enabled,disabled" }}</div>
<div><strong>Schedule max delete:</strong> {{ schedule.prune_max_delete }}</div>
{% endif %}
</div>
</section>
@@ -40,6 +74,7 @@
<th>Dirname</th>
<th>Started</th>
<th>Status</th>
<th>Reason</th>
<th>Path</th>
</tr>
</thead>
@@ -50,10 +85,11 @@
<td>{{ snapshot.dirname }}</td>
<td>{{ snapshot.dt }}</td>
<td>{{ snapshot.status|default:"" }}</td>
<td>{{ snapshot.reason }}</td>
<td class="muted">{{ snapshot.path }}</td>
</tr>
{% empty %}
<tr><td colspan="5" class="muted">Retention would not delete snapshots for this selection.</td></tr>
<tr><td colspan="6" class="muted">Retention would not delete snapshots for this selection.</td></tr>
{% endfor %}
</tbody>
</table>
@@ -71,7 +107,7 @@
{{ apply_form.max_delete.errors }}
<label for="{{ apply_form.max_delete.id_for_label }}">Max delete</label>
{{ apply_form.max_delete }}
<div class="helptext">Must be at least the number of snapshots shown in Would Delete.</div>
<div class="helptext">Must be at least {{ plan.delete|length }} for the snapshots shown in Would Delete.</div>
</div>
<div class="field">
@@ -87,6 +123,13 @@
<div class="helptext">{{ apply_form.confirm_host.help_text }}</div>
</div>
<div class="field">
{{ apply_form.confirm_delete_count.errors }}
<label for="{{ apply_form.confirm_delete_count.id_for_label }}">Confirm delete count</label>
{{ apply_form.confirm_delete_count }}
<div class="helptext">{{ apply_form.confirm_delete_count.help_text }}</div>
</div>
<div class="actions">
<button type="submit">Apply retention</button>
</div>
@@ -99,20 +142,54 @@
<table>
<thead>
<tr>
<th>Kind</th>
<th>Dirname</th>
<th>Started</th>
<th>Status</th>
<th>Reasons</th>
</tr>
</thead>
<tbody>
{% for dirname, reasons in plan.reasons.items %}
{% for snapshot in plan.keep_items %}
<tr>
<td>{{ dirname }}</td>
<td>{{ reasons|join:", " }}</td>
<td>{{ snapshot.kind }}</td>
<td>{{ snapshot.dirname }}</td>
<td>{{ snapshot.dt }}</td>
<td>{{ snapshot.status|default:"" }}</td>
<td>{{ snapshot.reason }}</td>
</tr>
{% empty %}
<tr><td colspan="2" class="muted">No snapshots matched this retention selection.</td></tr>
<tr><td colspan="5" class="muted">No snapshots matched this retention selection.</td></tr>
{% endfor %}
</tbody>
</table>
</section>
{% if plan.incomplete %}
<section class="panel">
<h2>Incomplete Snapshots</h2>
<table>
<thead>
<tr>
<th>Dirname</th>
<th>Started</th>
<th>Status</th>
<th>Reason</th>
<th>Path</th>
</tr>
</thead>
<tbody>
{% for snapshot in plan.incomplete %}
<tr>
<td>{{ snapshot.dirname }}</td>
<td>{{ snapshot.dt }}</td>
<td>{{ snapshot.status|default:"" }}</td>
<td>{{ snapshot.reason }}</td>
<td class="muted">{{ snapshot.path }}</td>
</tr>
{% endfor %}
</tbody>
</table>
</section>
{% endif %}
{% endblock %}

View File

@@ -172,13 +172,49 @@
<div class="stack">
<div><strong>Status:</strong> {% if prune_result.ok %}ok{% else %}warning{% endif %}</div>
{% if prune_result.source %}<div><strong>Source:</strong> {{ prune_result.source }}</div>{% endif %}
{% if prune_result.kind %}<div><strong>Kind:</strong> {{ prune_result.kind }}</div>{% endif %}
{% if prune_result.planned_delete_count is not None %}<div><strong>Planned deletions:</strong> {{ prune_result.planned_delete_count }}</div>{% endif %}
{% if prune_result.deleted %}<div><strong>Deleted:</strong> {{ prune_result.deleted|length }}</div>{% endif %}
{% if prune_result.max_delete is not None %}<div><strong>Max delete:</strong> {{ prune_result.max_delete }}</div>{% endif %}
{% if prune_result.protect_bases is not None %}<div><strong>Protect bases:</strong> {{ prune_result.protect_bases|yesno:"yes,no" }}</div>{% endif %}
{% if prune_result.incomplete_ignored_count %}<div><strong>Incomplete ignored:</strong> {{ prune_result.incomplete_ignored_count }}</div>{% endif %}
{% if prune_result.actions %}
<div><strong>Actions:</strong></div>
<ul>
{% for action in prune_result.actions %}
<li>{{ action }}</li>
{% endfor %}
</ul>
{% endif %}
{% if prune_result.error %}<div><strong>Error:</strong> {{ prune_result.error }}</div>{% endif %}
{% if prune_result.type %}<div><strong>Type:</strong> {{ prune_result.type }}</div>{% endif %}
</div>
</section>
{% endif %}
{% if retention_warning.has_warning %}
<section class="panel highlight warning">
<h2>Retention Warnings</h2>
<div class="stack">
{% if retention_warning.prune_exceeded %}
<div>
Scheduled pruning for this host would delete {{ retention_warning.delete_count }} snapshot(s), above max
delete {{ retention_warning.max_delete }}.
</div>
{% endif %}
{% if retention_warning.incomplete_count %}
<div>
{{ retention_warning.incomplete_count }} incomplete snapshot(s) exist for this host and are excluded from
retention cleanup.
</div>
{% endif %}
{% if retention_warning.error %}
<div>{{ retention_warning.error }}</div>
{% endif %}
</div>
</section>
{% endif %}
<section class="panel">
<h2>Raw Result</h2>
<pre>{{ result_json }}</pre>

View File

@@ -8,7 +8,7 @@ from zoneinfo import ZoneInfo
from django.test import SimpleTestCase, TestCase
from pobsync_backend.management.commands.run_pobsync_scheduler import Command
from pobsync_backend.models import HostConfig, ScheduleConfig
from pobsync_backend.models import BackupRun, HostConfig, ScheduleConfig
from pobsync_backend.scheduler import due_key, is_due, next_due_after
@@ -64,3 +64,30 @@ class SchedulerCommandTests(TestCase):
self.assertEqual(call.call_count, 1)
schedule = ScheduleConfig.objects.get(host=host)
self.assertEqual(schedule.last_status, "success")
def test_run_due_records_warning_status_from_scheduled_backup_run(self) -> None:
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
ScheduleConfig.objects.create(host=host, cron_expr="* * * * *", prune=True, prune_max_delete=1)
def create_warning_run(*args, **kwargs) -> None:
BackupRun.objects.create(
host=host,
run_type=BackupRun.RunType.SCHEDULED,
status=BackupRun.Status.WARNING,
result={
"ok": True,
"prune": {
"ok": False,
"type": "ConfigError",
"error": "Refusing to delete 2 snapshots (exceeds --max-delete=1)",
},
},
)
command = Command()
with patch("pobsync_backend.management.commands.run_pobsync_scheduler.call_command", side_effect=create_warning_run):
count = command._run_due(prefix=Path("/opt/pobsync"), dry_run=False)
self.assertEqual(count, 1)
schedule = ScheduleConfig.objects.get(host=host)
self.assertEqual(schedule.last_status, "warning")

View File

@@ -32,7 +32,10 @@ class SqlRetentionTests(TestCase):
self.assertEqual(plan["source"], "sql")
self.assertEqual(plan["keep"], [new.dirname])
self.assertEqual([item["dirname"] for item in plan["keep_items"]], [new.dirname])
self.assertEqual([item["dirname"] for item in plan["delete"]], [old.dirname])
self.assertEqual(plan["delete"][0]["reason"], "outside retention policy")
self.assertEqual(plan["incomplete"], [])
def test_plan_can_protect_base_snapshot_from_sql_relation(self) -> None:
host = HostConfig.objects.create(
@@ -85,6 +88,9 @@ class SqlRetentionTests(TestCase):
self.assertTrue(SnapshotRecord.objects.filter(pk=new.pk).exists())
self.assertFalse(SnapshotRecord.objects.filter(pk=old.pk).exists())
self.assertEqual(result["deleted"], [{"dirname": old.dirname, "kind": "scheduled", "path": str(old_dir)}])
self.assertEqual(result["planned_delete_count"], 1)
self.assertEqual(result["max_delete"], 1)
self.assertEqual(result["incomplete_ignored_count"], 0)
def test_apply_deletes_snapshot_with_readonly_data_directory(self) -> None:
with TemporaryDirectory() as tmp:

View File

@@ -99,6 +99,35 @@ class ViewTests(TestCase):
self.assertContains(response, "manual")
self.assertContains(response, "1000")
def test_dashboard_surfaces_retention_warnings(self) -> None:
self.client.force_login(self.staff_user)
host = HostConfig.objects.create(
host="web-01",
address="web-01.example.test",
retention_daily=0,
retention_weekly=0,
retention_monthly=0,
retention_yearly=0,
)
ScheduleConfig.objects.create(host=host, cron_expr="15 2 * * *", enabled=True, prune=True, prune_max_delete=1)
self._snapshot(host, "20260517-021500Z__OLDSNP1")
self._snapshot(host, "20260518-021500Z__OLDSNP2")
self._snapshot(host, "20260519-021500Z__NEWSNAP")
SnapshotRecord.objects.create(
host=host,
kind=SnapshotRecord.Kind.INCOMPLETE,
dirname="20260519-031500Z__BROKEN01",
path=f"/backups/{host.host}/.incomplete/20260519-031500Z__BROKEN01",
status="failed",
started_at=datetime(2026, 5, 19, 3, 15, tzinfo=timezone.utc),
)
response = self.client.get(reverse("dashboard"))
self.assertEqual(response.status_code, 200)
self.assertContains(response, "Scheduled prune would delete 2 snapshot(s), above max 1.")
self.assertContains(response, "1 incomplete snapshot(s) need review.")
def test_dashboard_links_latest_snapshot_for_each_host(self) -> None:
self.client.force_login(self.staff_user)
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
@@ -1168,9 +1197,15 @@ class ViewTests(TestCase):
"hint": "Check network connectivity.",
},
"prune": {
"ok": False,
"type": "ConfigError",
"error": "Deletion blocked by --max-delete=0",
"ok": True,
"source": "sql",
"kind": "scheduled",
"planned_delete_count": 1,
"max_delete": 1,
"protect_bases": True,
"incomplete_ignored_count": 1,
"deleted": [{"dirname": "20260518-021500Z__OLD"}],
"actions": ["deleted scheduled 20260518-021500Z__OLD"],
},
},
)
@@ -1182,7 +1217,42 @@ class ViewTests(TestCase):
self.assertContains(response, "transport")
self.assertContains(response, "Check network connectivity.")
self.assertContains(response, "Retention")
self.assertContains(response, "Deletion blocked by --max-delete=0")
self.assertContains(response, "Planned deletions")
self.assertContains(response, "Max delete")
self.assertContains(response, "Protect bases")
self.assertContains(response, "Incomplete ignored")
self.assertContains(response, "deleted scheduled 20260518-021500Z__OLD")
def test_run_detail_surfaces_host_retention_warnings(self) -> None:
self.client.force_login(self.staff_user)
host = HostConfig.objects.create(
host="web-01",
address="web-01.example.test",
retention_daily=0,
retention_weekly=0,
retention_monthly=0,
retention_yearly=0,
)
ScheduleConfig.objects.create(host=host, cron_expr="15 2 * * *", enabled=True, prune=True, prune_max_delete=1)
self._snapshot(host, "20260517-021500Z__OLDSNP1")
self._snapshot(host, "20260518-021500Z__OLDSNP2")
self._snapshot(host, "20260519-021500Z__NEWSNAP")
SnapshotRecord.objects.create(
host=host,
kind=SnapshotRecord.Kind.INCOMPLETE,
dirname="20260519-031500Z__BROKEN01",
path=f"/backups/{host.host}/.incomplete/20260519-031500Z__BROKEN01",
status="failed",
started_at=datetime(2026, 5, 19, 3, 15, tzinfo=timezone.utc),
)
run = BackupRun.objects.create(host=host, status=BackupRun.Status.SUCCESS, result={"ok": True})
response = self.client.get(reverse("run_detail", args=[run.id]))
self.assertEqual(response.status_code, 200)
self.assertContains(response, "Retention Warnings")
self.assertContains(response, "Scheduled pruning for this host would delete 2 snapshot(s)")
self.assertContains(response, "1 incomplete snapshot(s) exist for this host")
def test_run_detail_infers_rsync_log_from_snapshot_path(self) -> None:
self.client.force_login(self.staff_user)
@@ -1351,6 +1421,32 @@ class ViewTests(TestCase):
self.assertContains(response, new_snapshot.dirname)
self.assertContains(response, "newest")
self.assertContains(response, "Would Delete")
self.assertContains(response, "outside retention policy")
self.assertContains(response, "Confirm delete count")
self.assertContains(response, "Type 1 to confirm the current number of planned deletions.")
def test_retention_plan_warns_when_scheduled_prune_limit_is_exceeded(self) -> None:
self.client.force_login(self.staff_user)
host = HostConfig.objects.create(
host="web-01",
address="web-01.example.test",
retention_daily=0,
retention_weekly=0,
retention_monthly=0,
retention_yearly=0,
)
ScheduleConfig.objects.create(host=host, cron_expr="15 2 * * *", enabled=True, prune=True, prune_max_delete=1)
self._snapshot(host, "20260517-021500Z__OLDSNP1")
self._snapshot(host, "20260518-021500Z__OLDSNP2")
self._snapshot(host, "20260519-021500Z__NEWSNAP")
response = self.client.get(reverse("host_retention_plan", args=[host.host]))
self.assertEqual(response.status_code, 200)
self.assertContains(response, "Scheduled Prune Limit")
self.assertContains(response, "would delete 2 snapshot(s)")
self.assertContains(response, "scheduled prune limit of")
self.assertContains(response, "Schedule max delete:</strong> 1")
def test_retention_plan_can_enable_base_protection(self) -> None:
self.client.force_login(self.staff_user)
@@ -1371,8 +1467,58 @@ class ViewTests(TestCase):
self.assertEqual(response.status_code, 200)
self.assertContains(response, "Protect bases:</strong> yes")
self.assertContains(response, "Base snapshots referenced by kept snapshots")
self.assertContains(response, f"base-of:{child.dirname}")
def test_retention_plan_surfaces_incomplete_snapshots_without_deleting_them(self) -> None:
self.client.force_login(self.staff_user)
host = HostConfig.objects.create(
host="web-01",
address="web-01.example.test",
retention_daily=0,
retention_weekly=0,
retention_monthly=0,
retention_yearly=0,
)
self._snapshot(host, "20260518-021500Z__OLDSNAP")
self._snapshot(host, "20260519-021500Z__NEWSNAP")
SnapshotRecord.objects.create(
host=host,
kind=SnapshotRecord.Kind.INCOMPLETE,
dirname="20260519-031500Z__BROKEN01",
path=f"/backups/{host.host}/.incomplete/20260519-031500Z__BROKEN01",
status="failed",
started_at=datetime(2026, 5, 19, 3, 15, tzinfo=timezone.utc),
)
response = self.client.get(reverse("host_retention_plan", args=[host.host]))
self.assertEqual(response.status_code, 200)
self.assertContains(response, "Incomplete Snapshots")
self.assertContains(response, "20260519-031500Z__BROKEN01")
self.assertContains(response, "excluded from retention cleanup")
def test_host_detail_surfaces_retention_warnings(self) -> None:
self.client.force_login(self.staff_user)
host = HostConfig.objects.create(
host="web-01",
address="web-01.example.test",
retention_daily=0,
retention_weekly=0,
retention_monthly=0,
retention_yearly=0,
)
ScheduleConfig.objects.create(host=host, cron_expr="15 2 * * *", enabled=True, prune=True, prune_max_delete=1)
self._snapshot(host, "20260517-021500Z__OLDSNP1")
self._snapshot(host, "20260518-021500Z__OLDSNP2")
self._snapshot(host, "20260519-021500Z__NEWSNAP")
response = self.client.get(reverse("host_detail", args=[host.host]))
self.assertEqual(response.status_code, 200)
self.assertContains(response, "Retention Warnings")
self.assertContains(response, "Scheduled pruning would delete 2 snapshot(s), above max delete")
def test_retention_plan_rejects_invalid_kind(self) -> None:
self.client.force_login(self.staff_user)
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
@@ -1412,6 +1558,7 @@ class ViewTests(TestCase):
"kind": "scheduled",
"max_delete": "1",
"confirm_host": host.host,
"confirm_delete_count": "1",
},
follow=True,
)
@@ -1435,6 +1582,7 @@ class ViewTests(TestCase):
"kind": "scheduled",
"max_delete": "1",
"confirm_host": "wrong",
"confirm_delete_count": "1",
},
follow=True,
)
@@ -1443,6 +1591,34 @@ class ViewTests(TestCase):
self.assertContains(response, "Retention apply confirmation is invalid.")
self.assertEqual(SnapshotRecord.objects.count(), 1)
def test_retention_apply_rejects_mismatched_delete_count_confirmation(self) -> None:
self.client.force_login(self.staff_user)
host = HostConfig.objects.create(
host="web-01",
address="web-01.example.test",
retention_daily=0,
retention_weekly=0,
retention_monthly=0,
retention_yearly=0,
)
self._snapshot(host, "20260518-021500Z__OLDSNAP")
self._snapshot(host, "20260519-021500Z__NEWSNAP")
response = self.client.post(
reverse("apply_host_retention", args=[host.host]),
{
"kind": "scheduled",
"max_delete": "1",
"confirm_host": host.host,
"confirm_delete_count": "0",
},
follow=True,
)
self.assertRedirects(response, reverse("host_retention_plan", args=[host.host]))
self.assertContains(response, "Retention apply confirmation is invalid.")
self.assertEqual(SnapshotRecord.objects.count(), 2)
def test_retention_apply_requires_post(self) -> None:
self.client.force_login(self.staff_user)
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")

View File

@@ -55,6 +55,7 @@ def dashboard(request):
.first()
)
host_config.next_run_at = _next_run_for_host(host_config)
host_config.retention_warning = _retention_warning_for_host(host_config, _schedule_for_host(host_config))
stats_summary = collect_dashboard_stats(hosts=hosts, global_config=global_config)
context = {
"hosts": hosts,
@@ -274,6 +275,7 @@ def host_detail(request, host: str):
context = {
"host": host_config,
"schedule": schedule,
"retention_warning": _retention_warning_for_host(host_config, schedule),
"next_run_at": _next_run_for_schedule(schedule, host_config),
"scheduler_timezone": timezone.get_current_timezone_name(),
"discovery": inspect_snapshot_discovery(host=host_config),
@@ -427,6 +429,7 @@ def run_detail(request, run_id: int):
"failure_summary": failure.get("message") or failure.get("summary") or "",
"prune_result": prune_result,
"has_prune_result": bool(prune_result),
"retention_warning": _retention_warning_for_host(run.host, _schedule_for_host(run.host)),
"rsync_log_path": str(rsync_log_path) if rsync_log_path is not None else "",
"rsync_log_exists": bool(rsync_log_path and rsync_log_path.exists()),
"rsync_log_tail": rsync_log_tail,
@@ -526,17 +529,25 @@ def host_retention_plan(request, host: str):
except PobsyncError as exc:
messages.error(request, str(exc))
return redirect("host_detail", host=host_config.host)
schedule = _schedule_for_host(host_config)
scheduled_prune_limit = schedule.prune_max_delete if schedule and schedule.prune else None
delete_count = len(plan["delete"])
context = {
"host": host_config,
"kind": kind,
"protect_bases": protect_bases,
"plan": plan,
"schedule": schedule,
"scheduled_prune_limit": scheduled_prune_limit,
"scheduled_prune_exceeded": scheduled_prune_limit is not None and delete_count > scheduled_prune_limit,
"apply_form": RetentionApplyForm(
host_name=host_config.host,
expected_delete_count=delete_count,
initial={
"kind": kind,
"protect_bases": protect_bases,
"max_delete": len(plan["delete"]),
"max_delete": delete_count,
"confirm_delete_count": delete_count,
},
),
}
@@ -547,7 +558,26 @@ def host_retention_plan(request, host: str):
@require_POST
def apply_host_retention(request, host: str):
host_config = get_object_or_404(HostConfig, host=host)
form = RetentionApplyForm(request.POST, host_name=host_config.host)
raw_kind = request.POST.get("kind", "scheduled")
raw_protect_bases = request.POST.get("protect_bases") in {"1", "true", "on", "yes"}
expected_delete_count = None
if raw_kind in {"scheduled", "manual", "all"}:
try:
plan = run_sql_retention_plan(
host=host_config.host,
kind=raw_kind,
protect_bases=raw_protect_bases,
)
except PobsyncError as exc:
messages.error(request, str(exc))
return redirect("host_retention_plan", host=host_config.host)
expected_delete_count = len(plan.get("delete") or [])
form = RetentionApplyForm(
request.POST,
host_name=host_config.host,
expected_delete_count=expected_delete_count,
)
if not form.is_valid():
messages.error(request, "Retention apply confirmation is invalid.")
return redirect("host_retention_plan", host=host_config.host)
@@ -652,6 +682,43 @@ def _next_run_for_schedule(schedule: ScheduleConfig | None, host_config: HostCon
return None
def _retention_warning_for_host(host_config: HostConfig, schedule: ScheduleConfig | None) -> dict[str, object]:
incomplete_count = host_config.snapshots.filter(kind=SnapshotRecord.Kind.INCOMPLETE).count()
warning: dict[str, object] = {
"has_warning": incomplete_count > 0,
"incomplete_count": incomplete_count,
}
if schedule is None or not schedule.prune or not host_config.enabled:
return warning
try:
plan = run_sql_retention_plan(
host=host_config.host,
kind="scheduled",
protect_bases=bool(schedule.prune_protect_bases),
)
except PobsyncError as exc:
warning.update(
{
"has_warning": True,
"error": str(exc),
}
)
return warning
delete_count = len(plan.get("delete") or [])
warning.update(
{
"delete_count": delete_count,
"max_delete": schedule.prune_max_delete,
"protect_bases": bool(schedule.prune_protect_bases),
"prune_exceeded": delete_count > schedule.prune_max_delete,
}
)
if warning["prune_exceeded"]:
warning["has_warning"] = True
return warning
def _default_schedule_initial() -> dict[str, object]:
return {
"cron_expr": "15 2 * * *",