Merge pull request '(feature) Require review before incomplete cleanup' (#67) from issue-47-reviewed-incomplete-cleanup into master

Reviewed-on: #67
This commit was merged in pull request #67.
This commit is contained in:
2026-05-28 21:08:21 +02:00
5 changed files with 187 additions and 44 deletions

View File

@@ -23,7 +23,7 @@ def run_sql_retention_plan(*, host: str, kind: str, protect_bases: bool) -> dict
host_config = _enabled_host_config(host)
retention = _retention_for_host(host_config)
snapshots = _snapshots_for_retention(host_config=host_config, kind=kind)
incomplete_snapshots = _incomplete_snapshots_for_host(host_config)
incomplete_items = _incomplete_snapshot_items_for_host(host_config)
plan = build_retention_plan(
snapshots=snapshots,
@@ -49,10 +49,9 @@ def run_sql_retention_plan(*, host: str, kind: str, protect_bases: bool) -> dict
"keep": sorted(keep),
"keep_items": [_snapshot_to_item(snapshot, reasons=reasons.get(snapshot.dirname, [])) for snapshot in keep_items],
"delete": [_snapshot_to_item(snapshot, reasons=["outside retention policy"]) for snapshot in delete],
"incomplete": [
_snapshot_to_item(snapshot, reasons=["incomplete snapshot; excluded from retention cleanup"])
for snapshot in incomplete_snapshots
],
"incomplete": incomplete_items,
"incomplete_reviewed_count": sum(1 for item in incomplete_items if item["reviewed"]),
"incomplete_unreviewed_count": sum(1 for item in incomplete_items if not item["reviewed"]),
"reasons": reasons,
}
@@ -164,9 +163,15 @@ def run_incomplete_cleanup(
def _do_cleanup() -> dict[str, Any]:
host_config = _enabled_host_config(host)
unreviewed_count = _unreviewed_incomplete_count(host_config)
if unreviewed_count:
raise ConfigError(
f"Refusing to delete {unreviewed_count} incomplete snapshot(s) that have not been reviewed."
)
incomplete_list = [
_snapshot_to_item(snapshot, reasons=["manual incomplete cleanup"])
for snapshot in _incomplete_snapshots_for_host(host_config)
for snapshot in _reviewed_incomplete_snapshots_for_host(host_config)
]
if max_delete == 0 and len(incomplete_list) > 0:
raise ConfigError("Incomplete cleanup blocked by --max-delete=0")
@@ -253,15 +258,39 @@ def _snapshots_for_retention(*, host_config: HostConfig, kind: str) -> list[Snap
return [_snapshot_from_record(record) for record in records]
def _incomplete_snapshots_for_host(host_config: HostConfig) -> list[Snapshot]:
def _incomplete_snapshot_items_for_host(host_config: HostConfig) -> list[dict[str, Any]]:
records = (
SnapshotRecord.objects.filter(host=host_config, kind=SnapshotRecord.Kind.INCOMPLETE)
.select_related("base")
.order_by("-started_at", "dirname")
)
return [
_snapshot_record_to_item(record, reasons=["incomplete snapshot; excluded from retention cleanup"])
for record in records
]
def _reviewed_incomplete_snapshots_for_host(host_config: HostConfig) -> list[Snapshot]:
records = (
SnapshotRecord.objects.filter(
host=host_config,
kind=SnapshotRecord.Kind.INCOMPLETE,
reviewed_at__isnull=False,
)
.select_related("base")
.order_by("-started_at", "dirname")
)
return [_snapshot_from_record(record) for record in records]
def _unreviewed_incomplete_count(host_config: HostConfig) -> int:
return SnapshotRecord.objects.filter(
host=host_config,
kind=SnapshotRecord.Kind.INCOMPLETE,
reviewed_at__isnull=True,
).count()
def _snapshot_from_record(record: SnapshotRecord) -> Snapshot:
return Snapshot(
kind=record.kind,
@@ -301,6 +330,14 @@ def _snapshot_to_item(snapshot: Snapshot, *, reasons: list[str]) -> dict[str, An
}
def _snapshot_record_to_item(record: SnapshotRecord, *, reasons: list[str]) -> dict[str, Any]:
item = _snapshot_to_item(_snapshot_from_record(record), reasons=reasons)
item["reviewed"] = record.reviewed_at is not None
item["reviewed_at"] = record.reviewed_at.isoformat() if record.reviewed_at else ""
item["reviewed_by"] = record.reviewed_by
return item
def _snapshot_delete_path(*, path: Path, dirname: str) -> Path:
if path.name == "data" and path.parent.name == dirname:
return path.parent

View File

@@ -45,8 +45,9 @@
snapshots automatically because they can indicate an interrupted backup that should be inspected first.
</p>
<p>
After inspection, use the dedicated cleanup form below to delete only incomplete snapshot directories and their
tracking records. Successful scheduled and manual snapshots are not touched by this cleanup.
{{ incomplete_unreviewed_count }} still need review. After inspection, mark them reviewed and use the dedicated
cleanup form below to delete only incomplete snapshot directories and their tracking records. Successful
scheduled and manual snapshots are not touched by this cleanup.
</p>
</section>
{% endif %}
@@ -187,6 +188,7 @@
<th>Dirname</th>
<th>Started</th>
<th>Status</th>
<th>Review</th>
<th>Reason</th>
<th>Path</th>
</tr>
@@ -197,6 +199,14 @@
<td>{{ snapshot.dirname }}</td>
<td>{{ snapshot.dt }}</td>
<td>{{ snapshot.status|default:"" }}</td>
<td>
{% if snapshot.reviewed %}
<span class="status ok">reviewed</span>
<span class="muted">{{ snapshot.reviewed_by|default:"unknown" }}</span>
{% else %}
<span class="status warning">needs review</span>
{% endif %}
</td>
<td>{{ snapshot.reason }}</td>
<td class="muted">{{ snapshot.path }}</td>
</tr>
@@ -205,40 +215,51 @@
</table>
<h3>Cleanup Incomplete Snapshots</h3>
<p class="muted">
This deletes only incomplete snapshot directories and their tracking records. Successful manual and scheduled
snapshots are not touched.
</p>
<form method="post" action="{% url 'cleanup_host_incomplete_snapshots' host.host %}" class="form-grid">
{% csrf_token %}
{{ incomplete_cleanup_form.non_field_errors }}
{% if incomplete_unreviewed_count %}
<p class="muted">
Cleanup is blocked until all incomplete snapshots are reviewed. This extra step makes it explicit that the
interrupted backup was inspected before deletion.
</p>
<form method="post" action="{% url 'resolve_host_incomplete_reviews' host.host %}" class="actions inline">
{% csrf_token %}
<button type="submit" class="secondary">Mark incomplete snapshots reviewed</button>
</form>
{% else %}
<p class="muted">
This deletes only reviewed incomplete snapshot directories and their tracking records. Successful manual and
scheduled snapshots are not touched.
</p>
<form method="post" action="{% url 'cleanup_host_incomplete_snapshots' host.host %}" class="form-grid">
{% csrf_token %}
{{ incomplete_cleanup_form.non_field_errors }}
<div class="field">
{{ incomplete_cleanup_form.max_delete.errors }}
<label for="{{ incomplete_cleanup_form.max_delete.id_for_label }}">Max delete</label>
{{ incomplete_cleanup_form.max_delete }}
<div class="helptext">{{ incomplete_cleanup_form.max_delete.help_text }}</div>
</div>
<div class="field">
{{ incomplete_cleanup_form.max_delete.errors }}
<label for="{{ incomplete_cleanup_form.max_delete.id_for_label }}">Max delete</label>
{{ incomplete_cleanup_form.max_delete }}
<div class="helptext">{{ incomplete_cleanup_form.max_delete.help_text }}</div>
</div>
<div class="field">
{{ incomplete_cleanup_form.confirm_host.errors }}
<label for="{{ incomplete_cleanup_form.confirm_host.id_for_label }}">Confirm host</label>
{{ incomplete_cleanup_form.confirm_host }}
<div class="helptext">{{ incomplete_cleanup_form.confirm_host.help_text }}</div>
</div>
<div class="field">
{{ incomplete_cleanup_form.confirm_host.errors }}
<label for="{{ incomplete_cleanup_form.confirm_host.id_for_label }}">Confirm host</label>
{{ incomplete_cleanup_form.confirm_host }}
<div class="helptext">{{ incomplete_cleanup_form.confirm_host.help_text }}</div>
</div>
<div class="field">
{{ incomplete_cleanup_form.confirm_delete_count.errors }}
<label for="{{ incomplete_cleanup_form.confirm_delete_count.id_for_label }}">Confirm incomplete count</label>
{{ incomplete_cleanup_form.confirm_delete_count }}
<div class="helptext">{{ incomplete_cleanup_form.confirm_delete_count.help_text }}</div>
</div>
<div class="field">
{{ incomplete_cleanup_form.confirm_delete_count.errors }}
<label for="{{ incomplete_cleanup_form.confirm_delete_count.id_for_label }}">Confirm incomplete count</label>
{{ incomplete_cleanup_form.confirm_delete_count }}
<div class="helptext">{{ incomplete_cleanup_form.confirm_delete_count.help_text }}</div>
</div>
<div class="form-actions">
<button type="submit" class="danger">Delete incomplete snapshots</button>
<a class="button-link secondary" href="{% url 'host_retention_plan' host.host %}?kind={{ kind }}">Cancel</a>
</div>
</form>
<div class="form-actions">
<button type="submit" class="danger">Delete incomplete snapshots</button>
<a class="button-link secondary" href="{% url 'host_retention_plan' host.host %}?kind={{ kind }}">Cancel</a>
</div>
</form>
{% endif %}
</section>
{% endif %}
{% endblock %}

View File

@@ -254,6 +254,8 @@ class SqlRetentionTests(TestCase):
path=str(incomplete_dir),
status="failed",
started_at=datetime(2026, 5, 19, 3, 15, tzinfo=timezone.utc),
reviewed_at=datetime(2026, 5, 19, 4, 15, tzinfo=timezone.utc),
reviewed_by="admin",
)
result = run_incomplete_cleanup(
@@ -291,6 +293,8 @@ class SqlRetentionTests(TestCase):
path=str(incomplete_dir),
status="failed",
started_at=datetime(2026, 5, 19, 3, 15, tzinfo=timezone.utc),
reviewed_at=datetime(2026, 5, 19, 4, 15, tzinfo=timezone.utc),
reviewed_by="admin",
)
result = run_incomplete_cleanup(
@@ -305,6 +309,26 @@ class SqlRetentionTests(TestCase):
self.assertFalse(SnapshotRecord.objects.filter(pk=record.pk).exists())
self.assertEqual(result["deleted"][0]["dirname"], incomplete_dir.name)
def test_incomplete_cleanup_requires_reviewed_snapshots(self) -> None:
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
SnapshotRecord.objects.create(
host=host,
kind=SnapshotRecord.Kind.INCOMPLETE,
dirname="20260519-031500Z__BROKEN01",
path=f"/backups/{host.host}/.incomplete/20260519-031500Z__BROKEN01",
status="failed",
started_at=datetime(2026, 5, 19, 3, 15, tzinfo=timezone.utc),
)
with self.assertRaisesRegex(ConfigError, "have not been reviewed"):
run_incomplete_cleanup(
prefix=Path("/tmp/pobsync-test"),
host=host.host,
yes=True,
max_delete=1,
acquire_lock=False,
)
def test_incomplete_cleanup_respects_max_delete(self) -> None:
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
SnapshotRecord.objects.create(
@@ -314,6 +338,8 @@ class SqlRetentionTests(TestCase):
path=f"/backups/{host.host}/.incomplete/20260519-031500Z__BROKEN01",
status="failed",
started_at=datetime(2026, 5, 19, 3, 15, tzinfo=timezone.utc),
reviewed_at=datetime(2026, 5, 19, 4, 15, tzinfo=timezone.utc),
reviewed_by="admin",
)
with self.assertRaisesRegex(ConfigError, "blocked by --max-delete=0"):
@@ -334,6 +360,8 @@ class SqlRetentionTests(TestCase):
path=f"/backups/{host.host}/scheduled/20260519-031500Z__BROKEN01",
status="failed",
started_at=datetime(2026, 5, 19, 3, 15, tzinfo=timezone.utc),
reviewed_at=datetime(2026, 5, 19, 4, 15, tzinfo=timezone.utc),
reviewed_by="admin",
)
with self.assertRaisesRegex(ConfigError, "unexpected incomplete snapshot path"):

View File

@@ -2226,9 +2226,33 @@ class ViewTests(TestCase):
self.assertContains(response, "Incomplete Snapshots")
self.assertContains(response, "20260519-031500Z__BROKEN01")
self.assertContains(response, "excluded from retention cleanup")
self.assertContains(response, "needs review")
self.assertContains(response, "Cleanup is blocked until all incomplete snapshots are reviewed.")
self.assertContains(response, "Mark incomplete snapshots reviewed")
self.assertContains(response, "delete only incomplete snapshot directories")
self.assertNotContains(response, "Delete incomplete snapshots")
def test_retention_plan_offers_incomplete_cleanup_after_review(self) -> None:
self.client.force_login(self.staff_user)
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
SnapshotRecord.objects.create(
host=host,
kind=SnapshotRecord.Kind.INCOMPLETE,
dirname="20260519-031500Z__BROKEN01",
path=f"/backups/{host.host}/.incomplete/20260519-031500Z__BROKEN01",
status="failed",
started_at=datetime(2026, 5, 19, 3, 15, tzinfo=timezone.utc),
reviewed_at=datetime(2026, 5, 19, 4, 15, tzinfo=timezone.utc),
reviewed_by="admin",
)
response = self.client.get(reverse("host_retention_plan", args=[host.host]))
self.assertEqual(response.status_code, 200)
self.assertContains(response, "reviewed")
self.assertContains(response, "Delete incomplete snapshots")
self.assertContains(response, "Type 1 to confirm the current number of incomplete snapshots.")
self.assertContains(response, "This deletes only incomplete snapshot directories")
self.assertContains(response, "This deletes only reviewed incomplete snapshot directories")
self.assertContains(response, 'class="danger"', html=False)
def test_incomplete_cleanup_deletes_incomplete_snapshot_after_confirmation(self) -> None:
@@ -2246,6 +2270,8 @@ class ViewTests(TestCase):
path=str(incomplete_dir),
status="failed",
started_at=datetime(2026, 5, 19, 3, 15, tzinfo=timezone.utc),
reviewed_at=datetime(2026, 5, 19, 4, 15, tzinfo=timezone.utc),
reviewed_by="admin",
)
with override_settings(POBSYNC_HOME=str(home)):
@@ -2291,6 +2317,33 @@ class ViewTests(TestCase):
self.assertContains(response, "Incomplete cleanup confirmation is invalid.")
self.assertEqual(SnapshotRecord.objects.filter(kind=SnapshotRecord.Kind.INCOMPLETE).count(), 1)
def test_incomplete_cleanup_rejects_unreviewed_snapshots(self) -> None:
self.client.force_login(self.staff_user)
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
SnapshotRecord.objects.create(
host=host,
kind=SnapshotRecord.Kind.INCOMPLETE,
dirname="20260519-031500Z__BROKEN01",
path=f"/backups/{host.host}/.incomplete/20260519-031500Z__BROKEN01",
status="failed",
started_at=datetime(2026, 5, 19, 3, 15, tzinfo=timezone.utc),
)
with TemporaryDirectory() as tmp, override_settings(POBSYNC_HOME=str(Path(tmp) / "home")):
response = self.client.post(
reverse("cleanup_host_incomplete_snapshots", args=[host.host]),
{
"max_delete": "0",
"confirm_host": host.host,
"confirm_delete_count": "0",
},
follow=True,
)
self.assertRedirects(response, reverse("host_retention_plan", args=[host.host]))
self.assertContains(response, "have not been reviewed")
self.assertEqual(SnapshotRecord.objects.filter(kind=SnapshotRecord.Kind.INCOMPLETE).count(), 1)
def test_host_detail_surfaces_retention_warnings(self) -> None:
self.client.force_login(self.staff_user)
host = HostConfig.objects.create(

View File

@@ -933,6 +933,8 @@ def host_retention_plan(request, host: str):
scheduled_prune_limit = schedule.prune_max_delete if schedule and schedule.prune else None
delete_count = len(plan["delete"])
incomplete_count = len(plan["incomplete"])
incomplete_reviewed_count = int(plan.get("incomplete_reviewed_count") or 0)
incomplete_unreviewed_count = int(plan.get("incomplete_unreviewed_count") or 0)
context = {
"host": host_config,
"kind": kind,
@@ -941,6 +943,8 @@ def host_retention_plan(request, host: str):
"schedule": schedule,
"scheduled_prune_limit": scheduled_prune_limit,
"scheduled_prune_exceeded": scheduled_prune_limit is not None and delete_count > scheduled_prune_limit,
"incomplete_reviewed_count": incomplete_reviewed_count,
"incomplete_unreviewed_count": incomplete_unreviewed_count,
"apply_form": RetentionApplyForm(
host_name=host_config.host,
expected_delete_count=delete_count,
@@ -953,10 +957,10 @@ def host_retention_plan(request, host: str):
),
"incomplete_cleanup_form": IncompleteCleanupForm(
host_name=host_config.host,
expected_delete_count=incomplete_count,
expected_delete_count=incomplete_reviewed_count,
initial={
"max_delete": incomplete_count,
"confirm_delete_count": incomplete_count,
"max_delete": incomplete_reviewed_count,
"confirm_delete_count": incomplete_reviewed_count,
},
),
}
@@ -1027,7 +1031,7 @@ def cleanup_host_incomplete_snapshots(request, host: str):
messages.error(request, str(exc))
return redirect("host_retention_plan", host=host_config.host)
incomplete_count = len(plan.get("incomplete") or [])
incomplete_count = int(plan.get("incomplete_reviewed_count") or 0)
form = IncompleteCleanupForm(
request.POST,
host_name=host_config.host,