(release) Add review resolution for operational tasks

Add reviewed state for failed/warning runs and incomplete snapshot records,
then use it to clear dashboard and host “need review” tasks after an operator
has acknowledged them.

Expose Mark reviewed actions on run detail and host retention warnings, keep
reviewed records available for audit/debug, and exclude reviewed problem runs
from operational counts and latest issue summaries.

Refs #19
Refs #8
This commit is contained in:
2026-05-21 03:34:41 +02:00
parent d0c23deb72
commit c2e5a534aa
9 changed files with 222 additions and 9 deletions

View File

@@ -0,0 +1,30 @@
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("pobsync_backend", "0011_remove_globalconfig_data"),
]
operations = [
migrations.AddField(
model_name="backuprun",
name="reviewed_at",
field=models.DateTimeField(blank=True, null=True),
),
migrations.AddField(
model_name="backuprun",
name="reviewed_by",
field=models.CharField(blank=True, max_length=150),
),
migrations.AddField(
model_name="snapshotrecord",
name="reviewed_at",
field=models.DateTimeField(blank=True, null=True),
),
migrations.AddField(
model_name="snapshotrecord",
name="reviewed_by",
field=models.CharField(blank=True, max_length=150),
),
]

View File

@@ -124,6 +124,8 @@ class BackupRun(models.Model):
rsync_exit_code = models.IntegerField(null=True, blank=True)
result = models.JSONField(default=dict, blank=True)
created_at = models.DateTimeField(auto_now_add=True)
reviewed_at = models.DateTimeField(null=True, blank=True)
reviewed_by = models.CharField(max_length=150, blank=True)
class Meta:
ordering = ["-created_at"]
@@ -158,6 +160,8 @@ class SnapshotRecord(models.Model):
ended_at = models.DateTimeField(null=True, blank=True)
metadata = models.JSONField(default=dict, blank=True)
discovered_at = models.DateTimeField(auto_now_add=True)
reviewed_at = models.DateTimeField(null=True, blank=True)
reviewed_by = models.CharField(max_length=150, blank=True)
class Meta:
constraints = [

View File

@@ -94,6 +94,7 @@ def _run_summary(run: BackupRun) -> dict[str, Any]:
"snapshot": run.snapshot,
"snapshot_path": run.snapshot_path,
"status": run.status,
"reviewed_at": run.reviewed_at,
"has_stats": bool(stats),
"duration_seconds": _int_at(stats, "duration_seconds"),
"rsync": stats.get("rsync") if isinstance(stats.get("rsync"), dict) else {},
@@ -130,7 +131,7 @@ def _is_real_run(run: BackupRun) -> bool:
def _first_run_with_status(runs: list[dict[str, Any]], statuses: set[str]) -> dict[str, Any]:
for run in runs:
if run["status"] in statuses:
if run["status"] in statuses and run.get("reviewed_at") is None:
return run
return {}

View File

@@ -68,7 +68,7 @@
{% endif %}
</div>
{% elif counts.hosts %}
<p><span class="status ok">ok</span> No queued, running, warning, or failed runs.</p>
<p><span class="status ok">ok</span> No queued, running, or unreviewed warning/failed runs.</p>
{% else %}
<p class="muted">Add a host to start tracking backup status here.</p>
{% endif %}
@@ -243,6 +243,10 @@
{% endif %}
{% if host.retention_warning.incomplete_count %}
{{ host.retention_warning.incomplete_count }} incomplete snapshot(s) need review.
<form method="post" action="{% url 'resolve_host_incomplete_reviews' host.host %}">
{% csrf_token %}
<button type="submit" class="secondary">Mark reviewed</button>
</form>
{% endif %}
{% if host.retention_warning.error %}
{{ host.retention_warning.error }}

View File

@@ -84,6 +84,10 @@
{{ retention_warning.incomplete_count }} incomplete snapshot(s) exist. Retention does not delete incomplete
snapshots automatically; inspect them before cleanup.
</div>
<form method="post" action="{% url 'resolve_host_incomplete_reviews' host.host %}">
{% csrf_token %}
<button type="submit" class="secondary">Mark incomplete reviewed</button>
</form>
{% endif %}
{% if retention_warning.error %}
<div>{{ retention_warning.error }}</div>

View File

@@ -13,6 +13,14 @@
<button type="submit" class="secondary">Cancel run</button>
</form>
{% endif %}
{% if run.status == "failed" or run.status == "warning" %}
{% if not run.reviewed_at %}
<form method="post" action="{% url 'resolve_run_review' run.id %}">
{% csrf_token %}
<button type="submit" class="secondary">Mark reviewed</button>
</form>
{% endif %}
{% endif %}
</section>
<section class="grid" aria-label="Run summary">
@@ -33,6 +41,16 @@
</section>
{% endif %}
{% if run.reviewed_at %}
<section class="panel highlight success">
<h2>Review</h2>
<div class="stack">
<div><strong>Reviewed:</strong> {{ run.reviewed_at }}</div>
<div><strong>Reviewed by:</strong> {{ run.reviewed_by|default:"unknown" }}</div>
</div>
</section>
{% endif %}
{% if dry_run_summary %}
<section class="panel highlight {{ dry_run_summary.highlight_class }}">
<h2>Dry Run Summary</h2>

View File

@@ -186,7 +186,7 @@ class ViewTests(TestCase):
self.assertEqual(response.status_code, 200)
self.assertContains(response, "Operational Status")
self.assertContains(response, "No queued, running, warning, or failed runs.")
self.assertContains(response, "No queued, running, or unreviewed warning/failed runs.")
def test_dashboard_surfaces_retention_warnings(self) -> None:
self.client.force_login(self.staff_user)
@@ -216,6 +216,30 @@ class ViewTests(TestCase):
self.assertEqual(response.status_code, 200)
self.assertContains(response, "Scheduled prune would delete 2 snapshot(s), above max 1.")
self.assertContains(response, "1 incomplete snapshot(s) need review.")
self.assertContains(response, "Mark reviewed")
def test_dashboard_ignores_reviewed_problem_runs(self) -> None:
self.client.force_login(self.staff_user)
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
BackupRun.objects.create(
host=host,
status=BackupRun.Status.FAILED,
reviewed_at=datetime(2026, 5, 19, 4, 15, tzinfo=timezone.utc),
reviewed_by="admin",
)
BackupRun.objects.create(
host=host,
status=BackupRun.Status.WARNING,
reviewed_at=datetime(2026, 5, 19, 4, 20, tzinfo=timezone.utc),
reviewed_by="admin",
)
response = self.client.get(reverse("dashboard"))
self.assertEqual(response.status_code, 200)
self.assertContains(response, "No queued, running, or unreviewed warning/failed runs.")
self.assertNotContains(response, "failed 1")
self.assertNotContains(response, "warning 1")
def test_dashboard_links_latest_snapshot_for_each_host(self) -> None:
self.client.force_login(self.staff_user)
@@ -1310,6 +1334,34 @@ class ViewTests(TestCase):
self.assertContains(response, "Incomplete ignored")
self.assertContains(response, "deleted scheduled 20260518-021500Z__OLD")
def test_run_review_action_marks_problem_run_reviewed(self) -> None:
self.client.force_login(self.staff_user)
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
run = BackupRun.objects.create(host=host, status=BackupRun.Status.FAILED, result={"ok": False})
response = self.client.post(reverse("resolve_run_review", args=[run.id]), follow=True)
run.refresh_from_db()
self.assertIsNotNone(run.reviewed_at)
self.assertEqual(run.reviewed_by, self.staff_user.username)
self.assertRedirects(response, reverse("run_detail", args=[run.id]))
self.assertContains(response, f"Run {run.id} marked reviewed.")
self.assertContains(response, "Review")
self.assertContains(response, self.staff_user.username)
self.assertNotContains(response, "Mark reviewed")
def test_run_review_action_ignores_successful_run(self) -> None:
self.client.force_login(self.staff_user)
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
run = BackupRun.objects.create(host=host, status=BackupRun.Status.SUCCESS, result={"ok": True})
response = self.client.post(reverse("resolve_run_review", args=[run.id]), follow=True)
run.refresh_from_db()
self.assertIsNone(run.reviewed_at)
self.assertRedirects(response, reverse("run_detail", args=[run.id]))
self.assertContains(response, f"Run {run.id} does not need review.")
def test_run_detail_surfaces_host_retention_warnings(self) -> None:
self.client.force_login(self.staff_user)
host = HostConfig.objects.create(
@@ -1704,6 +1756,46 @@ class ViewTests(TestCase):
self.assertContains(response, "Retention Warnings")
self.assertContains(response, "Scheduled pruning would delete 2 snapshot(s), above max delete")
def test_host_detail_can_mark_incomplete_snapshots_reviewed(self) -> None:
self.client.force_login(self.staff_user)
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
incomplete = SnapshotRecord.objects.create(
host=host,
kind=SnapshotRecord.Kind.INCOMPLETE,
dirname="20260519-031500Z__BROKEN01",
path=f"/backups/{host.host}/.incomplete/20260519-031500Z__BROKEN01",
status="failed",
started_at=datetime(2026, 5, 19, 3, 15, tzinfo=timezone.utc),
)
response = self.client.post(reverse("resolve_host_incomplete_reviews", args=[host.host]), follow=True)
incomplete.refresh_from_db()
self.assertIsNotNone(incomplete.reviewed_at)
self.assertEqual(incomplete.reviewed_by, self.staff_user.username)
self.assertRedirects(response, reverse("host_detail", args=[host.host]))
self.assertContains(response, "Marked 1 incomplete snapshot(s) reviewed for web-01.")
self.assertNotContains(response, "Retention Warnings")
def test_host_detail_does_not_warn_for_reviewed_incomplete_snapshots(self) -> None:
self.client.force_login(self.staff_user)
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
SnapshotRecord.objects.create(
host=host,
kind=SnapshotRecord.Kind.INCOMPLETE,
dirname="20260519-031500Z__BROKEN01",
path=f"/backups/{host.host}/.incomplete/20260519-031500Z__BROKEN01",
status="failed",
started_at=datetime(2026, 5, 19, 3, 15, tzinfo=timezone.utc),
reviewed_at=datetime(2026, 5, 19, 4, 15, tzinfo=timezone.utc),
reviewed_by="admin",
)
response = self.client.get(reverse("host_detail", args=[host.host]))
self.assertEqual(response.status_code, 200)
self.assertNotContains(response, "Retention Warnings")
def test_retention_plan_rejects_invalid_kind(self) -> None:
self.client.force_login(self.staff_user)
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")

View File

@@ -53,8 +53,16 @@ def dashboard(request):
run_count=Count("runs", distinct=True),
queued_run_count=Count("runs", filter=Q(runs__status=BackupRun.Status.QUEUED), distinct=True),
running_run_count=Count("runs", filter=Q(runs__status=BackupRun.Status.RUNNING), distinct=True),
warning_run_count=Count("runs", filter=Q(runs__status=BackupRun.Status.WARNING), distinct=True),
failed_run_count=Count("runs", filter=Q(runs__status=BackupRun.Status.FAILED), distinct=True),
warning_run_count=Count(
"runs",
filter=Q(runs__status=BackupRun.Status.WARNING, runs__reviewed_at__isnull=True),
distinct=True,
),
failed_run_count=Count(
"runs",
filter=Q(runs__status=BackupRun.Status.FAILED, runs__reviewed_at__isnull=True),
distinct=True,
),
)
.order_by("host")
)
@@ -83,8 +91,14 @@ def dashboard(request):
"runs": BackupRun.objects.count(),
"queued_runs": BackupRun.objects.filter(status=BackupRun.Status.QUEUED).count(),
"running_runs": BackupRun.objects.filter(status=BackupRun.Status.RUNNING).count(),
"warning_runs": BackupRun.objects.filter(status=BackupRun.Status.WARNING).count(),
"failed_runs": BackupRun.objects.filter(status=BackupRun.Status.FAILED).count(),
"warning_runs": BackupRun.objects.filter(
status=BackupRun.Status.WARNING,
reviewed_at__isnull=True,
).count(),
"failed_runs": BackupRun.objects.filter(
status=BackupRun.Status.FAILED,
reviewed_at__isnull=True,
).count(),
},
}
return render(request, "pobsync_backend/dashboard.html", context)
@@ -331,7 +345,10 @@ def host_detail(request, host: str):
"runs": host_config.runs.count(),
"queued_runs": queued_runs.count(),
"running_runs": running_runs.count(),
"failed_runs": host_config.runs.filter(status=BackupRun.Status.FAILED).count(),
"failed_runs": host_config.runs.filter(
status=BackupRun.Status.FAILED,
reviewed_at__isnull=True,
).count(),
"incomplete_snapshots": host_config.snapshots.filter(kind=SnapshotRecord.Kind.INCOMPLETE).count(),
},
}
@@ -515,6 +532,40 @@ def cancel_run(request, run_id: int):
return redirect("run_detail", run_id=run.id)
@staff_member_required
@require_POST
def resolve_run_review(request, run_id: int):
run = get_object_or_404(BackupRun.objects.select_related("host"), id=run_id)
if run.status not in {BackupRun.Status.FAILED, BackupRun.Status.WARNING}:
messages.warning(request, f"Run {run.id} does not need review.")
return redirect("run_detail", run_id=run.id)
if run.reviewed_at:
messages.info(request, f"Run {run.id} was already marked reviewed.")
return redirect("run_detail", run_id=run.id)
run.reviewed_at = timezone.now()
run.reviewed_by = request.user.get_username()
run.save(update_fields=["reviewed_at", "reviewed_by"])
messages.success(request, f"Run {run.id} marked reviewed.")
return redirect("run_detail", run_id=run.id)
@staff_member_required
@require_POST
def resolve_host_incomplete_reviews(request, host: str):
host_config = get_object_or_404(HostConfig, host=host)
reviewed_count = host_config.snapshots.filter(
kind=SnapshotRecord.Kind.INCOMPLETE,
reviewed_at__isnull=True,
).update(reviewed_at=timezone.now(), reviewed_by=request.user.get_username())
if reviewed_count:
messages.success(request, f"Marked {reviewed_count} incomplete snapshot(s) reviewed for {host_config.host}.")
else:
messages.info(request, f"No incomplete snapshots needed review for {host_config.host}.")
return redirect("host_detail", host=host_config.host)
@staff_member_required
def snapshot_detail(request, snapshot_id: int):
snapshot = get_object_or_404(
@@ -764,7 +815,10 @@ def _next_run_for_schedule(schedule: ScheduleConfig | None, host_config: HostCon
def _retention_warning_for_host(host_config: HostConfig, schedule: ScheduleConfig | None) -> dict[str, object]:
incomplete_count = host_config.snapshots.filter(kind=SnapshotRecord.Kind.INCOMPLETE).count()
incomplete_count = host_config.snapshots.filter(
kind=SnapshotRecord.Kind.INCOMPLETE,
reviewed_at__isnull=True,
).count()
warning: dict[str, object] = {
"has_warning": incomplete_count > 0,
"incomplete_count": incomplete_count,