release-hardening_1.0 #21

Merged
parkel merged 8 commits from issue-8-release-hardening into master 2026-05-21 03:56:25 +02:00
9 changed files with 222 additions and 9 deletions
Showing only changes of commit c2e5a534aa - Show all commits

View File

@@ -0,0 +1,30 @@
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("pobsync_backend", "0011_remove_globalconfig_data"),
]
operations = [
migrations.AddField(
model_name="backuprun",
name="reviewed_at",
field=models.DateTimeField(blank=True, null=True),
),
migrations.AddField(
model_name="backuprun",
name="reviewed_by",
field=models.CharField(blank=True, max_length=150),
),
migrations.AddField(
model_name="snapshotrecord",
name="reviewed_at",
field=models.DateTimeField(blank=True, null=True),
),
migrations.AddField(
model_name="snapshotrecord",
name="reviewed_by",
field=models.CharField(blank=True, max_length=150),
),
]

View File

@@ -124,6 +124,8 @@ class BackupRun(models.Model):
rsync_exit_code = models.IntegerField(null=True, blank=True) rsync_exit_code = models.IntegerField(null=True, blank=True)
result = models.JSONField(default=dict, blank=True) result = models.JSONField(default=dict, blank=True)
created_at = models.DateTimeField(auto_now_add=True) created_at = models.DateTimeField(auto_now_add=True)
reviewed_at = models.DateTimeField(null=True, blank=True)
reviewed_by = models.CharField(max_length=150, blank=True)
class Meta: class Meta:
ordering = ["-created_at"] ordering = ["-created_at"]
@@ -158,6 +160,8 @@ class SnapshotRecord(models.Model):
ended_at = models.DateTimeField(null=True, blank=True) ended_at = models.DateTimeField(null=True, blank=True)
metadata = models.JSONField(default=dict, blank=True) metadata = models.JSONField(default=dict, blank=True)
discovered_at = models.DateTimeField(auto_now_add=True) discovered_at = models.DateTimeField(auto_now_add=True)
reviewed_at = models.DateTimeField(null=True, blank=True)
reviewed_by = models.CharField(max_length=150, blank=True)
class Meta: class Meta:
constraints = [ constraints = [

View File

@@ -94,6 +94,7 @@ def _run_summary(run: BackupRun) -> dict[str, Any]:
"snapshot": run.snapshot, "snapshot": run.snapshot,
"snapshot_path": run.snapshot_path, "snapshot_path": run.snapshot_path,
"status": run.status, "status": run.status,
"reviewed_at": run.reviewed_at,
"has_stats": bool(stats), "has_stats": bool(stats),
"duration_seconds": _int_at(stats, "duration_seconds"), "duration_seconds": _int_at(stats, "duration_seconds"),
"rsync": stats.get("rsync") if isinstance(stats.get("rsync"), dict) else {}, "rsync": stats.get("rsync") if isinstance(stats.get("rsync"), dict) else {},
@@ -130,7 +131,7 @@ def _is_real_run(run: BackupRun) -> bool:
def _first_run_with_status(runs: list[dict[str, Any]], statuses: set[str]) -> dict[str, Any]: def _first_run_with_status(runs: list[dict[str, Any]], statuses: set[str]) -> dict[str, Any]:
for run in runs: for run in runs:
if run["status"] in statuses: if run["status"] in statuses and run.get("reviewed_at") is None:
return run return run
return {} return {}

View File

@@ -68,7 +68,7 @@
{% endif %} {% endif %}
</div> </div>
{% elif counts.hosts %} {% elif counts.hosts %}
<p><span class="status ok">ok</span> No queued, running, warning, or failed runs.</p> <p><span class="status ok">ok</span> No queued, running, or unreviewed warning/failed runs.</p>
{% else %} {% else %}
<p class="muted">Add a host to start tracking backup status here.</p> <p class="muted">Add a host to start tracking backup status here.</p>
{% endif %} {% endif %}
@@ -243,6 +243,10 @@
{% endif %} {% endif %}
{% if host.retention_warning.incomplete_count %} {% if host.retention_warning.incomplete_count %}
{{ host.retention_warning.incomplete_count }} incomplete snapshot(s) need review. {{ host.retention_warning.incomplete_count }} incomplete snapshot(s) need review.
<form method="post" action="{% url 'resolve_host_incomplete_reviews' host.host %}">
{% csrf_token %}
<button type="submit" class="secondary">Mark reviewed</button>
</form>
{% endif %} {% endif %}
{% if host.retention_warning.error %} {% if host.retention_warning.error %}
{{ host.retention_warning.error }} {{ host.retention_warning.error }}

View File

@@ -84,6 +84,10 @@
{{ retention_warning.incomplete_count }} incomplete snapshot(s) exist. Retention does not delete incomplete {{ retention_warning.incomplete_count }} incomplete snapshot(s) exist. Retention does not delete incomplete
snapshots automatically; inspect them before cleanup. snapshots automatically; inspect them before cleanup.
</div> </div>
<form method="post" action="{% url 'resolve_host_incomplete_reviews' host.host %}">
{% csrf_token %}
<button type="submit" class="secondary">Mark incomplete reviewed</button>
</form>
{% endif %} {% endif %}
{% if retention_warning.error %} {% if retention_warning.error %}
<div>{{ retention_warning.error }}</div> <div>{{ retention_warning.error }}</div>

View File

@@ -13,6 +13,14 @@
<button type="submit" class="secondary">Cancel run</button> <button type="submit" class="secondary">Cancel run</button>
</form> </form>
{% endif %} {% endif %}
{% if run.status == "failed" or run.status == "warning" %}
{% if not run.reviewed_at %}
<form method="post" action="{% url 'resolve_run_review' run.id %}">
{% csrf_token %}
<button type="submit" class="secondary">Mark reviewed</button>
</form>
{% endif %}
{% endif %}
</section> </section>
<section class="grid" aria-label="Run summary"> <section class="grid" aria-label="Run summary">
@@ -33,6 +41,16 @@
</section> </section>
{% endif %} {% endif %}
{% if run.reviewed_at %}
<section class="panel highlight success">
<h2>Review</h2>
<div class="stack">
<div><strong>Reviewed:</strong> {{ run.reviewed_at }}</div>
<div><strong>Reviewed by:</strong> {{ run.reviewed_by|default:"unknown" }}</div>
</div>
</section>
{% endif %}
{% if dry_run_summary %} {% if dry_run_summary %}
<section class="panel highlight {{ dry_run_summary.highlight_class }}"> <section class="panel highlight {{ dry_run_summary.highlight_class }}">
<h2>Dry Run Summary</h2> <h2>Dry Run Summary</h2>

View File

@@ -186,7 +186,7 @@ class ViewTests(TestCase):
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
self.assertContains(response, "Operational Status") self.assertContains(response, "Operational Status")
self.assertContains(response, "No queued, running, warning, or failed runs.") self.assertContains(response, "No queued, running, or unreviewed warning/failed runs.")
def test_dashboard_surfaces_retention_warnings(self) -> None: def test_dashboard_surfaces_retention_warnings(self) -> None:
self.client.force_login(self.staff_user) self.client.force_login(self.staff_user)
@@ -216,6 +216,30 @@ class ViewTests(TestCase):
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
self.assertContains(response, "Scheduled prune would delete 2 snapshot(s), above max 1.") self.assertContains(response, "Scheduled prune would delete 2 snapshot(s), above max 1.")
self.assertContains(response, "1 incomplete snapshot(s) need review.") self.assertContains(response, "1 incomplete snapshot(s) need review.")
self.assertContains(response, "Mark reviewed")
def test_dashboard_ignores_reviewed_problem_runs(self) -> None:
self.client.force_login(self.staff_user)
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
BackupRun.objects.create(
host=host,
status=BackupRun.Status.FAILED,
reviewed_at=datetime(2026, 5, 19, 4, 15, tzinfo=timezone.utc),
reviewed_by="admin",
)
BackupRun.objects.create(
host=host,
status=BackupRun.Status.WARNING,
reviewed_at=datetime(2026, 5, 19, 4, 20, tzinfo=timezone.utc),
reviewed_by="admin",
)
response = self.client.get(reverse("dashboard"))
self.assertEqual(response.status_code, 200)
self.assertContains(response, "No queued, running, or unreviewed warning/failed runs.")
self.assertNotContains(response, "failed 1")
self.assertNotContains(response, "warning 1")
def test_dashboard_links_latest_snapshot_for_each_host(self) -> None: def test_dashboard_links_latest_snapshot_for_each_host(self) -> None:
self.client.force_login(self.staff_user) self.client.force_login(self.staff_user)
@@ -1310,6 +1334,34 @@ class ViewTests(TestCase):
self.assertContains(response, "Incomplete ignored") self.assertContains(response, "Incomplete ignored")
self.assertContains(response, "deleted scheduled 20260518-021500Z__OLD") self.assertContains(response, "deleted scheduled 20260518-021500Z__OLD")
def test_run_review_action_marks_problem_run_reviewed(self) -> None:
self.client.force_login(self.staff_user)
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
run = BackupRun.objects.create(host=host, status=BackupRun.Status.FAILED, result={"ok": False})
response = self.client.post(reverse("resolve_run_review", args=[run.id]), follow=True)
run.refresh_from_db()
self.assertIsNotNone(run.reviewed_at)
self.assertEqual(run.reviewed_by, self.staff_user.username)
self.assertRedirects(response, reverse("run_detail", args=[run.id]))
self.assertContains(response, f"Run {run.id} marked reviewed.")
self.assertContains(response, "Review")
self.assertContains(response, self.staff_user.username)
self.assertNotContains(response, "Mark reviewed")
def test_run_review_action_ignores_successful_run(self) -> None:
self.client.force_login(self.staff_user)
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
run = BackupRun.objects.create(host=host, status=BackupRun.Status.SUCCESS, result={"ok": True})
response = self.client.post(reverse("resolve_run_review", args=[run.id]), follow=True)
run.refresh_from_db()
self.assertIsNone(run.reviewed_at)
self.assertRedirects(response, reverse("run_detail", args=[run.id]))
self.assertContains(response, f"Run {run.id} does not need review.")
def test_run_detail_surfaces_host_retention_warnings(self) -> None: def test_run_detail_surfaces_host_retention_warnings(self) -> None:
self.client.force_login(self.staff_user) self.client.force_login(self.staff_user)
host = HostConfig.objects.create( host = HostConfig.objects.create(
@@ -1704,6 +1756,46 @@ class ViewTests(TestCase):
self.assertContains(response, "Retention Warnings") self.assertContains(response, "Retention Warnings")
self.assertContains(response, "Scheduled pruning would delete 2 snapshot(s), above max delete") self.assertContains(response, "Scheduled pruning would delete 2 snapshot(s), above max delete")
def test_host_detail_can_mark_incomplete_snapshots_reviewed(self) -> None:
self.client.force_login(self.staff_user)
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
incomplete = SnapshotRecord.objects.create(
host=host,
kind=SnapshotRecord.Kind.INCOMPLETE,
dirname="20260519-031500Z__BROKEN01",
path=f"/backups/{host.host}/.incomplete/20260519-031500Z__BROKEN01",
status="failed",
started_at=datetime(2026, 5, 19, 3, 15, tzinfo=timezone.utc),
)
response = self.client.post(reverse("resolve_host_incomplete_reviews", args=[host.host]), follow=True)
incomplete.refresh_from_db()
self.assertIsNotNone(incomplete.reviewed_at)
self.assertEqual(incomplete.reviewed_by, self.staff_user.username)
self.assertRedirects(response, reverse("host_detail", args=[host.host]))
self.assertContains(response, "Marked 1 incomplete snapshot(s) reviewed for web-01.")
self.assertNotContains(response, "Retention Warnings")
def test_host_detail_does_not_warn_for_reviewed_incomplete_snapshots(self) -> None:
self.client.force_login(self.staff_user)
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
SnapshotRecord.objects.create(
host=host,
kind=SnapshotRecord.Kind.INCOMPLETE,
dirname="20260519-031500Z__BROKEN01",
path=f"/backups/{host.host}/.incomplete/20260519-031500Z__BROKEN01",
status="failed",
started_at=datetime(2026, 5, 19, 3, 15, tzinfo=timezone.utc),
reviewed_at=datetime(2026, 5, 19, 4, 15, tzinfo=timezone.utc),
reviewed_by="admin",
)
response = self.client.get(reverse("host_detail", args=[host.host]))
self.assertEqual(response.status_code, 200)
self.assertNotContains(response, "Retention Warnings")
def test_retention_plan_rejects_invalid_kind(self) -> None: def test_retention_plan_rejects_invalid_kind(self) -> None:
self.client.force_login(self.staff_user) self.client.force_login(self.staff_user)
host = HostConfig.objects.create(host="web-01", address="web-01.example.test") host = HostConfig.objects.create(host="web-01", address="web-01.example.test")

View File

@@ -53,8 +53,16 @@ def dashboard(request):
run_count=Count("runs", distinct=True), run_count=Count("runs", distinct=True),
queued_run_count=Count("runs", filter=Q(runs__status=BackupRun.Status.QUEUED), distinct=True), queued_run_count=Count("runs", filter=Q(runs__status=BackupRun.Status.QUEUED), distinct=True),
running_run_count=Count("runs", filter=Q(runs__status=BackupRun.Status.RUNNING), distinct=True), running_run_count=Count("runs", filter=Q(runs__status=BackupRun.Status.RUNNING), distinct=True),
warning_run_count=Count("runs", filter=Q(runs__status=BackupRun.Status.WARNING), distinct=True), warning_run_count=Count(
failed_run_count=Count("runs", filter=Q(runs__status=BackupRun.Status.FAILED), distinct=True), "runs",
filter=Q(runs__status=BackupRun.Status.WARNING, runs__reviewed_at__isnull=True),
distinct=True,
),
failed_run_count=Count(
"runs",
filter=Q(runs__status=BackupRun.Status.FAILED, runs__reviewed_at__isnull=True),
distinct=True,
),
) )
.order_by("host") .order_by("host")
) )
@@ -83,8 +91,14 @@ def dashboard(request):
"runs": BackupRun.objects.count(), "runs": BackupRun.objects.count(),
"queued_runs": BackupRun.objects.filter(status=BackupRun.Status.QUEUED).count(), "queued_runs": BackupRun.objects.filter(status=BackupRun.Status.QUEUED).count(),
"running_runs": BackupRun.objects.filter(status=BackupRun.Status.RUNNING).count(), "running_runs": BackupRun.objects.filter(status=BackupRun.Status.RUNNING).count(),
"warning_runs": BackupRun.objects.filter(status=BackupRun.Status.WARNING).count(), "warning_runs": BackupRun.objects.filter(
"failed_runs": BackupRun.objects.filter(status=BackupRun.Status.FAILED).count(), status=BackupRun.Status.WARNING,
reviewed_at__isnull=True,
).count(),
"failed_runs": BackupRun.objects.filter(
status=BackupRun.Status.FAILED,
reviewed_at__isnull=True,
).count(),
}, },
} }
return render(request, "pobsync_backend/dashboard.html", context) return render(request, "pobsync_backend/dashboard.html", context)
@@ -331,7 +345,10 @@ def host_detail(request, host: str):
"runs": host_config.runs.count(), "runs": host_config.runs.count(),
"queued_runs": queued_runs.count(), "queued_runs": queued_runs.count(),
"running_runs": running_runs.count(), "running_runs": running_runs.count(),
"failed_runs": host_config.runs.filter(status=BackupRun.Status.FAILED).count(), "failed_runs": host_config.runs.filter(
status=BackupRun.Status.FAILED,
reviewed_at__isnull=True,
).count(),
"incomplete_snapshots": host_config.snapshots.filter(kind=SnapshotRecord.Kind.INCOMPLETE).count(), "incomplete_snapshots": host_config.snapshots.filter(kind=SnapshotRecord.Kind.INCOMPLETE).count(),
}, },
} }
@@ -515,6 +532,40 @@ def cancel_run(request, run_id: int):
return redirect("run_detail", run_id=run.id) return redirect("run_detail", run_id=run.id)
@staff_member_required
@require_POST
def resolve_run_review(request, run_id: int):
run = get_object_or_404(BackupRun.objects.select_related("host"), id=run_id)
if run.status not in {BackupRun.Status.FAILED, BackupRun.Status.WARNING}:
messages.warning(request, f"Run {run.id} does not need review.")
return redirect("run_detail", run_id=run.id)
if run.reviewed_at:
messages.info(request, f"Run {run.id} was already marked reviewed.")
return redirect("run_detail", run_id=run.id)
run.reviewed_at = timezone.now()
run.reviewed_by = request.user.get_username()
run.save(update_fields=["reviewed_at", "reviewed_by"])
messages.success(request, f"Run {run.id} marked reviewed.")
return redirect("run_detail", run_id=run.id)
@staff_member_required
@require_POST
def resolve_host_incomplete_reviews(request, host: str):
host_config = get_object_or_404(HostConfig, host=host)
reviewed_count = host_config.snapshots.filter(
kind=SnapshotRecord.Kind.INCOMPLETE,
reviewed_at__isnull=True,
).update(reviewed_at=timezone.now(), reviewed_by=request.user.get_username())
if reviewed_count:
messages.success(request, f"Marked {reviewed_count} incomplete snapshot(s) reviewed for {host_config.host}.")
else:
messages.info(request, f"No incomplete snapshots needed review for {host_config.host}.")
return redirect("host_detail", host=host_config.host)
@staff_member_required @staff_member_required
def snapshot_detail(request, snapshot_id: int): def snapshot_detail(request, snapshot_id: int):
snapshot = get_object_or_404( snapshot = get_object_or_404(
@@ -764,7 +815,10 @@ def _next_run_for_schedule(schedule: ScheduleConfig | None, host_config: HostCon
def _retention_warning_for_host(host_config: HostConfig, schedule: ScheduleConfig | None) -> dict[str, object]: def _retention_warning_for_host(host_config: HostConfig, schedule: ScheduleConfig | None) -> dict[str, object]:
incomplete_count = host_config.snapshots.filter(kind=SnapshotRecord.Kind.INCOMPLETE).count() incomplete_count = host_config.snapshots.filter(
kind=SnapshotRecord.Kind.INCOMPLETE,
reviewed_at__isnull=True,
).count()
warning: dict[str, object] = { warning: dict[str, object] = {
"has_warning": incomplete_count > 0, "has_warning": incomplete_count > 0,
"incomplete_count": incomplete_count, "incomplete_count": incomplete_count,

View File

@@ -36,6 +36,12 @@ urlpatterns = [
path("runs/<int:run_id>/", views.run_detail, name="run_detail"), path("runs/<int:run_id>/", views.run_detail, name="run_detail"),
path("runs/<int:run_id>/rsync-log/", views.run_rsync_log, name="run_rsync_log"), path("runs/<int:run_id>/rsync-log/", views.run_rsync_log, name="run_rsync_log"),
path("runs/<int:run_id>/cancel/", views.cancel_run, name="cancel_run"), path("runs/<int:run_id>/cancel/", views.cancel_run, name="cancel_run"),
path("runs/<int:run_id>/resolve-review/", views.resolve_run_review, name="resolve_run_review"),
path(
"hosts/<str:host>/resolve-incomplete-reviews/",
views.resolve_host_incomplete_reviews,
name="resolve_host_incomplete_reviews",
),
path("snapshots/<int:snapshot_id>/", views.snapshot_detail, name="snapshot_detail"), path("snapshots/<int:snapshot_id>/", views.snapshot_detail, name="snapshot_detail"),
path("api/", api.api_index), path("api/", api.api_index),
path("api/status/", api.status), path("api/status/", api.status),