(ui) Separate healthy and problematic dashboard runs

Split dashboard host cards into last successful backup and latest warning or failed run so operators can quickly see whether a host is protected even when recent activity produced an issue. Also add queued and warning run counts to the dashboard summary metrics.
2026-05-21 01:34:38 +02:00
parent 17215fd191
commit ef1761385e
4 changed files with 54 additions and 7 deletions
--- a/src/pobsync_backend/stats_summary.py
+++ b/src/pobsync_backend/stats_summary.py
@@ -52,9 +52,10 @@ def collect_dashboard_stats(*, hosts: Iterable[HostConfig], global_config: Globa
 def collect_host_stats(*, host: HostConfig, limit: int = 8) -> dict[str, Any]:
-    runs = list(host.runs.select_related("snapshot").filter(status__in=_COMPLETED_BACKUP_STATUSES).order_by("-started_at", "-created_at")[:50])
+    runs = list(host.runs.select_related("snapshot").order_by("-started_at", "-created_at")[:50])
    real_runs = [_run_summary(run) for run in runs if _is_real_run(run)]
-    trend_runs = [run for run in real_runs if run["has_stats"]][:limit]
+    completed_real_runs = [run for run in real_runs if run["status"] in _COMPLETED_BACKUP_STATUSES]
    trend_runs = [run for run in completed_real_runs if run["has_stats"]][:limit]
    latest_snapshot = host.snapshots.order_by("-started_at", "-discovered_at", "-id").first()
    latest_snapshot_stats = _snapshot_summary(latest_snapshot) if latest_snapshot else {}
@@ -67,7 +68,9 @@ def collect_host_stats(*, host: HostConfig, limit: int = 8) -> dict[str, Any]:
    return {
        "runs": [_with_bar_percentages(run, max_literal=max_literal, max_matched=max_matched) for run in trend_runs],
-        "latest_run": real_runs[0] if real_runs else {},
+        "latest_run": completed_real_runs[0] if completed_real_runs else {},
        "latest_good_run": _first_run_with_status(real_runs, {BackupRun.Status.SUCCESS}),
        "latest_problem_run": _first_run_with_status(real_runs, {BackupRun.Status.WARNING, BackupRun.Status.FAILED}),
        "latest_snapshot": latest_snapshot_stats,
        "avg_literal_data_bytes": _average(literal_values),
        "avg_daily_literal_data_bytes": _average_daily_literal(trend_runs),
@@ -87,6 +90,7 @@ def _run_summary(run: BackupRun) -> dict[str, Any]:
        "ended_at": run.ended_at,
        "snapshot": run.snapshot,
        "snapshot_path": run.snapshot_path,
        "status": run.status,
        "has_stats": bool(stats),
        "duration_seconds": _int_at(stats, "duration_seconds"),
        "rsync": stats.get("rsync") if isinstance(stats.get("rsync"), dict) else {},
@@ -121,6 +125,13 @@ def _is_real_run(run: BackupRun) -> bool:
    return requested.get("dry_run") is not True
 def _first_run_with_status(runs: list[dict[str, Any]], statuses: set[str]) -> dict[str, Any]:
    """Return the first run summary in ``runs`` whose status is in ``statuses``.

    ``runs`` is scanned in the order given, so the result depends on the
    caller's ordering; each entry must carry a ``"status"`` key. An empty
    dict is returned when no entry matches, which lets callers and templates
    treat "no such run" as a falsy value without a separate ``None`` check.
    """
    for run in runs:
        # Membership test against the requested set of status values.
        if run["status"] in statuses:
            return run
    # No run had one of the requested statuses.
    return {}
 def _capacity_from_system(global_config: GlobalConfig | None) -> dict[str, Any]:
    if global_config is None or not global_config.backup_root:
        return {}
--- a/src/pobsync_backend/templates/pobsync_backend/dashboard.html
+++ b/src/pobsync_backend/templates/pobsync_backend/dashboard.html
@@ -32,7 +32,9 @@
    <div class="metric"><div class="label">Schedules</div><div class="value">{{ counts.enabled_schedules }}/{{ counts.schedules }}</div></div>
    <div class="metric"><div class="label">Snapshots</div><div class="value">{{ counts.snapshots }}</div></div>
    <div class="metric"><div class="label">Runs</div><div class="value">{{ counts.runs }}</div></div>
    <div class="metric"><div class="label">Queued</div><div class="value">{{ counts.queued_runs }}</div></div>
    <div class="metric"><div class="label">Running</div><div class="value">{{ counts.running_runs }}</div></div>
    <div class="metric"><div class="label">Warnings</div><div class="value">{{ counts.warning_runs }}</div></div>
    <div class="metric"><div class="label">Failed</div><div class="value">{{ counts.failed_runs }}</div></div>
  </section>
@@ -77,11 +79,23 @@
                </div>
              </div>
              <div class="host-card-item">
-                <div class="label">Latest Run</div>
+                <div class="label">Last Good Backup</div>
                <div class="value">
-                {% if host.stats_summary.latest_run.id %}
+                {% if host.stats_summary.latest_good_run.id %}
-                  <a href="{% url 'run_detail' host.stats_summary.latest_run.id %}">Run {{ host.stats_summary.latest_run.id }}</a>
+                  <a href="{% url 'run_detail' host.stats_summary.latest_good_run.id %}">Run {{ host.stats_summary.latest_good_run.id }}</a>
-                  <div class="muted">{{ host.stats_summary.latest_run.run_type }} {{ host.stats_summary.latest_run.duration_seconds|default:"" }}{% if host.stats_summary.latest_run.duration_seconds is not None %}s{% endif %}</div>
+                  <div class="muted">{{ host.stats_summary.latest_good_run.run_type }} {{ host.stats_summary.latest_good_run.duration_seconds|default:"" }}{% if host.stats_summary.latest_good_run.duration_seconds is not None %}s{% endif %}</div>
                {% else %}
                  <span class="muted">none</span>
                {% endif %}
                </div>
              </div>
              <div class="host-card-item">
                <div class="label">Latest Issue</div>
                <div class="value">
                {% if host.stats_summary.latest_problem_run.id %}
                  <a href="{% url 'run_detail' host.stats_summary.latest_problem_run.id %}">Run {{ host.stats_summary.latest_problem_run.id }}</a>
                  <div><span class="status {{ host.stats_summary.latest_problem_run.status }}">{{ host.stats_summary.latest_problem_run.status }}</span></div>
                  <div class="muted">{{ host.stats_summary.latest_problem_run.run_type }} {{ host.stats_summary.latest_problem_run.duration_seconds|default:"" }}{% if host.stats_summary.latest_problem_run.duration_seconds is not None %}s{% endif %}</div>
                {% else %}
                  <span class="muted">none</span>
                {% endif %}
--- a/src/pobsync_backend/tests/test_views.py
+++ b/src/pobsync_backend/tests/test_views.py
@@ -42,6 +42,19 @@ class ViewTests(TestCase):
            snapshot=snapshot,
            started_at=datetime(2026, 5, 19, 2, 15, tzinfo=timezone.utc),
        )
        warning_run = BackupRun.objects.create(
            host=host,
            run_type=BackupRun.RunType.SCHEDULED,
            status=BackupRun.Status.WARNING,
            started_at=datetime(2026, 5, 19, 3, 15, tzinfo=timezone.utc),
            result={
                "ok": True,
                "prune": {
                    "ok": False,
                    "error": "Retention warning",
                },
            },
        )
        response = self.client.get(reverse("dashboard"))
@@ -50,8 +63,13 @@ class ViewTests(TestCase):
        self.assertContains(response, "web-01")
        self.assertContains(response, "20260519-021500Z__ABCDEFGH")
        self.assertContains(response, "success")
        self.assertContains(response, "Last Good Backup")
        self.assertContains(response, "Latest Issue")
        self.assertContains(response, f"Run {run.id}")
        self.assertContains(response, f"Run {warning_run.id}")
        self.assertContains(response, "warning")
        self.assertContains(response, "manual")
        self.assertContains(response, "scheduled")
    def test_dashboard_renders_backup_trend_summary(self) -> None:
        self.client.force_login(self.staff_user)
@@ -92,6 +110,8 @@ class ViewTests(TestCase):
        self.assertContains(response, "Runs Until Full")
        self.assertContains(response, "Avg Daily New")
        self.assertContains(response, "Days Until Full")
        self.assertContains(response, "Warnings")
        self.assertContains(response, "Queued")
        self.assertContains(response, "Next Run")
        self.assertContains(response, "UTC")
        self.assertContains(response, "10")
--- a/src/pobsync_backend/views.py
+++ b/src/pobsync_backend/views.py
@@ -71,7 +71,9 @@ def dashboard(request):
            "enabled_schedules": ScheduleConfig.objects.filter(enabled=True).count(),
            "snapshots": SnapshotRecord.objects.count(),
            "runs": BackupRun.objects.count(),
            "queued_runs": BackupRun.objects.filter(status=BackupRun.Status.QUEUED).count(),
            "running_runs": BackupRun.objects.filter(status=BackupRun.Status.RUNNING).count(),
            "warning_runs": BackupRun.objects.filter(status=BackupRun.Status.WARNING).count(),
            "failed_runs": BackupRun.objects.filter(status=BackupRun.Status.FAILED).count(),
        },
    }