(ui) Separate healthy and problematic dashboard runs

Split dashboard host cards into last successful backup and latest warning
or failed run so operators can quickly see whether a host is protected even
when recent activity produced an issue.

Also add queued and warning run counts to the dashboard summary metrics.
This commit is contained in:
2026-05-21 01:34:38 +02:00
parent 17215fd191
commit ef1761385e
4 changed files with 54 additions and 7 deletions

View File

@@ -52,9 +52,10 @@ def collect_dashboard_stats(*, hosts: Iterable[HostConfig], global_config: Globa
def collect_host_stats(*, host: HostConfig, limit: int = 8) -> dict[str, Any]: def collect_host_stats(*, host: HostConfig, limit: int = 8) -> dict[str, Any]:
runs = list(host.runs.select_related("snapshot").filter(status__in=_COMPLETED_BACKUP_STATUSES).order_by("-started_at", "-created_at")[:50]) runs = list(host.runs.select_related("snapshot").order_by("-started_at", "-created_at")[:50])
real_runs = [_run_summary(run) for run in runs if _is_real_run(run)] real_runs = [_run_summary(run) for run in runs if _is_real_run(run)]
trend_runs = [run for run in real_runs if run["has_stats"]][:limit] completed_real_runs = [run for run in real_runs if run["status"] in _COMPLETED_BACKUP_STATUSES]
trend_runs = [run for run in completed_real_runs if run["has_stats"]][:limit]
latest_snapshot = host.snapshots.order_by("-started_at", "-discovered_at", "-id").first() latest_snapshot = host.snapshots.order_by("-started_at", "-discovered_at", "-id").first()
latest_snapshot_stats = _snapshot_summary(latest_snapshot) if latest_snapshot else {} latest_snapshot_stats = _snapshot_summary(latest_snapshot) if latest_snapshot else {}
@@ -67,7 +68,9 @@ def collect_host_stats(*, host: HostConfig, limit: int = 8) -> dict[str, Any]:
return { return {
"runs": [_with_bar_percentages(run, max_literal=max_literal, max_matched=max_matched) for run in trend_runs], "runs": [_with_bar_percentages(run, max_literal=max_literal, max_matched=max_matched) for run in trend_runs],
"latest_run": real_runs[0] if real_runs else {}, "latest_run": completed_real_runs[0] if completed_real_runs else {},
"latest_good_run": _first_run_with_status(real_runs, {BackupRun.Status.SUCCESS}),
"latest_problem_run": _first_run_with_status(real_runs, {BackupRun.Status.WARNING, BackupRun.Status.FAILED}),
"latest_snapshot": latest_snapshot_stats, "latest_snapshot": latest_snapshot_stats,
"avg_literal_data_bytes": _average(literal_values), "avg_literal_data_bytes": _average(literal_values),
"avg_daily_literal_data_bytes": _average_daily_literal(trend_runs), "avg_daily_literal_data_bytes": _average_daily_literal(trend_runs),
@@ -87,6 +90,7 @@ def _run_summary(run: BackupRun) -> dict[str, Any]:
"ended_at": run.ended_at, "ended_at": run.ended_at,
"snapshot": run.snapshot, "snapshot": run.snapshot,
"snapshot_path": run.snapshot_path, "snapshot_path": run.snapshot_path,
"status": run.status,
"has_stats": bool(stats), "has_stats": bool(stats),
"duration_seconds": _int_at(stats, "duration_seconds"), "duration_seconds": _int_at(stats, "duration_seconds"),
"rsync": stats.get("rsync") if isinstance(stats.get("rsync"), dict) else {}, "rsync": stats.get("rsync") if isinstance(stats.get("rsync"), dict) else {},
@@ -121,6 +125,13 @@ def _is_real_run(run: BackupRun) -> bool:
return requested.get("dry_run") is not True return requested.get("dry_run") is not True
def _first_run_with_status(runs: list[dict[str, Any]], statuses: set[str]) -> dict[str, Any]:
    """Return the first run summary whose ``"status"`` is in *statuses*.

    *runs* is scanned in the order given, so the caller's ordering decides
    which match wins. When no summary matches, an empty dict is returned
    instead of ``None``.
    """
    # Lazy generator: scanning stops at the first match, like an early return.
    matching = (summary for summary in runs if summary["status"] in statuses)
    return next(matching, {})
def _capacity_from_system(global_config: GlobalConfig | None) -> dict[str, Any]: def _capacity_from_system(global_config: GlobalConfig | None) -> dict[str, Any]:
if global_config is None or not global_config.backup_root: if global_config is None or not global_config.backup_root:
return {} return {}

View File

@@ -32,7 +32,9 @@
<div class="metric"><div class="label">Schedules</div><div class="value">{{ counts.enabled_schedules }}/{{ counts.schedules }}</div></div> <div class="metric"><div class="label">Schedules</div><div class="value">{{ counts.enabled_schedules }}/{{ counts.schedules }}</div></div>
<div class="metric"><div class="label">Snapshots</div><div class="value">{{ counts.snapshots }}</div></div> <div class="metric"><div class="label">Snapshots</div><div class="value">{{ counts.snapshots }}</div></div>
<div class="metric"><div class="label">Runs</div><div class="value">{{ counts.runs }}</div></div> <div class="metric"><div class="label">Runs</div><div class="value">{{ counts.runs }}</div></div>
<div class="metric"><div class="label">Queued</div><div class="value">{{ counts.queued_runs }}</div></div>
<div class="metric"><div class="label">Running</div><div class="value">{{ counts.running_runs }}</div></div> <div class="metric"><div class="label">Running</div><div class="value">{{ counts.running_runs }}</div></div>
<div class="metric"><div class="label">Warnings</div><div class="value">{{ counts.warning_runs }}</div></div>
<div class="metric"><div class="label">Failed</div><div class="value">{{ counts.failed_runs }}</div></div> <div class="metric"><div class="label">Failed</div><div class="value">{{ counts.failed_runs }}</div></div>
</section> </section>
@@ -77,11 +79,23 @@
</div> </div>
</div> </div>
<div class="host-card-item"> <div class="host-card-item">
<div class="label">Latest Run</div> <div class="label">Last Good Backup</div>
<div class="value"> <div class="value">
{% if host.stats_summary.latest_run.id %} {% if host.stats_summary.latest_good_run.id %}
<a href="{% url 'run_detail' host.stats_summary.latest_run.id %}">Run {{ host.stats_summary.latest_run.id }}</a> <a href="{% url 'run_detail' host.stats_summary.latest_good_run.id %}">Run {{ host.stats_summary.latest_good_run.id }}</a>
<div class="muted">{{ host.stats_summary.latest_run.run_type }} {{ host.stats_summary.latest_run.duration_seconds|default:"" }}{% if host.stats_summary.latest_run.duration_seconds is not None %}s{% endif %}</div> <div class="muted">{{ host.stats_summary.latest_good_run.run_type }} {{ host.stats_summary.latest_good_run.duration_seconds|default:"" }}{% if host.stats_summary.latest_good_run.duration_seconds is not None %}s{% endif %}</div>
{% else %}
<span class="muted">none</span>
{% endif %}
</div>
</div>
<div class="host-card-item">
<div class="label">Latest Issue</div>
<div class="value">
{% if host.stats_summary.latest_problem_run.id %}
<a href="{% url 'run_detail' host.stats_summary.latest_problem_run.id %}">Run {{ host.stats_summary.latest_problem_run.id }}</a>
<div><span class="status {{ host.stats_summary.latest_problem_run.status }}">{{ host.stats_summary.latest_problem_run.status }}</span></div>
<div class="muted">{{ host.stats_summary.latest_problem_run.run_type }} {{ host.stats_summary.latest_problem_run.duration_seconds|default:"" }}{% if host.stats_summary.latest_problem_run.duration_seconds is not None %}s{% endif %}</div>
{% else %} {% else %}
<span class="muted">none</span> <span class="muted">none</span>
{% endif %} {% endif %}

View File

@@ -42,6 +42,19 @@ class ViewTests(TestCase):
snapshot=snapshot, snapshot=snapshot,
started_at=datetime(2026, 5, 19, 2, 15, tzinfo=timezone.utc), started_at=datetime(2026, 5, 19, 2, 15, tzinfo=timezone.utc),
) )
warning_run = BackupRun.objects.create(
host=host,
run_type=BackupRun.RunType.SCHEDULED,
status=BackupRun.Status.WARNING,
started_at=datetime(2026, 5, 19, 3, 15, tzinfo=timezone.utc),
result={
"ok": True,
"prune": {
"ok": False,
"error": "Retention warning",
},
},
)
response = self.client.get(reverse("dashboard")) response = self.client.get(reverse("dashboard"))
@@ -50,8 +63,13 @@ class ViewTests(TestCase):
self.assertContains(response, "web-01") self.assertContains(response, "web-01")
self.assertContains(response, "20260519-021500Z__ABCDEFGH") self.assertContains(response, "20260519-021500Z__ABCDEFGH")
self.assertContains(response, "success") self.assertContains(response, "success")
self.assertContains(response, "Last Good Backup")
self.assertContains(response, "Latest Issue")
self.assertContains(response, f"Run {run.id}") self.assertContains(response, f"Run {run.id}")
self.assertContains(response, f"Run {warning_run.id}")
self.assertContains(response, "warning")
self.assertContains(response, "manual") self.assertContains(response, "manual")
self.assertContains(response, "scheduled")
def test_dashboard_renders_backup_trend_summary(self) -> None: def test_dashboard_renders_backup_trend_summary(self) -> None:
self.client.force_login(self.staff_user) self.client.force_login(self.staff_user)
@@ -92,6 +110,8 @@ class ViewTests(TestCase):
self.assertContains(response, "Runs Until Full") self.assertContains(response, "Runs Until Full")
self.assertContains(response, "Avg Daily New") self.assertContains(response, "Avg Daily New")
self.assertContains(response, "Days Until Full") self.assertContains(response, "Days Until Full")
self.assertContains(response, "Warnings")
self.assertContains(response, "Queued")
self.assertContains(response, "Next Run") self.assertContains(response, "Next Run")
self.assertContains(response, "UTC") self.assertContains(response, "UTC")
self.assertContains(response, "10") self.assertContains(response, "10")

View File

@@ -71,7 +71,9 @@ def dashboard(request):
"enabled_schedules": ScheduleConfig.objects.filter(enabled=True).count(), "enabled_schedules": ScheduleConfig.objects.filter(enabled=True).count(),
"snapshots": SnapshotRecord.objects.count(), "snapshots": SnapshotRecord.objects.count(),
"runs": BackupRun.objects.count(), "runs": BackupRun.objects.count(),
"queued_runs": BackupRun.objects.filter(status=BackupRun.Status.QUEUED).count(),
"running_runs": BackupRun.objects.filter(status=BackupRun.Status.RUNNING).count(), "running_runs": BackupRun.objects.filter(status=BackupRun.Status.RUNNING).count(),
"warning_runs": BackupRun.objects.filter(status=BackupRun.Status.WARNING).count(),
"failed_runs": BackupRun.objects.filter(status=BackupRun.Status.FAILED).count(), "failed_runs": BackupRun.objects.filter(status=BackupRun.Status.FAILED).count(),
}, },
} }