(feature) Summarize backup trends in the Django UI

Add a stats summary layer that aggregates recent successful real backup runs
into dashboard and host-level trend metrics.

Show backup-root usage, available space, average new data, average duration,
estimated runs until full, and link-dest savings on the dashboard. Add a host
trend table with recent run duration, file count, new data, matched data, and
snapshot links.

Keep the implementation based on existing run and snapshot stats JSON so the
UI gains useful trend visibility without introducing a schema migration yet.
This commit is contained in:
2026-05-19 22:31:24 +02:00
parent 6940dc55b7
commit fc22842fc4
5 changed files with 298 additions and 3 deletions

View File

@@ -0,0 +1,156 @@
from __future__ import annotations
from pathlib import Path
from typing import Any, Iterable
from pobsync.run_stats import filesystem_capacity
from .models import BackupRun, GlobalConfig, HostConfig, SnapshotRecord
def collect_dashboard_stats(*, hosts: Iterable[HostConfig], global_config: GlobalConfig | None) -> dict[str, Any]:
    """Build the dashboard-wide trend summary and annotate each host.

    Looks at the 100 most recent successful runs, keeps the non-dry-run ones
    that carry a stats payload, and aggregates their rsync and duration
    figures.

    Side effect: every object in ``hosts`` gets a ``stats_summary`` attribute
    holding the result of ``collect_host_stats`` for that host.

    Args:
        hosts: Host configs to annotate with per-host stats.
        global_config: Optional global config; its backup root is used to
            look up current capacity before falling back to run stats.

    Returns:
        A dict with the sample size, average duration, average/total literal
        bytes, total matched bytes, the matched share of transferred data
        (rounded to four decimals), an estimated number of runs until the
        backup root is full, and the capacity dict that was used.
    """
    recent_successes = (
        BackupRun.objects.select_related("host", "snapshot")
        .filter(status=BackupRun.Status.SUCCESS)
        .order_by("-started_at", "-created_at")[:100]
    )

    # Keep only real (non-dry-run) runs whose result includes stats.
    sampled: list[dict[str, Any]] = []
    for run in recent_successes:
        if not _is_real_run(run):
            continue
        summary = _run_summary(run)
        if summary["has_stats"]:
            sampled.append(summary)

    for host in hosts:
        host.stats_summary = collect_host_stats(host=host)

    def _present(*path: str) -> list[int]:
        # Integer values found at `path` across the sampled runs, skipping gaps.
        found = (_int_at(summary, *path) for summary in sampled)
        return [number for number in found if number is not None]

    literal_bytes = _present("rsync", "literal_data_bytes")
    matched_bytes = _present("rsync", "matched_data_bytes")
    durations = _present("duration_seconds")

    avg_literal = _average(literal_bytes)
    total_literal = sum(literal_bytes)
    total_matched = sum(matched_bytes)
    savings_basis = total_literal + total_matched

    # Prefer the live capacity lookup; fall back to what the runs recorded.
    capacity = _capacity_from_system(global_config)
    if not capacity:
        capacity = _latest_capacity_from_runs(sampled) or {}
    available = _int_at(capacity, "available_bytes")

    if savings_basis:
        savings_ratio = round(total_matched / savings_basis, 4)
    else:
        savings_ratio = None

    if available and avg_literal:
        runs_until_full = int(available / avg_literal)
    else:
        runs_until_full = None

    return {
        "runs_sampled": len(sampled),
        "avg_duration_seconds": _average(durations),
        "avg_literal_data_bytes": avg_literal,
        "total_literal_data_bytes": total_literal,
        "total_matched_data_bytes": total_matched,
        "link_dest_savings_ratio": savings_ratio,
        "estimated_runs_until_full": runs_until_full,
        "capacity": capacity,
    }
def collect_host_stats(*, host: HostConfig, limit: int = 8) -> dict[str, Any]:
    """Summarize recent backup trends for a single host.

    Considers the host's 50 most recent successful runs, keeps the
    non-dry-run ones that carry stats, and truncates that list to ``limit``
    entries (newest first).

    Args:
        host: The host whose runs and snapshots are summarized.
        limit: Maximum number of run summaries to keep.

    Returns:
        A dict with the kept run summaries, the newest of them (or ``{}``),
        a summary of the host's latest snapshot (or ``{}``), and the
        average/total literal bytes and total matched bytes over the kept
        runs.
    """
    successes = (
        host.runs.select_related("snapshot")
        .filter(status=BackupRun.Status.SUCCESS)
        .order_by("-started_at", "-created_at")[:50]
    )
    summaries = [_run_summary(run) for run in successes if _is_real_run(run)]
    trend_runs = [summary for summary in summaries if summary["has_stats"]][:limit]

    newest_snapshot = host.snapshots.order_by("-started_at", "-discovered_at", "-id").first()
    snapshot_stats = _snapshot_summary(newest_snapshot) if newest_snapshot else {}

    literal_bytes: list[int] = []
    matched_bytes: list[int] = []
    for summary in trend_runs:
        literal = _int_at(summary, "rsync", "literal_data_bytes")
        if literal is not None:
            literal_bytes.append(literal)
        matched = _int_at(summary, "rsync", "matched_data_bytes")
        if matched is not None:
            matched_bytes.append(matched)

    return {
        "runs": trend_runs,
        "latest_run": trend_runs[0] if trend_runs else {},
        "latest_snapshot": snapshot_stats,
        "avg_literal_data_bytes": _average(literal_bytes),
        "total_literal_data_bytes": sum(literal_bytes),
        "total_matched_data_bytes": sum(matched_bytes),
    }
def _run_summary(run: BackupRun) -> dict[str, Any]:
    """Flatten a backup run and its stats payload into a plain dict.

    Reads ``run.result["stats"]`` defensively: any level that is not a dict
    is treated as empty. ``has_stats`` is true only when the stats dict is
    non-empty.
    """
    payload = run.result
    if not isinstance(payload, dict):
        payload = {}

    stats = payload.get("stats")
    if not isinstance(stats, dict):
        stats = {}

    rsync = stats.get("rsync")
    storage = stats.get("storage")

    return {
        "id": run.id,
        "host": run.host.host,
        "started_at": run.started_at,
        "ended_at": run.ended_at,
        "snapshot": run.snapshot,
        "snapshot_path": run.snapshot_path,
        "has_stats": bool(stats),
        "duration_seconds": _int_at(stats, "duration_seconds"),
        "rsync": rsync if isinstance(rsync, dict) else {},
        "storage": storage if isinstance(storage, dict) else {},
    }
def _snapshot_summary(snapshot: SnapshotRecord | None) -> dict[str, Any]:
if snapshot is None:
return {}
metadata = snapshot.metadata if isinstance(snapshot.metadata, dict) else {}
stats = metadata.get("stats") if isinstance(metadata.get("stats"), dict) else {}
storage = stats.get("storage") if isinstance(stats.get("storage"), dict) else {}
snapshot_storage = storage.get("snapshot") if isinstance(storage.get("snapshot"), dict) else {}
return {
"id": snapshot.id,
"dirname": snapshot.dirname,
"kind": snapshot.kind,
"status": snapshot.status,
"started_at": snapshot.started_at,
"apparent_size_bytes": _int_at(snapshot_storage, "apparent_size_bytes"),
"allocated_size_bytes": _int_at(snapshot_storage, "allocated_size_bytes"),
"hardlinked_files": _int_at(snapshot_storage, "hardlinked_files"),
}
def _is_real_run(run: BackupRun) -> bool:
result = run.result if isinstance(run.result, dict) else {}
if result.get("dry_run") is True:
return False
requested = result.get("requested") if isinstance(result.get("requested"), dict) else {}
return requested.get("dry_run") is not True
def _capacity_from_system(global_config: GlobalConfig | None) -> dict[str, Any]:
if global_config is None or not global_config.backup_root:
return {}
return filesystem_capacity(Path(global_config.backup_root))
def _latest_capacity_from_runs(runs: list[dict[str, Any]]) -> dict[str, Any]:
for run in runs:
capacity = _dict_at(run, "storage", "capacity")
if capacity:
return capacity
return {}
def _average(values: list[int]) -> int | None:
if not values:
return None
return int(sum(values) / len(values))
def _dict_at(data: dict[str, Any], *keys: str) -> dict[str, Any]:
value: Any = data
for key in keys:
if not isinstance(value, dict):
return {}
value = value.get(key)
return value if isinstance(value, dict) else {}
def _int_at(data: dict[str, Any], *keys: str) -> int | None:
value: Any = data
for key in keys:
if not isinstance(value, dict):
return None
value = value.get(key)
if isinstance(value, bool):
return None
if isinstance(value, int):
return value
if isinstance(value, float):
return int(value)
return None

View File

@@ -37,6 +37,17 @@
<div class="metric"><div class="label">Failed</div><div class="value">{{ counts.failed_runs }}</div></div> <div class="metric"><div class="label">Failed</div><div class="value">{{ counts.failed_runs }}</div></div>
</section> </section>
{% if stats_summary.runs_sampled %}
<section class="grid" aria-label="Backup trends">
<div class="metric"><div class="label">Backup Root Used</div><div class="value">{{ stats_summary.capacity.used_percent|default:"" }}{% if stats_summary.capacity.used_percent is not None %}%{% endif %}</div></div>
<div class="metric"><div class="label">Available</div><div class="value">{{ stats_summary.capacity.available_bytes|filesizeformat }}</div></div>
<div class="metric"><div class="label">Avg New Data</div><div class="value">{{ stats_summary.avg_literal_data_bytes|filesizeformat }}</div></div>
<div class="metric"><div class="label">Avg Duration</div><div class="value">{{ stats_summary.avg_duration_seconds|default:"" }}{% if stats_summary.avg_duration_seconds is not None %}s{% endif %}</div></div>
<div class="metric"><div class="label">Link-Dest Savings</div><div class="value">{{ stats_summary.link_dest_savings_ratio|default:"" }}</div></div>
<div class="metric"><div class="label">Runs Until Full</div><div class="value">{{ stats_summary.estimated_runs_until_full|default:"" }}</div></div>
</section>
{% endif %}
<section class="panel"> <section class="panel">
<h2>Hosts</h2> <h2>Hosts</h2>
<table> <table>
@@ -47,6 +58,8 @@
<th>Enabled</th> <th>Enabled</th>
<th>Snapshots</th> <th>Snapshots</th>
<th>Latest Snapshot</th> <th>Latest Snapshot</th>
<th>Latest Run</th>
<th>New Data</th>
<th>Runs</th> <th>Runs</th>
<th>Retention</th> <th>Retention</th>
</tr> </tr>
@@ -66,11 +79,20 @@
<span class="muted">none</span> <span class="muted">none</span>
{% endif %} {% endif %}
</td> </td>
<td>
{% if host.stats_summary.latest_run.id %}
<a href="{% url 'run_detail' host.stats_summary.latest_run.id %}">Run {{ host.stats_summary.latest_run.id }}</a>
<div class="muted">{{ host.stats_summary.latest_run.duration_seconds|default:"" }}{% if host.stats_summary.latest_run.duration_seconds is not None %}s{% endif %}</div>
{% else %}
<span class="muted">none</span>
{% endif %}
</td>
<td>{{ host.stats_summary.latest_run.rsync.literal_data_bytes|filesizeformat }}</td>
<td>{{ host.run_count }}</td> <td>{{ host.run_count }}</td>
<td>d{{ host.retention_daily }} w{{ host.retention_weekly }} m{{ host.retention_monthly }} y{{ host.retention_yearly }}</td> <td>d{{ host.retention_daily }} w{{ host.retention_weekly }} m{{ host.retention_monthly }} y{{ host.retention_yearly }}</td>
</tr> </tr>
{% empty %} {% empty %}
<tr><td colspan="7" class="muted">No hosts configured yet.</td></tr> <tr><td colspan="9" class="muted">No hosts configured yet.</td></tr>
{% endfor %} {% endfor %}
</tbody> </tbody>
</table> </table>

View File

@@ -78,6 +78,44 @@
</div> </div>
</section> </section>
{% if stats_summary.runs %}
<section class="panel">
<h2>Backup Trends</h2>
<section class="grid" aria-label="Host backup trend summary">
<div class="metric"><div class="label">Avg New Data</div><div class="value">{{ stats_summary.avg_literal_data_bytes|filesizeformat }}</div></div>
<div class="metric"><div class="label">Total New Data</div><div class="value">{{ stats_summary.total_literal_data_bytes|filesizeformat }}</div></div>
<div class="metric"><div class="label">Matched Data</div><div class="value">{{ stats_summary.total_matched_data_bytes|filesizeformat }}</div></div>
<div class="metric"><div class="label">Latest Duration</div><div class="value">{{ stats_summary.latest_run.duration_seconds|default:"" }}{% if stats_summary.latest_run.duration_seconds is not None %}s{% endif %}</div></div>
</section>
<table>
<thead>
<tr>
<th>Run</th>
<th>Started</th>
<th>Duration</th>
<th>Files</th>
<th>New Data</th>
<th>Matched</th>
<th>Snapshot</th>
</tr>
</thead>
<tbody>
{% for run in stats_summary.runs %}
<tr>
<td><a href="{% url 'run_detail' run.id %}">Run {{ run.id }}</a></td>
<td>{{ run.started_at|default:"" }}</td>
<td>{{ run.duration_seconds|default:"" }}{% if run.duration_seconds is not None %}s{% endif %}</td>
<td>{{ run.rsync.files_total|default:"" }}</td>
<td>{{ run.rsync.literal_data_bytes|filesizeformat }}</td>
<td>{{ run.rsync.matched_data_bytes|filesizeformat }}</td>
<td>{% if run.snapshot %}<a href="{% url 'snapshot_detail' run.snapshot.id %}">{{ run.snapshot.dirname }}</a>{% else %}<span class="muted">{{ run.snapshot_path }}</span>{% endif %}</td>
</tr>
{% endfor %}
</tbody>
</table>
</section>
{% endif %}
<section class="panel"> <section class="panel">
<h2>Host Check</h2> <h2>Host Check</h2>
<section class="grid" aria-label="Host check summary"> <section class="grid" aria-label="Host check summary">

View File

@@ -35,7 +35,7 @@ class ViewTests(TestCase):
self.client.force_login(self.staff_user) self.client.force_login(self.staff_user)
host = HostConfig.objects.create(host="web-01", address="web-01.example.test") host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
snapshot = self._snapshot(host, "20260519-021500Z__ABCDEFGH") snapshot = self._snapshot(host, "20260519-021500Z__ABCDEFGH")
BackupRun.objects.create( run = BackupRun.objects.create(
host=host, host=host,
status=BackupRun.Status.SUCCESS, status=BackupRun.Status.SUCCESS,
snapshot=snapshot, snapshot=snapshot,
@@ -50,6 +50,45 @@ class ViewTests(TestCase):
self.assertContains(response, "20260519-021500Z__ABCDEFGH") self.assertContains(response, "20260519-021500Z__ABCDEFGH")
self.assertContains(response, "success") self.assertContains(response, "success")
def test_dashboard_renders_backup_trend_summary(self) -> None:
    """The dashboard shows the trend section for a successful run with stats."""
    self.client.force_login(self.staff_user)
    GlobalConfig.objects.create(name="default", backup_root="/missing-backup-root")
    host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
    snapshot = self._snapshot(host, "20260519-021500Z__ABCDEFGH")

    rsync_stats = {
        "files_total": 100,
        "literal_data_bytes": 1000,
        "matched_data_bytes": 4000,
    }
    recorded_capacity = {
        "available_bytes": 10_000,
        "used_percent": 25.0,
    }
    run = BackupRun.objects.create(
        host=host,
        status=BackupRun.Status.SUCCESS,
        snapshot=snapshot,
        started_at=datetime(2026, 5, 19, 2, 15, tzinfo=timezone.utc),
        result={
            "ok": True,
            "dry_run": False,
            "stats": {
                "duration_seconds": 30,
                "rsync": rsync_stats,
                "storage": {"capacity": recorded_capacity},
            },
        },
    )

    response = self.client.get(reverse("dashboard"))

    self.assertEqual(response.status_code, 200)
    # NOTE(review): "10" presumably targets the runs-until-full estimate
    # (10_000 available / 1000 average), but as a substring it also matches
    # "1000" and "100" -- confirm whether a stricter check is wanted.
    expected_fragments = (
        "Backup Root Used",
        "Runs Until Full",
        "10",
        f"Run {run.id}",
        "1000",
    )
    for fragment in expected_fragments:
        self.assertContains(response, fragment)
def test_dashboard_links_latest_snapshot_for_each_host(self) -> None: def test_dashboard_links_latest_snapshot_for_each_host(self) -> None:
self.client.force_login(self.staff_user) self.client.force_login(self.staff_user)
host = HostConfig.objects.create(host="web-01", address="web-01.example.test") host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
@@ -520,6 +559,40 @@ class ViewTests(TestCase):
self.assertContains(response, reverse("run_detail", args=[BackupRun.objects.get().id])) self.assertContains(response, reverse("run_detail", args=[BackupRun.objects.get().id]))
self.assertContains(response, reverse("snapshot_detail", args=[snapshot.id])) self.assertContains(response, reverse("snapshot_detail", args=[snapshot.id]))
def test_host_detail_renders_backup_trends(self) -> None:
    """The host detail page shows the trend panel for a run with stats."""
    self.client.force_login(self.staff_user)
    GlobalConfig.objects.create(name="default", backup_root="/backups")
    host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
    snapshot = self._snapshot(host, "20260519-021500Z__ABCDEFGH")

    run_stats = {
        "duration_seconds": 45,
        "rsync": {
            "files_total": 250,
            "literal_data_bytes": 2048,
            "matched_data_bytes": 8192,
        },
    }
    BackupRun.objects.create(
        host=host,
        status=BackupRun.Status.SUCCESS,
        snapshot=snapshot,
        started_at=datetime(2026, 5, 19, 2, 15, tzinfo=timezone.utc),
        result={"ok": True, "dry_run": False, "stats": run_stats},
    )

    response = self.client.get(reverse("host_detail", args=[host.host]))

    self.assertEqual(response.status_code, 200)
    # The size is checked as two fragments ("2.0" and "KB") rather than one
    # string; presumably the rendered separator is not a plain space.
    expected_fragments = (
        "Backup Trends",
        "Avg New Data",
        "45s",
        "250",
        "2.0",
        "KB",
    )
    for fragment in expected_fragments:
        self.assertContains(response, fragment)
def test_prepare_host_directories_action_creates_missing_directories(self) -> None: def test_prepare_host_directories_action_creates_missing_directories(self) -> None:
self.client.force_login(self.staff_user) self.client.force_login(self.staff_user)
with TemporaryDirectory() as tmp: with TemporaryDirectory() as tmp:

View File

@@ -34,10 +34,12 @@ from .retention import run_sql_retention_apply, run_sql_retention_plan
from .self_check import collect_self_checks, summarize_self_checks from .self_check import collect_self_checks, summarize_self_checks
from .snapshot_discovery import discover_snapshots, inspect_snapshot_discovery from .snapshot_discovery import discover_snapshots, inspect_snapshot_discovery
from .ssh_keys import SshKeyError, delete_generated_key_files, generate_ssh_key, merge_known_hosts, scan_known_host from .ssh_keys import SshKeyError, delete_generated_key_files, generate_ssh_key, merge_known_hosts, scan_known_host
from .stats_summary import collect_dashboard_stats, collect_host_stats
@staff_member_required @staff_member_required
def dashboard(request): def dashboard(request):
global_config = GlobalConfig.objects.filter(name="default").first()
hosts = list( hosts = list(
HostConfig.objects.annotate(snapshot_count=Count("snapshots", distinct=True), run_count=Count("runs", distinct=True)) HostConfig.objects.annotate(snapshot_count=Count("snapshots", distinct=True), run_count=Count("runs", distinct=True))
.order_by("host") .order_by("host")
@@ -48,9 +50,11 @@ def dashboard(request):
.order_by("-started_at", "-discovered_at", "-id") .order_by("-started_at", "-discovered_at", "-id")
.first() .first()
) )
stats_summary = collect_dashboard_stats(hosts=hosts, global_config=global_config)
context = { context = {
"hosts": hosts, "hosts": hosts,
"global_config": GlobalConfig.objects.filter(name="default").first(), "global_config": global_config,
"stats_summary": stats_summary,
"latest_runs": BackupRun.objects.select_related("host", "snapshot").order_by("-created_at")[:10], "latest_runs": BackupRun.objects.select_related("host", "snapshot").order_by("-created_at")[:10],
"counts": { "counts": {
"global_configs": GlobalConfig.objects.count(), "global_configs": GlobalConfig.objects.count(),
@@ -258,12 +262,14 @@ def host_detail(request, host: str):
).order_by("created_at", "id").first() ).order_by("created_at", "id").first()
has_global_config = GlobalConfig.objects.filter(name="default").exists() has_global_config = GlobalConfig.objects.filter(name="default").exists()
host_checks = collect_host_checks(host_config) host_checks = collect_host_checks(host_config)
stats_summary = collect_host_stats(host=host_config, limit=10)
context = { context = {
"host": host_config, "host": host_config,
"schedule": _schedule_for_host(host_config), "schedule": _schedule_for_host(host_config),
"discovery": inspect_snapshot_discovery(host=host_config), "discovery": inspect_snapshot_discovery(host=host_config),
"host_checks": host_checks, "host_checks": host_checks,
"host_check_summary": summarize_self_checks(host_checks), "host_check_summary": summarize_self_checks(host_checks),
"stats_summary": stats_summary,
"manual_backup_form": ManualBackupForm(initial=_default_manual_backup_initial(host_config)), "manual_backup_form": ManualBackupForm(initial=_default_manual_backup_initial(host_config)),
"can_queue_backup": host_config.enabled and has_global_config, "can_queue_backup": host_config.enabled and has_global_config,
"has_global_config": has_global_config, "has_global_config": has_global_config,