From fc22842fc47dae52dc4d2023909a27a6e3e6a7f6 Mon Sep 17 00:00:00 2001
From: Peter van Arkel
Date: Tue, 19 May 2026 22:31:24 +0200
Subject: [PATCH] (feature) Summarize backup trends in the Django UI

Add a stats summary layer that aggregates recent successful real backup
runs into dashboard and host-level trend metrics.

Show backup-root usage, available space, average new data, average
duration, estimated runs until full, and link-dest savings on the
dashboard.

Add a host trend table with recent run duration, file count, new data,
matched data, and snapshot links.

Keep the implementation based on existing run and snapshot stats JSON so
the UI gains useful trend visibility without introducing a schema
migration yet.
---
 src/pobsync_backend/stats_summary.py          | 195 ++++++++++++++++++
 .../templates/pobsync_backend/dashboard.html  |  24 ++-
 .../pobsync_backend/host_detail.html          |  38 +++++
 src/pobsync_backend/tests/test_views.py       |  75 ++++++++-
 src/pobsync_backend/views.py                  |   8 +-
 5 files changed, 337 insertions(+), 3 deletions(-)
 create mode 100644 src/pobsync_backend/stats_summary.py

diff --git a/src/pobsync_backend/stats_summary.py b/src/pobsync_backend/stats_summary.py
new file mode 100644
index 0000000..bbeae97
--- /dev/null
+++ b/src/pobsync_backend/stats_summary.py
@@ -0,0 +1,195 @@
+"""Trend summaries built from the stats JSON stored on backup runs and snapshots."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any, Iterable
+
+from pobsync.run_stats import filesystem_capacity
+
+from .models import BackupRun, GlobalConfig, HostConfig, SnapshotRecord
+
+
+def collect_dashboard_stats(*, hosts: Iterable[HostConfig], global_config: GlobalConfig | None) -> dict[str, Any]:
+    """Aggregate recent successful real runs into dashboard-level trend metrics.
+
+    Looks at the 100 most recent successful runs, skips dry runs and runs
+    without a stats payload, and sets ``stats_summary`` on every host in
+    ``hosts`` as a side effect.
+
+    Capacity comes from ``filesystem_capacity`` for the configured backup root
+    when that yields a value, otherwise from the newest sampled run that
+    recorded one.
+    """
+    runs = list(
+        BackupRun.objects.select_related("host", "snapshot")
+        .filter(status=BackupRun.Status.SUCCESS)
+        .order_by("-started_at", "-created_at")[:100]
+    )
+    real_runs = [_run_summary(run) for run in runs if _is_real_run(run)]
+    real_runs = [run for run in real_runs if run["has_stats"]]
+
+    # NOTE: collect_host_stats issues its own queries for every host.
+    for host in hosts:
+        host.stats_summary = collect_host_stats(host=host)
+
+    literal_values = _int_values(real_runs, "rsync", "literal_data_bytes")
+    matched_values = _int_values(real_runs, "rsync", "matched_data_bytes")
+    duration_values = _int_values(real_runs, "duration_seconds")
+
+    avg_literal = _average(literal_values)
+    total_literal = sum(literal_values)
+    total_matched = sum(matched_values)
+    savings_basis = total_literal + total_matched
+    capacity = _capacity_from_system(global_config) or _latest_capacity_from_runs(real_runs) or {}
+    available = _int_at(capacity, "available_bytes")
+
+    return {
+        "runs_sampled": len(real_runs),
+        "avg_duration_seconds": _average(duration_values),
+        "avg_literal_data_bytes": avg_literal,
+        "total_literal_data_bytes": total_literal,
+        "total_matched_data_bytes": total_matched,
+        "link_dest_savings_ratio": round(total_matched / savings_basis, 4) if savings_basis else None,
+        "estimated_runs_until_full": _runs_until_full(available, avg_literal),
+        "capacity": capacity,
+    }
+
+
+def collect_host_stats(*, host: HostConfig, limit: int = 8) -> dict[str, Any]:
+    """Summarize the most recent successful real runs of one host.
+
+    At most ``limit`` runs with a stats payload are kept, newest first, taken
+    from the host's 50 most recent successful runs.
+    """
+    runs = list(
+        host.runs.select_related("snapshot")
+        .filter(status=BackupRun.Status.SUCCESS)
+        .order_by("-started_at", "-created_at")[:50]
+    )
+    real_runs = [_run_summary(run) for run in runs if _is_real_run(run)]
+    real_runs = [run for run in real_runs if run["has_stats"]][:limit]
+    latest_snapshot = host.snapshots.order_by("-started_at", "-discovered_at", "-id").first()
+
+    literal_values = _int_values(real_runs, "rsync", "literal_data_bytes")
+    matched_values = _int_values(real_runs, "rsync", "matched_data_bytes")
+
+    return {
+        "runs": real_runs,
+        "latest_run": real_runs[0] if real_runs else {},
+        "latest_snapshot": _snapshot_summary(latest_snapshot),
+        "avg_literal_data_bytes": _average(literal_values),
+        "total_literal_data_bytes": sum(literal_values),
+        "total_matched_data_bytes": sum(matched_values),
+    }
+
+
+def _run_summary(run: BackupRun) -> dict[str, Any]:
+    """Flatten a run and the ``stats`` entry of its result into one dict."""
+    stats = _dict_at(run.result, "stats")
+    return {
+        "id": run.id,
+        "host": run.host.host,
+        "started_at": run.started_at,
+        "ended_at": run.ended_at,
+        "snapshot": run.snapshot,
+        "snapshot_path": run.snapshot_path,
+        "has_stats": bool(stats),
+        "duration_seconds": _int_at(stats, "duration_seconds"),
+        "rsync": _dict_at(stats, "rsync"),
+        "storage": _dict_at(stats, "storage"),
+    }
+
+
+def _snapshot_summary(snapshot: SnapshotRecord | None) -> dict[str, Any]:
+    """Return identifying fields and recorded size stats for a snapshot, or ``{}`` for ``None``."""
+    if snapshot is None:
+        return {}
+    snapshot_storage = _dict_at(snapshot.metadata, "stats", "storage", "snapshot")
+    return {
+        "id": snapshot.id,
+        "dirname": snapshot.dirname,
+        "kind": snapshot.kind,
+        "status": snapshot.status,
+        "started_at": snapshot.started_at,
+        "apparent_size_bytes": _int_at(snapshot_storage, "apparent_size_bytes"),
+        "allocated_size_bytes": _int_at(snapshot_storage, "allocated_size_bytes"),
+        "hardlinked_files": _int_at(snapshot_storage, "hardlinked_files"),
+    }
+
+
+def _is_real_run(run: BackupRun) -> bool:
+    """Return False when the run result or its ``requested`` entry marks a dry run."""
+    result = run.result if isinstance(run.result, dict) else {}
+    if result.get("dry_run") is True:
+        return False
+    return _dict_at(result, "requested").get("dry_run") is not True
+
+
+def _capacity_from_system(global_config: GlobalConfig | None) -> dict[str, Any]:
+    """Return ``filesystem_capacity`` for the configured backup root, or ``{}`` without one."""
+    if global_config is None or not global_config.backup_root:
+        return {}
+    return filesystem_capacity(Path(global_config.backup_root))
+
+
+def _latest_capacity_from_runs(runs: list[dict[str, Any]]) -> dict[str, Any]:
+    """Return the first non-empty ``storage.capacity`` dict among ``runs``, or ``{}``."""
+    for run in runs:
+        capacity = _dict_at(run, "storage", "capacity")
+        if capacity:
+            return capacity
+    return {}
+
+
+def _runs_until_full(available: int | None, avg_new_bytes: int | None) -> int | None:
+    """Estimate how many average-sized runs fit into the available space.
+
+    Returns ``None`` when either value is unknown or the average is not positive.
+    """
+    if available is None or avg_new_bytes is None or avg_new_bytes <= 0:
+        return None
+    # Integer floor division stays exact for large byte counts; a full disk yields 0.
+    return max(available, 0) // avg_new_bytes
+
+
+def _int_values(runs: Iterable[dict[str, Any]], *keys: str) -> list[int]:
+    """Collect the integer at ``keys`` from each run summary, skipping runs without one."""
+    values = (_int_at(run, *keys) for run in runs)
+    return [value for value in values if value is not None]
+
+
+def _average(values: list[int]) -> int | None:
+    """Return the mean of ``values`` truncated to an int, or ``None`` when empty."""
+    if not values:
+        return None
+    return int(sum(values) / len(values))
+
+
+def _dict_at(data: Any, *keys: str) -> dict[str, Any]:
+    """Follow ``keys`` through nested dicts; return ``{}`` unless the result is a dict."""
+    value: Any = data
+    for key in keys:
+        if not isinstance(value, dict):
+            return {}
+        value = value.get(key)
+    return value if isinstance(value, dict) else {}
+
+
+def _int_at(data: Any, *keys: str) -> int | None:
+    """Follow ``keys`` through nested dicts; return the value as an int, or ``None``.
+
+    Booleans are rejected even though ``bool`` subclasses ``int``; floats are truncated.
+    """
+    value: Any = data
+    for key in keys:
+        if not isinstance(value, dict):
+            return None
+        value = value.get(key)
+    if isinstance(value, bool):
+        return None
+    if isinstance(value, int):
+        return value
+    if isinstance(value, float):
+        return int(value)
+    return None
diff --git a/src/pobsync_backend/templates/pobsync_backend/dashboard.html b/src/pobsync_backend/templates/pobsync_backend/dashboard.html
index 309f178..a187f67 100644
--- a/src/pobsync_backend/templates/pobsync_backend/dashboard.html
+++ b/src/pobsync_backend/templates/pobsync_backend/dashboard.html
@@ -37,6 +37,17 @@
Failed
{{ counts.failed_runs }}
+ {% if stats_summary.runs_sampled %} +
+
Backup Root Used
{# default_if_none keeps a real 0 visible; default:"" would blank it but still print the % #}{{ stats_summary.capacity.used_percent|default_if_none:"" }}{% if stats_summary.capacity.used_percent is not None %}%{% endif %}
+
Available
{{ stats_summary.capacity.available_bytes|filesizeformat }}
+
Avg New Data
{{ stats_summary.avg_literal_data_bytes|filesizeformat }}
+
Avg Duration
{# default_if_none keeps a 0-second average visible instead of rendering a bare "s" #}{{ stats_summary.avg_duration_seconds|default_if_none:"" }}{% if stats_summary.avg_duration_seconds is not None %}s{% endif %}
+
Link-Dest Savings
{# default_if_none: a ratio of 0.0 (nothing matched) is a valid value and must be shown #}{{ stats_summary.link_dest_savings_ratio|default_if_none:"" }}
+
Runs Until Full
{# default_if_none: an estimate of 0 runs is the most important value to show #}{{ stats_summary.estimated_runs_until_full|default_if_none:"" }}
+
+ {% endif %} +

Hosts

@@ -47,6 +58,8 @@ + + @@ -66,11 +79,20 @@ none {% endif %} + + {% empty %} - + {% endfor %}
Enabled Snapshots Latest SnapshotLatest RunNew Data Runs Retention
+ {% if host.stats_summary.latest_run.id %} + Run {{ host.stats_summary.latest_run.id }} +
{# default_if_none keeps a 0-second duration visible instead of rendering a bare "s" #}{{ host.stats_summary.latest_run.duration_seconds|default_if_none:"" }}{% if host.stats_summary.latest_run.duration_seconds is not None %}s{% endif %}
+ {% else %} + none + {% endif %} +
{{ host.stats_summary.latest_run.rsync.literal_data_bytes|filesizeformat }} {{ host.run_count }} d{{ host.retention_daily }} w{{ host.retention_weekly }} m{{ host.retention_monthly }} y{{ host.retention_yearly }}
No hosts configured yet.
No hosts configured yet.
diff --git a/src/pobsync_backend/templates/pobsync_backend/host_detail.html b/src/pobsync_backend/templates/pobsync_backend/host_detail.html index fc2be55..710f5cf 100644 --- a/src/pobsync_backend/templates/pobsync_backend/host_detail.html +++ b/src/pobsync_backend/templates/pobsync_backend/host_detail.html @@ -78,6 +78,44 @@
+ {% if stats_summary.runs %} +
+

Backup Trends

+
+
Avg New Data
{{ stats_summary.avg_literal_data_bytes|filesizeformat }}
+
Total New Data
{{ stats_summary.total_literal_data_bytes|filesizeformat }}
+
Matched Data
{{ stats_summary.total_matched_data_bytes|filesizeformat }}
+
Latest Duration
{# default_if_none keeps a 0-second duration visible instead of rendering a bare "s" #}{{ stats_summary.latest_run.duration_seconds|default_if_none:"" }}{% if stats_summary.latest_run.duration_seconds is not None %}s{% endif %}
+
+ + + + + + + + + + + + + + {% for run in stats_summary.runs %} + + + + + + + + + + {% endfor %} + +
RunStartedDurationFilesNew DataMatchedSnapshot
Run {{ run.id }}{{ run.started_at|default:"" }}{# default_if_none keeps 0 durations and 0 file counts visible #}{{ run.duration_seconds|default_if_none:"" }}{% if run.duration_seconds is not None %}s{% endif %}{{ run.rsync.files_total|default_if_none:"" }}{{ run.rsync.literal_data_bytes|filesizeformat }}{{ run.rsync.matched_data_bytes|filesizeformat }}{% if run.snapshot %}{{ run.snapshot.dirname }}{% else %}{{ run.snapshot_path }}{% endif %}
+
+ {% endif %} +

Host Check

diff --git a/src/pobsync_backend/tests/test_views.py b/src/pobsync_backend/tests/test_views.py index 1a32184..c205552 100644 --- a/src/pobsync_backend/tests/test_views.py +++ b/src/pobsync_backend/tests/test_views.py @@ -35,7 +35,7 @@ class ViewTests(TestCase): self.client.force_login(self.staff_user) host = HostConfig.objects.create(host="web-01", address="web-01.example.test") snapshot = self._snapshot(host, "20260519-021500Z__ABCDEFGH") - BackupRun.objects.create( + run = BackupRun.objects.create( host=host, status=BackupRun.Status.SUCCESS, snapshot=snapshot, @@ -50,6 +50,45 @@ class ViewTests(TestCase): self.assertContains(response, "20260519-021500Z__ABCDEFGH") self.assertContains(response, "success") + def test_dashboard_renders_backup_trend_summary(self) -> None: + self.client.force_login(self.staff_user) + GlobalConfig.objects.create(name="default", backup_root="/missing-backup-root") + host = HostConfig.objects.create(host="web-01", address="web-01.example.test") + snapshot = self._snapshot(host, "20260519-021500Z__ABCDEFGH") + run = BackupRun.objects.create( + host=host, + status=BackupRun.Status.SUCCESS, + snapshot=snapshot, + started_at=datetime(2026, 5, 19, 2, 15, tzinfo=timezone.utc), + result={ + "ok": True, + "dry_run": False, + "stats": { + "duration_seconds": 30, + "rsync": { + "files_total": 100, + "literal_data_bytes": 1000, + "matched_data_bytes": 4000, + }, + "storage": { + "capacity": { + "available_bytes": 10_000, + "used_percent": 25.0, + } + }, + }, + }, + ) + + response = self.client.get(reverse("dashboard")) + + self.assertEqual(response.status_code, 200) + self.assertContains(response, "Backup Root Used") + self.assertContains(response, "Runs Until Full") + self.assertContains(response, "10") + self.assertContains(response, f"Run {run.id}") + self.assertContains(response, "1000") + def test_dashboard_links_latest_snapshot_for_each_host(self) -> None: self.client.force_login(self.staff_user) host = 
HostConfig.objects.create(host="web-01", address="web-01.example.test") @@ -520,6 +559,40 @@ class ViewTests(TestCase): self.assertContains(response, reverse("run_detail", args=[BackupRun.objects.get().id])) self.assertContains(response, reverse("snapshot_detail", args=[snapshot.id])) + def test_host_detail_renders_backup_trends(self) -> None: + self.client.force_login(self.staff_user) + GlobalConfig.objects.create(name="default", backup_root="/backups") + host = HostConfig.objects.create(host="web-01", address="web-01.example.test") + snapshot = self._snapshot(host, "20260519-021500Z__ABCDEFGH") + BackupRun.objects.create( + host=host, + status=BackupRun.Status.SUCCESS, + snapshot=snapshot, + started_at=datetime(2026, 5, 19, 2, 15, tzinfo=timezone.utc), + result={ + "ok": True, + "dry_run": False, + "stats": { + "duration_seconds": 45, + "rsync": { + "files_total": 250, + "literal_data_bytes": 2048, + "matched_data_bytes": 8192, + }, + }, + }, + ) + + response = self.client.get(reverse("host_detail", args=[host.host])) + + self.assertEqual(response.status_code, 200) + self.assertContains(response, "Backup Trends") + self.assertContains(response, "Avg New Data") + self.assertContains(response, "45s") + self.assertContains(response, "250") + self.assertContains(response, "2.0") + self.assertContains(response, "KB") + def test_prepare_host_directories_action_creates_missing_directories(self) -> None: self.client.force_login(self.staff_user) with TemporaryDirectory() as tmp: diff --git a/src/pobsync_backend/views.py b/src/pobsync_backend/views.py index d69ad22..ef1dcdb 100644 --- a/src/pobsync_backend/views.py +++ b/src/pobsync_backend/views.py @@ -34,10 +34,12 @@ from .retention import run_sql_retention_apply, run_sql_retention_plan from .self_check import collect_self_checks, summarize_self_checks from .snapshot_discovery import discover_snapshots, inspect_snapshot_discovery from .ssh_keys import SshKeyError, delete_generated_key_files, generate_ssh_key, 
merge_known_hosts, scan_known_host +from .stats_summary import collect_dashboard_stats, collect_host_stats @staff_member_required def dashboard(request): + global_config = GlobalConfig.objects.filter(name="default").first() hosts = list( HostConfig.objects.annotate(snapshot_count=Count("snapshots", distinct=True), run_count=Count("runs", distinct=True)) .order_by("host") @@ -48,9 +50,11 @@ def dashboard(request): .order_by("-started_at", "-discovered_at", "-id") .first() ) + stats_summary = collect_dashboard_stats(hosts=hosts, global_config=global_config) context = { "hosts": hosts, - "global_config": GlobalConfig.objects.filter(name="default").first(), + "global_config": global_config, + "stats_summary": stats_summary, "latest_runs": BackupRun.objects.select_related("host", "snapshot").order_by("-created_at")[:10], "counts": { "global_configs": GlobalConfig.objects.count(), @@ -258,12 +262,14 @@ def host_detail(request, host: str): ).order_by("created_at", "id").first() has_global_config = GlobalConfig.objects.filter(name="default").exists() host_checks = collect_host_checks(host_config) + stats_summary = collect_host_stats(host=host_config, limit=10) context = { "host": host_config, "schedule": _schedule_for_host(host_config), "discovery": inspect_snapshot_discovery(host=host_config), "host_checks": host_checks, "host_check_summary": summarize_self_checks(host_checks), + "stats_summary": stats_summary, "manual_backup_form": ManualBackupForm(initial=_default_manual_backup_initial(host_config)), "can_queue_backup": host_config.enabled and has_global_config, "has_global_config": has_global_config,