(feature) Summarize backup trends in the Django UI

Add a stats summary layer that aggregates recent successful real backup runs into dashboard and host-level trend metrics. Show backup-root usage, available space, average new data, average duration, estimated runs until full, and link-dest savings on the dashboard. Add a host trend table with recent run duration, file count, new data, matched data, and snapshot links. Keep the implementation based on existing run and snapshot stats JSON so the UI gains useful trend visibility without introducing a schema migration yet.
2026-05-19 22:31:24 +02:00
parent 6940dc55b7
commit fc22842fc4
5 changed files with 298 additions and 3 deletions
--- a/src/pobsync_backend/stats_summary.py
+++ b/src/pobsync_backend/stats_summary.py
@@ -0,0 +1,156 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any, Iterable
+
+from pobsync.run_stats import filesystem_capacity
+
+from .models import BackupRun, GlobalConfig, HostConfig, SnapshotRecord
+
+
+def collect_dashboard_stats(*, hosts: Iterable[HostConfig], global_config: GlobalConfig | None) -> dict[str, Any]:
+    """Aggregate recent backup runs into dashboard-level trend metrics.
+
+    Samples at most the 100 most recent successful runs, drops dry runs and
+    runs that recorded no stats, and derives averages/totals from what is left.
+
+    Side effect: every object in ``hosts`` gets a ``stats_summary`` attribute
+    holding the result of ``collect_host_stats`` for that host.
+
+    Capacity is read from the live filesystem under ``global_config.backup_root``
+    when that yields a non-empty result; otherwise the most recent capacity
+    recorded in a sampled run is used.
+
+    Returns a dict with the keys ``runs_sampled``, ``avg_duration_seconds``,
+    ``avg_literal_data_bytes``, ``total_literal_data_bytes``,
+    ``total_matched_data_bytes``, ``link_dest_savings_ratio``,
+    ``estimated_runs_until_full`` and ``capacity``. Values that cannot be
+    computed from the sampled data are ``None``.
+    """
+    runs = list(
+        BackupRun.objects.select_related("host", "snapshot")
+        .filter(status=BackupRun.Status.SUCCESS)
+        .order_by("-started_at", "-created_at")[:100]
+    )
+    real_runs = [_run_summary(run) for run in runs if _is_real_run(run)]
+    real_runs = [run for run in real_runs if run["has_stats"]]
+
+    # Attach the per-host trend data the templates read from each host object.
+    for host in hosts:
+        host.stats_summary = collect_host_stats(host=host)
+
+    literal_values = [_int_at(run, "rsync", "literal_data_bytes") for run in real_runs]
+    literal_values = [value for value in literal_values if value is not None]
+    matched_values = [_int_at(run, "rsync", "matched_data_bytes") for run in real_runs]
+    matched_values = [value for value in matched_values if value is not None]
+    duration_values = [_int_at(run, "duration_seconds") for run in real_runs]
+    duration_values = [value for value in duration_values if value is not None]
+
+    avg_literal = _average(literal_values)
+    total_literal = sum(literal_values)
+    total_matched = sum(matched_values)
+    savings_basis = total_literal + total_matched
+    capacity = _capacity_from_system(global_config) or _latest_capacity_from_runs(real_runs) or {}
+    available = _int_at(capacity, "available_bytes")
+
+    # A full disk (available == 0) is a known state and must report 0 runs,
+    # not "unknown". The estimate stays None only when capacity is unknown or
+    # the average new data per run is missing, zero or negative.
+    estimated_runs_until_full = None
+    if available is not None and avg_literal is not None and avg_literal > 0:
+        # Floor division keeps the arithmetic exact for arbitrarily large ints.
+        estimated_runs_until_full = max(available, 0) // avg_literal
+
+    return {
+        "runs_sampled": len(real_runs),
+        "avg_duration_seconds": _average(duration_values),
+        "avg_literal_data_bytes": avg_literal,
+        "total_literal_data_bytes": total_literal,
+        "total_matched_data_bytes": total_matched,
+        "link_dest_savings_ratio": round(total_matched / savings_basis, 4) if savings_basis else None,
+        "estimated_runs_until_full": estimated_runs_until_full,
+        "capacity": capacity,
+    }
+
+
+def collect_host_stats(*, host: HostConfig, limit: int = 8) -> dict[str, Any]:
+    """Build the trend summary for a single host.
+
+    Looks at the host's 50 most recent successful runs, keeps the real
+    (non-dry) ones that recorded stats, and reports the newest ``limit`` of
+    them together with the host's latest snapshot and literal/matched totals.
+
+    Returns a dict with the keys ``runs``, ``latest_run``, ``latest_snapshot``,
+    ``avg_literal_data_bytes``, ``total_literal_data_bytes`` and
+    ``total_matched_data_bytes``.
+    """
+    successful_runs = (
+        host.runs.select_related("snapshot")
+        .filter(status=BackupRun.Status.SUCCESS)
+        .order_by("-started_at", "-created_at")
+    )
+    candidates = [_run_summary(run) for run in list(successful_runs[:50]) if _is_real_run(run)]
+    recent = [summary for summary in candidates if summary["has_stats"]][:limit]
+
+    newest_snapshot = host.snapshots.order_by("-started_at", "-discovered_at", "-id").first()
+    if newest_snapshot:
+        newest_snapshot_stats = _snapshot_summary(newest_snapshot)
+    else:
+        newest_snapshot_stats = {}
+
+    # Runs without a usable byte count are left out of the totals and average.
+    literal_bytes = []
+    matched_bytes = []
+    for summary in recent:
+        literal = _int_at(summary, "rsync", "literal_data_bytes")
+        if literal is not None:
+            literal_bytes.append(literal)
+    for summary in recent:
+        matched = _int_at(summary, "rsync", "matched_data_bytes")
+        if matched is not None:
+            matched_bytes.append(matched)
+
+    return {
+        "runs": recent,
+        "latest_run": recent[0] if recent else {},
+        "latest_snapshot": newest_snapshot_stats,
+        "avg_literal_data_bytes": _average(literal_bytes),
+        "total_literal_data_bytes": sum(literal_bytes),
+        "total_matched_data_bytes": sum(matched_bytes),
+    }
+
+
+def _run_summary(run: BackupRun) -> dict[str, Any]:
+    """Flatten a backup run and its recorded stats into a plain dict.
+
+    A missing or non-dict ``result``/``stats`` payload is treated as empty, so
+    the returned mapping always carries the same keys. ``has_stats`` tells the
+    caller whether any stats were actually recorded for the run.
+    """
+    payload = run.result if isinstance(run.result, dict) else {}
+    run_stats = _dict_at(payload, "stats")
+
+    summary: dict[str, Any] = {
+        "id": run.id,
+        "host": run.host.host,
+        "started_at": run.started_at,
+        "ended_at": run.ended_at,
+        "snapshot": run.snapshot,
+        "snapshot_path": run.snapshot_path,
+    }
+    summary["has_stats"] = bool(run_stats)
+    summary["duration_seconds"] = _int_at(run_stats, "duration_seconds")
+    # Nested sections fall back to {} when absent or not a dict.
+    summary["rsync"] = _dict_at(run_stats, "rsync")
+    summary["storage"] = _dict_at(run_stats, "storage")
+    return summary
+
+
+def _snapshot_summary(snapshot: SnapshotRecord | None) -> dict[str, Any]:
+    """Describe a snapshot record as a plain dict, or ``{}`` when there is none.
+
+    Size figures are read from ``metadata["stats"]["storage"]["snapshot"]``;
+    any missing or malformed level along that path yields ``None`` sizes.
+    """
+    if snapshot is None:
+        return {}
+
+    raw_metadata = snapshot.metadata if isinstance(snapshot.metadata, dict) else {}
+    sizes = _dict_at(raw_metadata, "stats", "storage", "snapshot")
+
+    summary: dict[str, Any] = {
+        "id": snapshot.id,
+        "dirname": snapshot.dirname,
+        "kind": snapshot.kind,
+        "status": snapshot.status,
+        "started_at": snapshot.started_at,
+    }
+    for field in ("apparent_size_bytes", "allocated_size_bytes", "hardlinked_files"):
+        summary[field] = _int_at(sizes, field)
+    return summary
+
+
+def _is_real_run(run: BackupRun) -> bool:
+    """Return False when the run's result marks it as a dry run.
+
+    A run counts as dry when ``result["dry_run"]`` or
+    ``result["requested"]["dry_run"]`` is exactly ``True``; anything else,
+    including a missing or non-dict result, counts as a real run.
+    """
+    payload = run.result
+    if not isinstance(payload, dict):
+        return True
+    if payload.get("dry_run") is True:
+        return False
+
+    request = payload.get("requested")
+    if not isinstance(request, dict):
+        return True
+    return request.get("dry_run") is not True
+
+
+def _capacity_from_system(global_config: GlobalConfig | None) -> dict[str, Any]:
+    """Query filesystem capacity for the configured backup root.
+
+    Returns ``{}`` when there is no global config or its ``backup_root`` is
+    empty; otherwise returns whatever ``filesystem_capacity`` reports.
+    """
+    if global_config is not None and global_config.backup_root:
+        return filesystem_capacity(Path(global_config.backup_root))
+    return {}
+
+
+def _latest_capacity_from_runs(runs: list[dict[str, Any]]) -> dict[str, Any]:
+    """Return the first non-empty ``storage.capacity`` dict found in ``runs``.
+
+    ``runs`` is scanned in the order given; ``{}`` is returned when no run
+    summary carries capacity data.
+    """
+    recorded = (_dict_at(summary, "storage", "capacity") for summary in runs)
+    # The generator is lazy, so scanning stops at the first non-empty capacity.
+    return next((capacity for capacity in recorded if capacity), {})
+
+
+def _average(values: list[int]) -> int | None:
+    """Return the integer mean of ``values``, or ``None`` for an empty list.
+
+    The result is truncated toward zero. The division is done in integer
+    arithmetic so large byte counts are not rounded through a float.
+    """
+    if not values:
+        return None
+    total = sum(values)
+    # Divide the magnitude and restore the sign: plain ``//`` would floor
+    # negative totals instead of truncating them toward zero.
+    magnitude = abs(total) // len(values)
+    return magnitude if total >= 0 else -magnitude
+
+
+def _dict_at(data: dict[str, Any], *keys: str) -> dict[str, Any]:
+    """Follow ``keys`` through nested dicts and return the dict found there.
+
+    Returns ``{}`` when any step along the path is not a dict, a key is
+    missing, or the final value is not a dict. With no keys, ``data`` itself
+    is returned if it is a dict.
+    """
+    cursor: Any = data
+    for key in keys:
+        if isinstance(cursor, dict):
+            cursor = cursor.get(key)
+        else:
+            return {}
+    if isinstance(cursor, dict):
+        return cursor
+    return {}
+
+
+def _int_at(data: dict[str, Any], *keys: str) -> int | None:
+    """Follow ``keys`` through nested dicts and return the value as an int.
+
+    Returns ``None`` when the path cannot be followed or the value is not a
+    usable number. Booleans are rejected even though ``bool`` subclasses
+    ``int``. Floats are truncated toward zero; NaN and infinities, which have
+    no integer form, yield ``None`` instead of raising.
+    """
+    value: Any = data
+    for key in keys:
+        if not isinstance(value, dict):
+            return None
+        value = value.get(key)
+    if isinstance(value, bool):
+        return None
+    if isinstance(value, int):
+        return value
+    if isinstance(value, float):
+        try:
+            return int(value)
+        except (ValueError, OverflowError):
+            # int() raises ValueError for NaN and OverflowError for +/-inf.
+            return None
+    return None