(feature) Summarize backup trends in the Django UI

Add a stats summary layer that aggregates recent successful real backup runs
into dashboard and host-level trend metrics.

Show backup-root usage, available space, average new data, average duration,
estimated runs until full, and link-dest savings on the dashboard. Add a host
trend table with recent run duration, file count, new data, matched data, and
snapshot links.

Keep the implementation based on existing run and snapshot stats JSON so the
UI gains useful trend visibility without introducing a schema migration yet.
This commit is contained in:
2026-05-19 22:31:24 +02:00
parent 6940dc55b7
commit fc22842fc4
5 changed files with 298 additions and 3 deletions

View File

@@ -0,0 +1,156 @@
from __future__ import annotations
from pathlib import Path
from typing import Any, Iterable
from pobsync.run_stats import filesystem_capacity
from .models import BackupRun, GlobalConfig, HostConfig, SnapshotRecord
def collect_dashboard_stats(*, hosts: Iterable[HostConfig], global_config: GlobalConfig | None) -> dict[str, Any]:
runs = list(
BackupRun.objects.select_related("host", "snapshot")
.filter(status=BackupRun.Status.SUCCESS)
.order_by("-started_at", "-created_at")[:100]
)
real_runs = [_run_summary(run) for run in runs if _is_real_run(run)]
real_runs = [run for run in real_runs if run["has_stats"]]
for host in hosts:
host.stats_summary = collect_host_stats(host=host)
literal_values = [_int_at(run, "rsync", "literal_data_bytes") for run in real_runs]
literal_values = [value for value in literal_values if value is not None]
matched_values = [_int_at(run, "rsync", "matched_data_bytes") for run in real_runs]
matched_values = [value for value in matched_values if value is not None]
duration_values = [_int_at(run, "duration_seconds") for run in real_runs]
duration_values = [value for value in duration_values if value is not None]
avg_literal = _average(literal_values)
total_literal = sum(literal_values)
total_matched = sum(matched_values)
savings_basis = total_literal + total_matched
capacity = _capacity_from_system(global_config) or _latest_capacity_from_runs(real_runs) or {}
available = _int_at(capacity, "available_bytes")
return {
"runs_sampled": len(real_runs),
"avg_duration_seconds": _average(duration_values),
"avg_literal_data_bytes": avg_literal,
"total_literal_data_bytes": total_literal,
"total_matched_data_bytes": total_matched,
"link_dest_savings_ratio": round(total_matched / savings_basis, 4) if savings_basis else None,
"estimated_runs_until_full": int(available / avg_literal) if available and avg_literal else None,
"capacity": capacity,
}
def collect_host_stats(*, host: HostConfig, limit: int = 8) -> dict[str, Any]:
runs = list(host.runs.select_related("snapshot").filter(status=BackupRun.Status.SUCCESS).order_by("-started_at", "-created_at")[:50])
real_runs = [_run_summary(run) for run in runs if _is_real_run(run)]
real_runs = [run for run in real_runs if run["has_stats"]][:limit]
latest_snapshot = host.snapshots.order_by("-started_at", "-discovered_at", "-id").first()
latest_snapshot_stats = _snapshot_summary(latest_snapshot) if latest_snapshot else {}
literal_values = [_int_at(run, "rsync", "literal_data_bytes") for run in real_runs]
literal_values = [value for value in literal_values if value is not None]
matched_values = [_int_at(run, "rsync", "matched_data_bytes") for run in real_runs]
matched_values = [value for value in matched_values if value is not None]
return {
"runs": real_runs,
"latest_run": real_runs[0] if real_runs else {},
"latest_snapshot": latest_snapshot_stats,
"avg_literal_data_bytes": _average(literal_values),
"total_literal_data_bytes": sum(literal_values),
"total_matched_data_bytes": sum(matched_values),
}
def _run_summary(run: BackupRun) -> dict[str, Any]:
result = run.result if isinstance(run.result, dict) else {}
stats = result.get("stats") if isinstance(result.get("stats"), dict) else {}
return {
"id": run.id,
"host": run.host.host,
"started_at": run.started_at,
"ended_at": run.ended_at,
"snapshot": run.snapshot,
"snapshot_path": run.snapshot_path,
"has_stats": bool(stats),
"duration_seconds": _int_at(stats, "duration_seconds"),
"rsync": stats.get("rsync") if isinstance(stats.get("rsync"), dict) else {},
"storage": stats.get("storage") if isinstance(stats.get("storage"), dict) else {},
}
def _snapshot_summary(snapshot: SnapshotRecord | None) -> dict[str, Any]:
if snapshot is None:
return {}
metadata = snapshot.metadata if isinstance(snapshot.metadata, dict) else {}
stats = metadata.get("stats") if isinstance(metadata.get("stats"), dict) else {}
storage = stats.get("storage") if isinstance(stats.get("storage"), dict) else {}
snapshot_storage = storage.get("snapshot") if isinstance(storage.get("snapshot"), dict) else {}
return {
"id": snapshot.id,
"dirname": snapshot.dirname,
"kind": snapshot.kind,
"status": snapshot.status,
"started_at": snapshot.started_at,
"apparent_size_bytes": _int_at(snapshot_storage, "apparent_size_bytes"),
"allocated_size_bytes": _int_at(snapshot_storage, "allocated_size_bytes"),
"hardlinked_files": _int_at(snapshot_storage, "hardlinked_files"),
}
def _is_real_run(run: BackupRun) -> bool:
result = run.result if isinstance(run.result, dict) else {}
if result.get("dry_run") is True:
return False
requested = result.get("requested") if isinstance(result.get("requested"), dict) else {}
return requested.get("dry_run") is not True
def _capacity_from_system(global_config: GlobalConfig | None) -> dict[str, Any]:
if global_config is None or not global_config.backup_root:
return {}
return filesystem_capacity(Path(global_config.backup_root))
def _latest_capacity_from_runs(runs: list[dict[str, Any]]) -> dict[str, Any]:
for run in runs:
capacity = _dict_at(run, "storage", "capacity")
if capacity:
return capacity
return {}
def _average(values: list[int]) -> int | None:
if not values:
return None
return int(sum(values) / len(values))
def _dict_at(data: dict[str, Any], *keys: str) -> dict[str, Any]:
value: Any = data
for key in keys:
if not isinstance(value, dict):
return {}
value = value.get(key)
return value if isinstance(value, dict) else {}
def _int_at(data: dict[str, Any], *keys: str) -> int | None:
value: Any = data
for key in keys:
if not isinstance(value, dict):
return None
value = value.get(key)
if isinstance(value, bool):
return None
if isinstance(value, int):
return value
if isinstance(value, float):
return int(value)
return None