2 Commits

Author SHA1 Message Date
10e0293559 Merge pull request '(feature) Show backup data totals by snapshot kind' (#61) from issue-53-host-backup-data-totals into master
Reviewed-on: #61
2026-05-23 01:28:18 +02:00
9dd690bb3b (feature) Show backup data totals by snapshot kind
Aggregate snapshot storage metadata by snapshot kind so operators can see
scheduled, manual, incomplete, and total backup data.

Surface the totals per host and across all hosts on the dashboard, using
allocated snapshot size from recorded backup metadata without rescanning
backup storage.
2026-05-23 01:27:51 +02:00
6 changed files with 241 additions and 0 deletions

View File

@@ -11,6 +11,7 @@ from .models import BackupRun, GlobalConfig, HostConfig, SnapshotRecord
def collect_dashboard_stats(*, hosts: Iterable[HostConfig], global_config: GlobalConfig | None) -> dict[str, Any]: def collect_dashboard_stats(*, hosts: Iterable[HostConfig], global_config: GlobalConfig | None) -> dict[str, Any]:
hosts = list(hosts)
runs = list( runs = list(
BackupRun.objects.select_related("host", "snapshot") BackupRun.objects.select_related("host", "snapshot")
.filter(status__in=_COMPLETED_BACKUP_STATUSES) .filter(status__in=_COMPLETED_BACKUP_STATUSES)
@@ -21,6 +22,7 @@ def collect_dashboard_stats(*, hosts: Iterable[HostConfig], global_config: Globa
for host in hosts: for host in hosts:
host.stats_summary = collect_host_stats(host=host) host.stats_summary = collect_host_stats(host=host)
backup_data = _sum_backup_data_by_kind(host.stats_summary["backup_data"] for host in hosts)
literal_values = [_int_at(run, "rsync", "literal_data_bytes") for run in real_runs] literal_values = [_int_at(run, "rsync", "literal_data_bytes") for run in real_runs]
literal_values = [value for value in literal_values if value is not None] literal_values = [value for value in literal_values if value is not None]
@@ -51,6 +53,7 @@ def collect_dashboard_stats(*, hosts: Iterable[HostConfig], global_config: Globa
"estimated_runs_until_full": int(available / avg_literal) if available and avg_literal else None, "estimated_runs_until_full": int(available / avg_literal) if available and avg_literal else None,
"estimated_days_until_full": int(available / daily_literal) if available and daily_literal else None, "estimated_days_until_full": int(available / daily_literal) if available and daily_literal else None,
"capacity": capacity, "capacity": capacity,
"backup_data": backup_data,
} }
@@ -61,6 +64,7 @@ def collect_host_stats(*, host: HostConfig, limit: int = 8) -> dict[str, Any]:
trend_runs = [run for run in completed_real_runs if run["has_stats"]][:limit] trend_runs = [run for run in completed_real_runs if run["has_stats"]][:limit]
latest_snapshot = host.snapshots.order_by("-started_at", "-discovered_at", "-id").first() latest_snapshot = host.snapshots.order_by("-started_at", "-discovered_at", "-id").first()
latest_snapshot_stats = _snapshot_summary(latest_snapshot) if latest_snapshot else {} latest_snapshot_stats = _snapshot_summary(latest_snapshot) if latest_snapshot else {}
backup_data = _backup_data_by_kind(host)
literal_values = [_int_at(run, "rsync", "literal_data_bytes") for run in trend_runs] literal_values = [_int_at(run, "rsync", "literal_data_bytes") for run in trend_runs]
literal_values = [value for value in literal_values if value is not None] literal_values = [value for value in literal_values if value is not None]
@@ -75,6 +79,7 @@ def collect_host_stats(*, host: HostConfig, limit: int = 8) -> dict[str, Any]:
"latest_good_run": _first_run_with_status(real_runs, {BackupRun.Status.SUCCESS}), "latest_good_run": _first_run_with_status(real_runs, {BackupRun.Status.SUCCESS}),
"latest_problem_run": _first_run_with_status(real_runs, {BackupRun.Status.WARNING, BackupRun.Status.FAILED}), "latest_problem_run": _first_run_with_status(real_runs, {BackupRun.Status.WARNING, BackupRun.Status.FAILED}),
"latest_snapshot": latest_snapshot_stats, "latest_snapshot": latest_snapshot_stats,
"backup_data": backup_data,
"avg_literal_data_bytes": _average(literal_values), "avg_literal_data_bytes": _average(literal_values),
"avg_daily_literal_data_bytes": _average_daily_literal(trend_runs), "avg_daily_literal_data_bytes": _average_daily_literal(trend_runs),
"total_literal_data_bytes": sum(literal_values), "total_literal_data_bytes": sum(literal_values),
@@ -102,6 +107,60 @@ def _run_summary(run: BackupRun) -> dict[str, Any]:
} }
def _backup_data_by_kind(host: HostConfig) -> dict[str, Any]:
    """Total a host's snapshot count and sizes per snapshot kind.

    Every snapshot from ``host.snapshots.all()`` is summarised with
    ``_snapshot_summary`` and added both to the row for its kind and to an
    overall total row.

    Returns:
        A dict with the keys ``"scheduled"``, ``"manual"``, ``"incomplete"``
        and ``"total"``; each value is a row holding ``count``,
        ``allocated_size_bytes`` and ``apparent_size_bytes``.
    """
    per_kind: dict[str, dict[str, int]] = {
        kind: _empty_snapshot_data_row()
        for kind in (
            SnapshotRecord.Kind.SCHEDULED,
            SnapshotRecord.Kind.MANUAL,
            SnapshotRecord.Kind.INCOMPLETE,
        )
    }
    overall = _empty_snapshot_data_row()

    for record in host.snapshots.all():
        sizes = _snapshot_summary(record)
        # A missing or zero allocated size falls back to the apparent size.
        allocated_bytes = int(sizes.get("allocated_size_bytes") or sizes.get("apparent_size_bytes") or 0)
        apparent_bytes = int(sizes.get("apparent_size_bytes") or 0)

        # A snapshot of any other kind gets its own row here; that row counts
        # towards the total but is not part of the returned mapping.
        kind_row = per_kind.get(record.kind)
        if kind_row is None:
            kind_row = per_kind[record.kind] = _empty_snapshot_data_row()

        for target in (kind_row, overall):
            target["count"] += 1
            target["allocated_size_bytes"] += allocated_bytes
            target["apparent_size_bytes"] += apparent_bytes

    return {
        "scheduled": per_kind[SnapshotRecord.Kind.SCHEDULED],
        "manual": per_kind[SnapshotRecord.Kind.MANUAL],
        "incomplete": per_kind[SnapshotRecord.Kind.INCOMPLETE],
        "total": overall,
    }
def _empty_snapshot_data_row() -> dict[str, int]:
    """Return a fresh, zeroed totals row.

    The row has the keys ``count``, ``allocated_size_bytes`` and
    ``apparent_size_bytes``. A new dict is built on every call, so callers
    can mutate the result freely.
    """
    return dict.fromkeys(("count", "allocated_size_bytes", "apparent_size_bytes"), 0)
def _sum_backup_data_by_kind(rows: Iterable[dict[str, dict[str, int]]]) -> dict[str, dict[str, int]]:
    """Add up several per-kind backup data mappings into one.

    Args:
        rows: Mappings from a kind label to a totals row with ``count``,
            ``allocated_size_bytes`` and ``apparent_size_bytes``.

    Returns:
        One mapping that always contains ``"scheduled"``, ``"manual"``,
        ``"incomplete"`` and ``"total"``, plus any other label found in the
        input. A field missing from an input row counts as 0.
    """
    fields = ("count", "allocated_size_bytes", "apparent_size_bytes")
    combined: dict[str, dict[str, int]] = {
        label: _empty_snapshot_data_row() for label in ("scheduled", "manual", "incomplete", "total")
    }

    for per_host in rows:
        for label, figures in per_host.items():
            if label not in combined:
                combined[label] = _empty_snapshot_data_row()
            bucket = combined[label]
            for field in fields:
                bucket[field] += figures.get(field, 0)

    return combined
def _snapshot_summary(snapshot: SnapshotRecord | None) -> dict[str, Any]: def _snapshot_summary(snapshot: SnapshotRecord | None) -> dict[str, Any]:
if snapshot is None: if snapshot is None:
return {} return {}

View File

@@ -172,6 +172,29 @@
<div class="metric"><div class="label">Incomplete</div><div class="value">{{ counts.incomplete_snapshots }}</div></div> <div class="metric"><div class="label">Incomplete</div><div class="value">{{ counts.incomplete_snapshots }}</div></div>
</section> </section>
<section class="panel">
<h2>Backup Data</h2>
<section class="grid" aria-label="Host backup data totals">
<div class="metric">
<div class="label">Scheduled</div>
<div class="value">{{ stats_summary.backup_data.scheduled.allocated_size_bytes|filesizeformat }}</div>
</div>
<div class="metric">
<div class="label">Manual</div>
<div class="value">{{ stats_summary.backup_data.manual.allocated_size_bytes|filesizeformat }}</div>
</div>
<div class="metric">
<div class="label">Incomplete</div>
<div class="value">{{ stats_summary.backup_data.incomplete.allocated_size_bytes|filesizeformat }}</div>
</div>
<div class="metric">
<div class="label">Total</div>
<div class="value">{{ stats_summary.backup_data.total.allocated_size_bytes|filesizeformat }}</div>
</div>
</section>
<p class="muted">Totals use the allocated snapshot size recorded in backup metadata, grouped by snapshot kind.</p>
</section>
{% if stats_summary.runs %} {% if stats_summary.runs %}
<section class="panel"> <section class="panel">
<h2>Backup Trends</h2> <h2>Backup Trends</h2>

View File

@@ -102,6 +102,22 @@
<div class="label">Retention</div> <div class="label">Retention</div>
<div class="value">d{{ host.retention_daily }} w{{ host.retention_weekly }} m{{ host.retention_monthly }} y{{ host.retention_yearly }}</div> <div class="value">d{{ host.retention_daily }} w{{ host.retention_weekly }} m{{ host.retention_monthly }} y{{ host.retention_yearly }}</div>
</div> </div>
<div class="host-card-stat">
<div class="label">Scheduled data</div>
<div class="value">{{ host.stats_summary.backup_data.scheduled.allocated_size_bytes|filesizeformat }}</div>
</div>
<div class="host-card-stat">
<div class="label">Manual data</div>
<div class="value">{{ host.stats_summary.backup_data.manual.allocated_size_bytes|filesizeformat }}</div>
</div>
<div class="host-card-stat">
<div class="label">Incomplete data</div>
<div class="value">{{ host.stats_summary.backup_data.incomplete.allocated_size_bytes|filesizeformat }}</div>
</div>
<div class="host-card-stat">
<div class="label">Total data</div>
<div class="value">{{ host.stats_summary.backup_data.total.allocated_size_bytes|filesizeformat }}</div>
</div>
</div> </div>
</div> </div>
</div> </div>

View File

@@ -126,5 +126,23 @@
{% else %} {% else %}
<p class="muted">Storage pressure appears after the first completed backup with stats.</p> <p class="muted">Storage pressure appears after the first completed backup with stats.</p>
{% endif %} {% endif %}
<div class="storage-priority-facts">
<div>
<span class="label">Scheduled data</span>
<strong>{{ stats_summary.backup_data.scheduled.allocated_size_bytes|filesizeformat }}</strong>
</div>
<div>
<span class="label">Manual data</span>
<strong>{{ stats_summary.backup_data.manual.allocated_size_bytes|filesizeformat }}</strong>
</div>
<div>
<span class="label">Incomplete data</span>
<strong>{{ stats_summary.backup_data.incomplete.allocated_size_bytes|filesizeformat }}</strong>
</div>
<div>
<span class="label">Total snapshot data</span>
<strong>{{ stats_summary.backup_data.total.allocated_size_bytes|filesizeformat }}</strong>
</div>
</div>
</article> </article>
</section> </section>

View File

@@ -0,0 +1,64 @@
from __future__ import annotations
from datetime import datetime, timezone
from django.test import TestCase
from pobsync_backend.models import HostConfig, SnapshotRecord
from pobsync_backend.stats_summary import collect_dashboard_stats, collect_host_stats
class StatsSummaryTests(TestCase):
    """Checks the per-kind backup data totals reported by the stats summary helpers."""

    def test_collect_dashboard_stats_sums_backup_data_across_hosts(self) -> None:
        # Two hosts with different snapshot kinds; the dashboard totals must
        # combine both hosts.
        web = HostConfig.objects.create(host="web-01", address="web-01.example.test")
        db = HostConfig.objects.create(host="db-01", address="db-01.example.test")
        fixtures = (
            (web, "20260519-021500Z__SCHED01", SnapshotRecord.Kind.SCHEDULED, 100),
            (web, "20260519-031500Z__MANUAL1", SnapshotRecord.Kind.MANUAL, 200),
            (db, "20260519-041500Z__SCHED02", SnapshotRecord.Kind.SCHEDULED, 300),
            (db, "20260519-051500Z__BROKEN1", SnapshotRecord.Kind.INCOMPLETE, 400),
        )
        for owner, dirname, kind, size in fixtures:
            self._snapshot(owner, dirname, kind, allocated=size)

        backup_data = collect_dashboard_stats(hosts=[web, db], global_config=None)["backup_data"]

        self.assertEqual(backup_data["scheduled"]["count"], 2)
        self.assertEqual(backup_data["scheduled"]["allocated_size_bytes"], 400)
        self.assertEqual(backup_data["manual"]["allocated_size_bytes"], 200)
        self.assertEqual(backup_data["incomplete"]["allocated_size_bytes"], 400)
        self.assertEqual(backup_data["total"]["count"], 4)
        self.assertEqual(backup_data["total"]["allocated_size_bytes"], 1000)

    def test_collect_host_stats_sums_backup_data_by_snapshot_kind(self) -> None:
        # One host with two scheduled, one manual and one incomplete snapshot.
        host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
        fixtures = (
            ("20260519-021500Z__SCHED01", SnapshotRecord.Kind.SCHEDULED, 100),
            ("20260519-031500Z__SCHED02", SnapshotRecord.Kind.SCHEDULED, 200),
            ("20260519-041500Z__MANUAL1", SnapshotRecord.Kind.MANUAL, 300),
            ("20260519-051500Z__BROKEN1", SnapshotRecord.Kind.INCOMPLETE, 400),
        )
        for dirname, kind, size in fixtures:
            self._snapshot(host, dirname, kind, allocated=size)

        backup_data = collect_host_stats(host=host)["backup_data"]

        self.assertEqual(backup_data["scheduled"]["count"], 2)
        self.assertEqual(backup_data["scheduled"]["allocated_size_bytes"], 300)
        self.assertEqual(backup_data["manual"]["allocated_size_bytes"], 300)
        self.assertEqual(backup_data["incomplete"]["allocated_size_bytes"], 400)
        self.assertEqual(backup_data["total"]["count"], 4)
        self.assertEqual(backup_data["total"]["allocated_size_bytes"], 1000)

    def _snapshot(self, host: HostConfig, dirname: str, kind: str, *, allocated: int) -> SnapshotRecord:
        # The part of the directory name before "__" is parsed as the UTC
        # start time; the apparent size is stored as twice the allocated size.
        timestamp = dirname.split("__", 1)[0]
        started_at = datetime.strptime(timestamp, "%Y%m%d-%H%M%SZ").replace(tzinfo=timezone.utc)
        snapshot_sizes = {
            "apparent_size_bytes": allocated * 2,
            "allocated_size_bytes": allocated,
        }
        return SnapshotRecord.objects.create(
            host=host,
            kind=kind,
            dirname=dirname,
            path=f"/backups/{host.host}/{kind}/{dirname}",
            status="success",
            started_at=started_at,
            metadata={"stats": {"storage": {"snapshot": snapshot_sizes}}},
        )

View File

@@ -190,6 +190,29 @@ class ViewTests(TestCase):
self.assertContains(response, "running") self.assertContains(response, "running")
self.assertNotContains(response, "<html", html=False) self.assertNotContains(response, "<html", html=False)
def test_dashboard_priority_live_renders_global_backup_data_totals(self) -> None:
    # Snapshots on two hosts; the priority partial must show each per-kind
    # size and their combined total.
    self.client.force_login(self.staff_user)
    web = HostConfig.objects.create(host="web-01", address="web-01.example.test")
    db = HostConfig.objects.create(host="db-01", address="db-01.example.test")
    scheduled = self._snapshot(web, "20260519-021500Z__SCHED01", kind=SnapshotRecord.Kind.SCHEDULED)
    manual = self._snapshot(web, "20260519-031500Z__MANUAL1", kind=SnapshotRecord.Kind.MANUAL)
    incomplete = self._snapshot(db, "20260519-041500Z__BROKEN1", kind=SnapshotRecord.Kind.INCOMPLETE)
    for snapshot, size in ((scheduled, 100), (manual, 200), (incomplete, 300)):
        self._set_snapshot_storage(snapshot, allocated=size)

    response = self.client.get(reverse("dashboard_priority_live"))

    self.assertEqual(response.status_code, 200)
    for label in ("Scheduled data", "Manual data", "Incomplete data", "Total snapshot data"):
        self.assertContains(response, label)
    for rendered_size in ("100&nbsp;bytes", "200&nbsp;bytes", "300&nbsp;bytes", "600&nbsp;bytes"):
        self.assertContains(response, rendered_size, html=True)
def test_dashboard_hosts_live_returns_hosts_partial(self) -> None: def test_dashboard_hosts_live_returns_hosts_partial(self) -> None:
self.client.force_login(self.staff_user) self.client.force_login(self.staff_user)
host = HostConfig.objects.create(host="web-01", address="web-01.example.test") host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
@@ -203,6 +226,28 @@ class ViewTests(TestCase):
self.assertContains(response, "Snapshot health") self.assertContains(response, "Snapshot health")
self.assertNotContains(response, "<html", html=False) self.assertNotContains(response, "<html", html=False)
def test_dashboard_host_cards_render_backup_data_totals(self) -> None:
    # One host with a snapshot of each kind; its card must show each
    # per-kind size and the host total.
    self.client.force_login(self.staff_user)
    host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
    scheduled = self._snapshot(host, "20260519-021500Z__SCHED01", kind=SnapshotRecord.Kind.SCHEDULED)
    manual = self._snapshot(host, "20260519-031500Z__MANUAL1", kind=SnapshotRecord.Kind.MANUAL)
    incomplete = self._snapshot(host, "20260519-041500Z__BROKEN1", kind=SnapshotRecord.Kind.INCOMPLETE)
    for snapshot, size in ((scheduled, 100), (manual, 200), (incomplete, 300)):
        self._set_snapshot_storage(snapshot, allocated=size)

    response = self.client.get(reverse("dashboard_hosts_live"))

    self.assertEqual(response.status_code, 200)
    for label in ("Scheduled data", "Manual data", "Incomplete data", "Total data"):
        self.assertContains(response, label)
    for rendered_size in ("100&nbsp;bytes", "200&nbsp;bytes", "300&nbsp;bytes", "600&nbsp;bytes"):
        self.assertContains(response, rendered_size, html=True)
def test_hosts_list_renders_host_cards_and_controls(self) -> None: def test_hosts_list_renders_host_cards_and_controls(self) -> None:
self.client.force_login(self.staff_user) self.client.force_login(self.staff_user)
web = HostConfig.objects.create(host="web-01", address="web-01.example.test") web = HostConfig.objects.create(host="web-01", address="web-01.example.test")
@@ -1075,6 +1120,7 @@ class ViewTests(TestCase):
(backup_root / host.host / subdir).mkdir(parents=True) (backup_root / host.host / subdir).mkdir(parents=True)
ScheduleConfig.objects.create(host=host, cron_expr="15 2 * * *", prune=True, last_status="success") ScheduleConfig.objects.create(host=host, cron_expr="15 2 * * *", prune=True, last_status="success")
snapshot = self._snapshot(host, "20260519-021500Z__ABCDEFGH") snapshot = self._snapshot(host, "20260519-021500Z__ABCDEFGH")
self._set_snapshot_storage(snapshot, allocated=100)
BackupRun.objects.create(host=host, status=BackupRun.Status.SUCCESS, snapshot=snapshot) BackupRun.objects.create(host=host, status=BackupRun.Status.SUCCESS, snapshot=snapshot)
response = self.client.get(reverse("host_detail", args=[host.host])) response = self.client.get(reverse("host_detail", args=[host.host]))
@@ -1099,6 +1145,8 @@ class ViewTests(TestCase):
self.assertContains(response, reverse("prepare_host_directories", args=[host.host])) self.assertContains(response, reverse("prepare_host_directories", args=[host.host]))
self.assertContains(response, "warning") self.assertContains(response, "warning")
self.assertContains(response, "Snapshot Storage") self.assertContains(response, "Snapshot Storage")
self.assertContains(response, "Backup Data")
self.assertContains(response, "100&nbsp;bytes", html=True)
self.assertContains(response, reverse("queue_manual_backup", args=[host.host])) self.assertContains(response, reverse("queue_manual_backup", args=[host.host]))
self.assertContains(response, reverse("run_detail", args=[BackupRun.objects.get().id])) self.assertContains(response, reverse("run_detail", args=[BackupRun.objects.get().id]))
self.assertContains(response, reverse("snapshot_detail", args=[snapshot.id])) self.assertContains(response, reverse("snapshot_detail", args=[snapshot.id]))
@@ -2606,3 +2654,16 @@ class ViewTests(TestCase):
status="success", status="success",
started_at=started_at, started_at=started_at,
) )
def _set_snapshot_storage(self, snapshot: SnapshotRecord, *, allocated: int) -> None:
    # Replace the snapshot's metadata with storage sizes nested under
    # stats -> storage -> snapshot (apparent size is twice the allocated
    # size), then save only the metadata field.
    sizes = {
        "apparent_size_bytes": allocated * 2,
        "allocated_size_bytes": allocated,
    }
    snapshot.metadata = {"stats": {"storage": {"snapshot": sizes}}}
    snapshot.save(update_fields=["metadata"])