from __future__ import annotations from datetime import datetime, timezone from pathlib import Path from tempfile import TemporaryDirectory from django.test import TestCase from pobsync.run_stats import tree_usage from pobsync_backend.models import HostConfig, SnapshotRecord from pobsync_backend.stats_summary import collect_dashboard_stats, collect_host_stats class StatsSummaryTests(TestCase): def test_collect_dashboard_stats_sums_backup_data_across_hosts(self) -> None: web = HostConfig.objects.create(host="web-01", address="web-01.example.test") db = HostConfig.objects.create(host="db-01", address="db-01.example.test") self._snapshot(web, "20260519-021500Z__SCHED01", SnapshotRecord.Kind.SCHEDULED, allocated=100) self._snapshot(web, "20260519-031500Z__MANUAL1", SnapshotRecord.Kind.MANUAL, allocated=200) self._snapshot(db, "20260519-041500Z__SCHED02", SnapshotRecord.Kind.SCHEDULED, allocated=300) with TemporaryDirectory() as tmp: incomplete_usage = self._incomplete_snapshot_on_disk( db, Path(tmp), "20260519-051500Z__BROKEN1", ) stats = collect_dashboard_stats(hosts=[web, db], global_config=None) self.assertEqual(stats["backup_data"]["scheduled"]["count"], 2) self.assertEqual(stats["backup_data"]["scheduled"]["allocated_size_bytes"], 400) self.assertEqual(stats["backup_data"]["manual"]["allocated_size_bytes"], 200) self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], incomplete_usage["allocated_size_bytes"]) self.assertEqual(stats["backup_data"]["total"]["count"], 4) self.assertEqual(stats["backup_data"]["total"]["allocated_size_bytes"], 600 + incomplete_usage["allocated_size_bytes"]) self.assertEqual(stats["backup_data"]["total"]["unique_apparent_size_bytes"], 1200 + incomplete_usage["apparent_size_bytes"]) def test_collect_host_stats_sums_backup_data_by_snapshot_kind(self) -> None: host = HostConfig.objects.create(host="web-01", address="web-01.example.test") self._snapshot(host, "20260519-021500Z__SCHED01", SnapshotRecord.Kind.SCHEDULED, allocated=100) self._snapshot(host, "20260519-031500Z__SCHED02", SnapshotRecord.Kind.SCHEDULED, allocated=200) self._snapshot(host, "20260519-041500Z__MANUAL1", SnapshotRecord.Kind.MANUAL, allocated=300) with TemporaryDirectory() as tmp: incomplete_usage = self._incomplete_snapshot_on_disk( host, Path(tmp), "20260519-051500Z__BROKEN1", ) stats = collect_host_stats(host=host) self.assertEqual(stats["backup_data"]["scheduled"]["count"], 2) self.assertEqual(stats["backup_data"]["scheduled"]["allocated_size_bytes"], 300) self.assertEqual(stats["backup_data"]["manual"]["allocated_size_bytes"], 300) self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], incomplete_usage["allocated_size_bytes"]) self.assertEqual(stats["backup_data"]["total"]["count"], 4) self.assertEqual(stats["backup_data"]["total"]["allocated_size_bytes"], 600 + incomplete_usage["allocated_size_bytes"]) self.assertEqual(stats["backup_data"]["total"]["unique_apparent_size_bytes"], 1200 + incomplete_usage["apparent_size_bytes"]) def test_collect_host_stats_falls_back_to_filesystem_usage_for_snapshots_without_metadata(self) -> None: host = HostConfig.objects.create(host="web-01", address="web-01.example.test") with TemporaryDirectory() as tmp: incomplete_dir = Path(tmp) / host.host / ".incomplete" / "20260519-051500Z__BROKEN1" data_dir = incomplete_dir / "data" meta_dir = incomplete_dir / "meta" data_dir.mkdir(parents=True) meta_dir.mkdir() data_dir.joinpath("partial-file").write_text("interrupted backup data\n", encoding="utf-8") meta_dir.joinpath("rsync.log").write_text("not part of the backup data total\n", encoding="utf-8") expected_usage = tree_usage(data_dir) SnapshotRecord.objects.create( host=host, kind=SnapshotRecord.Kind.INCOMPLETE, dirname=incomplete_dir.name, path=str(incomplete_dir), status="failed", metadata={}, ) stats = collect_host_stats(host=host) self.assertEqual(stats["backup_data"]["incomplete"]["count"], 1) self.assertEqual( stats["backup_data"]["incomplete"]["allocated_size_bytes"], expected_usage["allocated_size_bytes"], ) self.assertEqual( stats["backup_data"]["incomplete"]["apparent_size_bytes"], expected_usage["apparent_size_bytes"], ) self.assertEqual( stats["backup_data"]["total"]["allocated_size_bytes"], expected_usage["allocated_size_bytes"], ) def test_collect_host_stats_measures_incomplete_data_from_disk_even_with_stale_metadata(self) -> None: host = HostConfig.objects.create(host="web-01", address="web-01.example.test") with TemporaryDirectory() as tmp: incomplete_dir = Path(tmp) / host.host / ".incomplete" / "20260519-051500Z__BROKEN1" data_dir = incomplete_dir / "data" data_dir.mkdir(parents=True) data_dir.joinpath("partial-file").write_text("interrupted backup data\n", encoding="utf-8") expected_usage = tree_usage(data_dir) SnapshotRecord.objects.create( host=host, kind=SnapshotRecord.Kind.INCOMPLETE, dirname=incomplete_dir.name, path=str(incomplete_dir), status="failed", metadata={ "stats": { "storage": { "snapshot": { "apparent_size_bytes": 0, "allocated_size_bytes": 0, } } } }, ) stats = collect_host_stats(host=host) self.assertEqual( stats["backup_data"]["incomplete"]["allocated_size_bytes"], expected_usage["allocated_size_bytes"], ) self.assertGreater(stats["backup_data"]["incomplete"]["apparent_size_bytes"], 0) def test_collect_host_stats_reports_non_hardlinked_snapshot_data(self) -> None: host = HostConfig.objects.create(host="web-01", address="web-01.example.test") self._snapshot_with_sizes( host, "20260519-021500Z__SCHED01", SnapshotRecord.Kind.SCHEDULED, allocated=1_200, apparent=2_000, hardlinked_apparent=1_500, ) stats = collect_host_stats(host=host) self.assertEqual(stats["backup_data"]["scheduled"]["apparent_size_bytes"], 2_000) self.assertEqual(stats["backup_data"]["scheduled"]["unique_apparent_size_bytes"], 500) self.assertEqual(stats["backup_data"]["total"]["unique_apparent_size_bytes"], 500) def _snapshot(self, host: HostConfig, dirname: str, kind: str, *, allocated: int) -> SnapshotRecord: return self._snapshot_with_sizes(host, dirname, kind, allocated=allocated) def _incomplete_snapshot_on_disk(self, host: HostConfig, root: Path, dirname: str) -> dict: incomplete_dir = root / host.host / ".incomplete" / dirname data_dir = incomplete_dir / "data" data_dir.mkdir(parents=True) data_dir.joinpath("partial-file").write_text("interrupted backup data\n", encoding="utf-8") usage = tree_usage(data_dir) SnapshotRecord.objects.create( host=host, kind=SnapshotRecord.Kind.INCOMPLETE, dirname=dirname, path=str(incomplete_dir), status="failed", ) return usage def _snapshot_with_sizes( self, host: HostConfig, dirname: str, kind: str, *, allocated: int, apparent: int | None = None, hardlinked_apparent: int = 0, ) -> SnapshotRecord: started_at = datetime.strptime(dirname.split("__", 1)[0], "%Y%m%d-%H%M%SZ").replace(tzinfo=timezone.utc) apparent_size = apparent if apparent is not None else allocated * 2 return SnapshotRecord.objects.create( host=host, kind=kind, dirname=dirname, path=f"/backups/{host.host}/{kind}/{dirname}", status="success", started_at=started_at, metadata={ "stats": { "storage": { "snapshot": { "apparent_size_bytes": apparent_size, "allocated_size_bytes": allocated, "hardlinked_apparent_size_bytes": hardlinked_apparent, } } } }, )