From 2ad119e21442f3daaea42a6adcd4b75f31034d4f Mon Sep 17 00:00:00 2001 From: Peter van Arkel Date: Thu, 28 May 2026 21:33:26 +0200 Subject: [PATCH] (bugfix) Measure incomplete snapshot data from disk Use filesystem usage for incomplete snapshots instead of trusting potentially stale metadata, and expose unique non-hardlinked data totals for completed snapshots. Update dashboard and host storage summaries so incomplete data is visible and complete snapshot totals distinguish allocated and unique data. --- src/pobsync_backend/stats_summary.py | 24 +++- .../pobsync_backend/host_detail.html | 9 +- .../partials/dashboard_hosts.html | 4 + .../partials/dashboard_priority.html | 4 + .../tests/test_stats_summary.py | 110 ++++++++++++++++-- src/pobsync_backend/tests/test_views.py | 42 +++++-- 6 files changed, 167 insertions(+), 26 deletions(-) diff --git a/src/pobsync_backend/stats_summary.py b/src/pobsync_backend/stats_summary.py index 06b7aca..773ef53 100644 --- a/src/pobsync_backend/stats_summary.py +++ b/src/pobsync_backend/stats_summary.py @@ -120,12 +120,15 @@ def _backup_data_by_kind(host: HostConfig) -> dict[str, Any]: row = rows.setdefault(snapshot.kind, _empty_snapshot_data_row()) allocated = summary.get("allocated_size_bytes") or summary.get("apparent_size_bytes") or 0 apparent = summary.get("apparent_size_bytes") or 0 + unique_apparent = summary.get("unique_apparent_size_bytes") or 0 row["count"] += 1 row["allocated_size_bytes"] += int(allocated) row["apparent_size_bytes"] += int(apparent) + row["unique_apparent_size_bytes"] += int(unique_apparent) total["count"] += 1 total["allocated_size_bytes"] += int(allocated) total["apparent_size_bytes"] += int(apparent) + total["unique_apparent_size_bytes"] += int(unique_apparent) return { "scheduled": rows[SnapshotRecord.Kind.SCHEDULED], @@ -140,6 +143,7 @@ def _empty_snapshot_data_row() -> dict[str, int]: "count": 0, "allocated_size_bytes": 0, "apparent_size_bytes": 0, + "unique_apparent_size_bytes": 0, } @@ -157,6 +161,7 @@ def _sum_backup_data_by_kind(rows: Iterable[dict[str, dict[str, int]]]) -> dict[ total_row["count"] += values.get("count", 0) total_row["allocated_size_bytes"] += values.get("allocated_size_bytes", 0) total_row["apparent_size_bytes"] += values.get("apparent_size_bytes", 0) + total_row["unique_apparent_size_bytes"] += values.get("unique_apparent_size_bytes", 0) return total_rows @@ -168,21 +173,28 @@ def _snapshot_summary(snapshot: SnapshotRecord | None) -> dict[str, Any]: stats = metadata.get("stats") if isinstance(metadata.get("stats"), dict) else {} storage = stats.get("storage") if isinstance(stats.get("storage"), dict) else {} snapshot_storage = storage.get("snapshot") if isinstance(storage.get("snapshot"), dict) else {} - has_recorded_size = ( - _int_at(snapshot_storage, "allocated_size_bytes") is not None - or _int_at(snapshot_storage, "apparent_size_bytes") is not None - ) - if not has_recorded_size: + if snapshot.kind == SnapshotRecord.Kind.INCOMPLETE: snapshot_storage = _snapshot_storage_from_filesystem(snapshot) + else: + has_recorded_size = ( + _int_at(snapshot_storage, "allocated_size_bytes") is not None + or _int_at(snapshot_storage, "apparent_size_bytes") is not None + ) + if not has_recorded_size: + snapshot_storage = _snapshot_storage_from_filesystem(snapshot) + apparent_size = _int_at(snapshot_storage, "apparent_size_bytes") + hardlinked_apparent = _int_at(snapshot_storage, "hardlinked_apparent_size_bytes") or 0 return { "id": snapshot.id, "dirname": snapshot.dirname, "kind": snapshot.kind, "status": snapshot.status, "started_at": snapshot.started_at, - "apparent_size_bytes": _int_at(snapshot_storage, "apparent_size_bytes"), + "apparent_size_bytes": apparent_size, "allocated_size_bytes": _int_at(snapshot_storage, "allocated_size_bytes"), "hardlinked_files": _int_at(snapshot_storage, "hardlinked_files"), + "hardlinked_apparent_size_bytes": hardlinked_apparent, + "unique_apparent_size_bytes": max((apparent_size or 0) - hardlinked_apparent, 0), } diff --git a/src/pobsync_backend/templates/pobsync_backend/host_detail.html b/src/pobsync_backend/templates/pobsync_backend/host_detail.html index 6ff8faa..83fb2bc 100644 --- a/src/pobsync_backend/templates/pobsync_backend/host_detail.html +++ b/src/pobsync_backend/templates/pobsync_backend/host_detail.html @@ -178,21 +178,28 @@
Scheduled
{{ stats_summary.backup_data.scheduled.allocated_size_bytes|filesizeformat }}
+
unique {{ stats_summary.backup_data.scheduled.unique_apparent_size_bytes|filesizeformat }}
Manual
{{ stats_summary.backup_data.manual.allocated_size_bytes|filesizeformat }}
+
unique {{ stats_summary.backup_data.manual.unique_apparent_size_bytes|filesizeformat }}
Incomplete
{{ stats_summary.backup_data.incomplete.allocated_size_bytes|filesizeformat }}
+
measured from disk
Total
{{ stats_summary.backup_data.total.allocated_size_bytes|filesizeformat }}
+
unique {{ stats_summary.backup_data.total.unique_apparent_size_bytes|filesizeformat }}
-

Totals use the allocated snapshot size recorded in backup metadata, grouped by snapshot kind.

+

+ Main totals use allocated snapshot size. Unique values estimate non-hardlinked visible data; incomplete + snapshots are measured from disk because their metadata can be stale. +

{% if stats_summary.runs %} diff --git a/src/pobsync_backend/templates/pobsync_backend/partials/dashboard_hosts.html b/src/pobsync_backend/templates/pobsync_backend/partials/dashboard_hosts.html index 35b5047..31d2617 100644 --- a/src/pobsync_backend/templates/pobsync_backend/partials/dashboard_hosts.html +++ b/src/pobsync_backend/templates/pobsync_backend/partials/dashboard_hosts.html @@ -105,18 +105,22 @@
Scheduled data
{{ host.stats_summary.backup_data.scheduled.allocated_size_bytes|filesizeformat }}
+
unique {{ host.stats_summary.backup_data.scheduled.unique_apparent_size_bytes|filesizeformat }}
Manual data
{{ host.stats_summary.backup_data.manual.allocated_size_bytes|filesizeformat }}
+
unique {{ host.stats_summary.backup_data.manual.unique_apparent_size_bytes|filesizeformat }}
Incomplete data
{{ host.stats_summary.backup_data.incomplete.allocated_size_bytes|filesizeformat }}
+
measured from disk
Total data
{{ host.stats_summary.backup_data.total.allocated_size_bytes|filesizeformat }}
+
unique {{ host.stats_summary.backup_data.total.unique_apparent_size_bytes|filesizeformat }}
diff --git a/src/pobsync_backend/templates/pobsync_backend/partials/dashboard_priority.html b/src/pobsync_backend/templates/pobsync_backend/partials/dashboard_priority.html index c15001a..7b673a1 100644 --- a/src/pobsync_backend/templates/pobsync_backend/partials/dashboard_priority.html +++ b/src/pobsync_backend/templates/pobsync_backend/partials/dashboard_priority.html @@ -130,18 +130,22 @@
Scheduled data {{ stats_summary.backup_data.scheduled.allocated_size_bytes|filesizeformat }} + unique {{ stats_summary.backup_data.scheduled.unique_apparent_size_bytes|filesizeformat }}
Manual data {{ stats_summary.backup_data.manual.allocated_size_bytes|filesizeformat }} + unique {{ stats_summary.backup_data.manual.unique_apparent_size_bytes|filesizeformat }}
Incomplete data {{ stats_summary.backup_data.incomplete.allocated_size_bytes|filesizeformat }} + measured from disk
Total snapshot data {{ stats_summary.backup_data.total.allocated_size_bytes|filesizeformat }} + unique {{ stats_summary.backup_data.total.unique_apparent_size_bytes|filesizeformat }}
diff --git a/src/pobsync_backend/tests/test_stats_summary.py b/src/pobsync_backend/tests/test_stats_summary.py index bd40472..d64ab30 100644 --- a/src/pobsync_backend/tests/test_stats_summary.py +++ b/src/pobsync_backend/tests/test_stats_summary.py @@ -18,32 +18,44 @@ class StatsSummaryTests(TestCase): self._snapshot(web, "20260519-021500Z__SCHED01", SnapshotRecord.Kind.SCHEDULED, allocated=100) self._snapshot(web, "20260519-031500Z__MANUAL1", SnapshotRecord.Kind.MANUAL, allocated=200) self._snapshot(db, "20260519-041500Z__SCHED02", SnapshotRecord.Kind.SCHEDULED, allocated=300) - self._snapshot(db, "20260519-051500Z__BROKEN1", SnapshotRecord.Kind.INCOMPLETE, allocated=400) + with TemporaryDirectory() as tmp: + incomplete_usage = self._incomplete_snapshot_on_disk( + db, + Path(tmp), + "20260519-051500Z__BROKEN1", + ) - stats = collect_dashboard_stats(hosts=[web, db], global_config=None) + stats = collect_dashboard_stats(hosts=[web, db], global_config=None) self.assertEqual(stats["backup_data"]["scheduled"]["count"], 2) self.assertEqual(stats["backup_data"]["scheduled"]["allocated_size_bytes"], 400) self.assertEqual(stats["backup_data"]["manual"]["allocated_size_bytes"], 200) - self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], 400) + self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], incomplete_usage["allocated_size_bytes"]) self.assertEqual(stats["backup_data"]["total"]["count"], 4) - self.assertEqual(stats["backup_data"]["total"]["allocated_size_bytes"], 1000) + self.assertEqual(stats["backup_data"]["total"]["allocated_size_bytes"], 600 + incomplete_usage["allocated_size_bytes"]) + self.assertEqual(stats["backup_data"]["total"]["unique_apparent_size_bytes"], 1200 + incomplete_usage["apparent_size_bytes"]) def test_collect_host_stats_sums_backup_data_by_snapshot_kind(self) -> None: host = HostConfig.objects.create(host="web-01", address="web-01.example.test") self._snapshot(host, "20260519-021500Z__SCHED01", SnapshotRecord.Kind.SCHEDULED, allocated=100) self._snapshot(host, "20260519-031500Z__SCHED02", SnapshotRecord.Kind.SCHEDULED, allocated=200) self._snapshot(host, "20260519-041500Z__MANUAL1", SnapshotRecord.Kind.MANUAL, allocated=300) - self._snapshot(host, "20260519-051500Z__BROKEN1", SnapshotRecord.Kind.INCOMPLETE, allocated=400) + with TemporaryDirectory() as tmp: + incomplete_usage = self._incomplete_snapshot_on_disk( + host, + Path(tmp), + "20260519-051500Z__BROKEN1", + ) - stats = collect_host_stats(host=host) + stats = collect_host_stats(host=host) self.assertEqual(stats["backup_data"]["scheduled"]["count"], 2) self.assertEqual(stats["backup_data"]["scheduled"]["allocated_size_bytes"], 300) self.assertEqual(stats["backup_data"]["manual"]["allocated_size_bytes"], 300) - self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], 400) + self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], incomplete_usage["allocated_size_bytes"]) self.assertEqual(stats["backup_data"]["total"]["count"], 4) - self.assertEqual(stats["backup_data"]["total"]["allocated_size_bytes"], 1000) + self.assertEqual(stats["backup_data"]["total"]["allocated_size_bytes"], 600 + incomplete_usage["allocated_size_bytes"]) + self.assertEqual(stats["backup_data"]["total"]["unique_apparent_size_bytes"], 1200 + incomplete_usage["apparent_size_bytes"]) def test_collect_host_stats_falls_back_to_filesystem_usage_for_snapshots_without_metadata(self) -> None: host = HostConfig.objects.create(host="web-01", address="web-01.example.test") @@ -81,8 +93,87 @@ class StatsSummaryTests(TestCase): expected_usage["allocated_size_bytes"], ) + def test_collect_host_stats_measures_incomplete_data_from_disk_even_with_stale_metadata(self) -> None: + host = HostConfig.objects.create(host="web-01", address="web-01.example.test") + with TemporaryDirectory() as tmp: + incomplete_dir = Path(tmp) / host.host / ".incomplete" / "20260519-051500Z__BROKEN1" + data_dir = incomplete_dir / "data" + data_dir.mkdir(parents=True) + data_dir.joinpath("partial-file").write_text("interrupted backup data\n", encoding="utf-8") + expected_usage = tree_usage(data_dir) + SnapshotRecord.objects.create( + host=host, + kind=SnapshotRecord.Kind.INCOMPLETE, + dirname=incomplete_dir.name, + path=str(incomplete_dir), + status="failed", + metadata={ + "stats": { + "storage": { + "snapshot": { + "apparent_size_bytes": 0, + "allocated_size_bytes": 0, + } + } + } + }, + ) + + stats = collect_host_stats(host=host) + + self.assertEqual( + stats["backup_data"]["incomplete"]["allocated_size_bytes"], + expected_usage["allocated_size_bytes"], + ) + self.assertGreater(stats["backup_data"]["incomplete"]["apparent_size_bytes"], 0) + + def test_collect_host_stats_reports_non_hardlinked_snapshot_data(self) -> None: + host = HostConfig.objects.create(host="web-01", address="web-01.example.test") + self._snapshot_with_sizes( + host, + "20260519-021500Z__SCHED01", + SnapshotRecord.Kind.SCHEDULED, + allocated=1_200, + apparent=2_000, + hardlinked_apparent=1_500, + ) + + stats = collect_host_stats(host=host) + + self.assertEqual(stats["backup_data"]["scheduled"]["apparent_size_bytes"], 2_000) + self.assertEqual(stats["backup_data"]["scheduled"]["unique_apparent_size_bytes"], 500) + self.assertEqual(stats["backup_data"]["total"]["unique_apparent_size_bytes"], 500) + def _snapshot(self, host: HostConfig, dirname: str, kind: str, *, allocated: int) -> SnapshotRecord: + return self._snapshot_with_sizes(host, dirname, kind, allocated=allocated) + + def _incomplete_snapshot_on_disk(self, host: HostConfig, root: Path, dirname: str) -> dict: + incomplete_dir = root / host.host / ".incomplete" / dirname + data_dir = incomplete_dir / "data" + data_dir.mkdir(parents=True) + data_dir.joinpath("partial-file").write_text("interrupted backup data\n", encoding="utf-8") + usage = tree_usage(data_dir) + SnapshotRecord.objects.create( + host=host, + kind=SnapshotRecord.Kind.INCOMPLETE, + dirname=dirname, + path=str(incomplete_dir), + status="failed", + ) + return usage + + def _snapshot_with_sizes( + self, + host: HostConfig, + dirname: str, + kind: str, + *, + allocated: int, + apparent: int | None = None, + hardlinked_apparent: int = 0, + ) -> SnapshotRecord: started_at = datetime.strptime(dirname.split("__", 1)[0], "%Y%m%d-%H%M%SZ").replace(tzinfo=timezone.utc) + apparent_size = apparent if apparent is not None else allocated * 2 return SnapshotRecord.objects.create( host=host, kind=kind, @@ -94,8 +185,9 @@ class StatsSummaryTests(TestCase): "stats": { "storage": { "snapshot": { - "apparent_size_bytes": allocated * 2, + "apparent_size_bytes": apparent_size, "allocated_size_bytes": allocated, + "hardlinked_apparent_size_bytes": hardlinked_apparent, } } } diff --git a/src/pobsync_backend/tests/test_views.py b/src/pobsync_backend/tests/test_views.py index 45b4cc5..82fb79f 100644 --- a/src/pobsync_backend/tests/test_views.py +++ b/src/pobsync_backend/tests/test_views.py @@ -265,12 +265,23 @@ class ViewTests(TestCase): db = HostConfig.objects.create(host="db-01", address="db-01.example.test") scheduled = self._snapshot(web, "20260519-021500Z__SCHED01", kind=SnapshotRecord.Kind.SCHEDULED) manual = self._snapshot(web, "20260519-031500Z__MANUAL1", kind=SnapshotRecord.Kind.MANUAL) - incomplete = self._snapshot(db, "20260519-041500Z__BROKEN1", kind=SnapshotRecord.Kind.INCOMPLETE) self._set_snapshot_storage(scheduled, allocated=100) self._set_snapshot_storage(manual, allocated=200) - self._set_snapshot_storage(incomplete, allocated=300) + with TemporaryDirectory() as tmp: + incomplete_dir = Path(tmp) / db.host / ".incomplete" / "20260519-041500Z__BROKEN1" + data_dir = incomplete_dir / "data" + data_dir.mkdir(parents=True) + data_dir.joinpath("partial-file").write_text("interrupted backup data\n", encoding="utf-8") + expected_usage = tree_usage(data_dir) + SnapshotRecord.objects.create( + host=db, + kind=SnapshotRecord.Kind.INCOMPLETE, + dirname=incomplete_dir.name, + path=str(incomplete_dir), + status="failed", + ) - response = self.client.get(reverse("dashboard_priority_live")) + response = self.client.get(reverse("dashboard_priority_live")) self.assertEqual(response.status_code, 200) self.assertContains(response, "Scheduled data") @@ -279,8 +290,8 @@ class ViewTests(TestCase): self.assertContains(response, "Total snapshot data") self.assertContains(response, "100 bytes", html=True) self.assertContains(response, "200 bytes", html=True) - self.assertContains(response, "300 bytes", html=True) - self.assertContains(response, "600 bytes", html=True) + self.assertContains(response, filesizeformat(expected_usage["allocated_size_bytes"])) + self.assertContains(response, filesizeformat(300 + expected_usage["allocated_size_bytes"])) def test_dashboard_hosts_live_returns_hosts_partial(self) -> None: self.client.force_login(self.staff_user) @@ -300,12 +311,23 @@ class ViewTests(TestCase): host = HostConfig.objects.create(host="web-01", address="web-01.example.test") scheduled = self._snapshot(host, "20260519-021500Z__SCHED01", kind=SnapshotRecord.Kind.SCHEDULED) manual = self._snapshot(host, "20260519-031500Z__MANUAL1", kind=SnapshotRecord.Kind.MANUAL) - incomplete = self._snapshot(host, "20260519-041500Z__BROKEN1", kind=SnapshotRecord.Kind.INCOMPLETE) self._set_snapshot_storage(scheduled, allocated=100) self._set_snapshot_storage(manual, allocated=200) - self._set_snapshot_storage(incomplete, allocated=300) + with TemporaryDirectory() as tmp: + incomplete_dir = Path(tmp) / host.host / ".incomplete" / "20260519-041500Z__BROKEN1" + data_dir = incomplete_dir / "data" + data_dir.mkdir(parents=True) + data_dir.joinpath("partial-file").write_text("interrupted backup data\n", encoding="utf-8") + expected_usage = tree_usage(data_dir) + SnapshotRecord.objects.create( + host=host, + kind=SnapshotRecord.Kind.INCOMPLETE, + dirname=incomplete_dir.name, + path=str(incomplete_dir), + status="failed", + ) - response = self.client.get(reverse("dashboard_hosts_live")) + response = self.client.get(reverse("dashboard_hosts_live")) self.assertEqual(response.status_code, 200) self.assertContains(response, "Scheduled data") @@ -314,8 +336,8 @@ class ViewTests(TestCase): self.assertContains(response, "Total data") self.assertContains(response, "100 bytes", html=True) self.assertContains(response, "200 bytes", html=True) - self.assertContains(response, "300 bytes", html=True) - self.assertContains(response, "600 bytes", html=True) + self.assertContains(response, filesizeformat(expected_usage["allocated_size_bytes"])) + self.assertContains(response, filesizeformat(300 + expected_usage["allocated_size_bytes"])) def test_dashboard_host_cards_measure_incomplete_data_without_snapshot_metadata(self) -> None: self.client.force_login(self.staff_user) -- 2.43.0