(bugfix) Avoid live backup data scans in web views

Use stored snapshot storage metadata for dashboard and host backup data summaries instead of walking snapshot directories during request rendering. Snapshots without recorded storage metadata are counted as not measured so large backup targets cannot trigger unbounded filesystem scans from live-refresh views.

Closes #97
This commit is contained in:
2026-06-08 22:48:22 +02:00
parent 522ae98bb1
commit f886a7b620
6 changed files with 163 additions and 183 deletions

View File

@@ -1,12 +1,10 @@
from __future__ import annotations
from datetime import datetime, timezone
from pathlib import Path
from tempfile import TemporaryDirectory
from unittest.mock import patch
from django.test import TestCase
from pobsync.run_stats import tree_usage
from pobsync_backend.models import HostConfig, SnapshotRecord
from pobsync_backend.stats_summary import collect_dashboard_stats, collect_host_stats
@@ -18,114 +16,109 @@ class StatsSummaryTests(TestCase):
self._snapshot(web, "20260519-021500Z__SCHED01", SnapshotRecord.Kind.SCHEDULED, allocated=100)
self._snapshot(web, "20260519-031500Z__MANUAL1", SnapshotRecord.Kind.MANUAL, allocated=200)
self._snapshot(db, "20260519-041500Z__SCHED02", SnapshotRecord.Kind.SCHEDULED, allocated=300)
with TemporaryDirectory() as tmp:
incomplete_usage = self._incomplete_snapshot_on_disk(
db,
Path(tmp),
"20260519-051500Z__BROKEN1",
)
self._snapshot(db, "20260519-051500Z__BROKEN1", SnapshotRecord.Kind.INCOMPLETE, allocated=400)
stats = collect_dashboard_stats(hosts=[web, db], global_config=None)
stats = collect_dashboard_stats(hosts=[web, db], global_config=None)
self.assertEqual(stats["backup_data"]["scheduled"]["count"], 2)
self.assertEqual(stats["backup_data"]["scheduled"]["allocated_size_bytes"], 400)
self.assertEqual(stats["backup_data"]["manual"]["allocated_size_bytes"], 200)
self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], incomplete_usage["allocated_size_bytes"])
self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], 400)
self.assertEqual(stats["backup_data"]["total"]["count"], 4)
self.assertEqual(stats["backup_data"]["total"]["allocated_size_bytes"], 600 + incomplete_usage["allocated_size_bytes"])
self.assertEqual(stats["backup_data"]["total"]["unique_apparent_size_bytes"], 1200 + incomplete_usage["apparent_size_bytes"])
self.assertEqual(stats["backup_data"]["total"]["measured_count"], 4)
self.assertEqual(stats["backup_data"]["total"]["unknown_count"], 0)
self.assertEqual(stats["backup_data"]["total"]["allocated_size_bytes"], 1000)
self.assertEqual(stats["backup_data"]["total"]["unique_apparent_size_bytes"], 2000)
def test_collect_host_stats_sums_backup_data_by_snapshot_kind(self) -> None:
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
self._snapshot(host, "20260519-021500Z__SCHED01", SnapshotRecord.Kind.SCHEDULED, allocated=100)
self._snapshot(host, "20260519-031500Z__SCHED02", SnapshotRecord.Kind.SCHEDULED, allocated=200)
self._snapshot(host, "20260519-041500Z__MANUAL1", SnapshotRecord.Kind.MANUAL, allocated=300)
with TemporaryDirectory() as tmp:
incomplete_usage = self._incomplete_snapshot_on_disk(
host,
Path(tmp),
"20260519-051500Z__BROKEN1",
)
self._snapshot(host, "20260519-051500Z__BROKEN1", SnapshotRecord.Kind.INCOMPLETE, allocated=400)
stats = collect_host_stats(host=host)
stats = collect_host_stats(host=host)
self.assertEqual(stats["backup_data"]["scheduled"]["count"], 2)
self.assertEqual(stats["backup_data"]["scheduled"]["allocated_size_bytes"], 300)
self.assertEqual(stats["backup_data"]["manual"]["allocated_size_bytes"], 300)
self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], incomplete_usage["allocated_size_bytes"])
self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], 400)
self.assertEqual(stats["backup_data"]["total"]["count"], 4)
self.assertEqual(stats["backup_data"]["total"]["allocated_size_bytes"], 600 + incomplete_usage["allocated_size_bytes"])
self.assertEqual(stats["backup_data"]["total"]["unique_apparent_size_bytes"], 1200 + incomplete_usage["apparent_size_bytes"])
self.assertEqual(stats["backup_data"]["total"]["measured_count"], 4)
self.assertEqual(stats["backup_data"]["total"]["unknown_count"], 0)
self.assertEqual(stats["backup_data"]["total"]["allocated_size_bytes"], 1000)
self.assertEqual(stats["backup_data"]["total"]["unique_apparent_size_bytes"], 2000)
def test_collect_host_stats_falls_back_to_filesystem_usage_for_snapshots_without_metadata(self) -> None:
def test_collect_host_stats_marks_snapshots_without_storage_metadata_unknown(self) -> None:
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
with TemporaryDirectory() as tmp:
incomplete_dir = Path(tmp) / host.host / ".incomplete" / "20260519-051500Z__BROKEN1"
data_dir = incomplete_dir / "data"
meta_dir = incomplete_dir / "meta"
data_dir.mkdir(parents=True)
meta_dir.mkdir()
data_dir.joinpath("partial-file").write_text("interrupted backup data\n", encoding="utf-8")
meta_dir.joinpath("rsync.log").write_text("not part of the backup data total\n", encoding="utf-8")
expected_usage = tree_usage(data_dir)
SnapshotRecord.objects.create(
host=host,
kind=SnapshotRecord.Kind.INCOMPLETE,
dirname=incomplete_dir.name,
path=str(incomplete_dir),
status="failed",
metadata={},
)
SnapshotRecord.objects.create(
host=host,
kind=SnapshotRecord.Kind.INCOMPLETE,
dirname="20260519-051500Z__BROKEN1",
path="/backups/web-01/.incomplete/20260519-051500Z__BROKEN1",
status="failed",
metadata={},
)
with patch("pobsync_backend.stats_summary.tree_usage", create=True) as tree_usage:
stats = collect_host_stats(host=host)
tree_usage.assert_not_called()
self.assertEqual(stats["backup_data"]["incomplete"]["count"], 1)
self.assertEqual(
stats["backup_data"]["incomplete"]["allocated_size_bytes"],
expected_usage["allocated_size_bytes"],
)
self.assertEqual(
stats["backup_data"]["incomplete"]["apparent_size_bytes"],
expected_usage["apparent_size_bytes"],
)
self.assertEqual(
stats["backup_data"]["total"]["allocated_size_bytes"],
expected_usage["allocated_size_bytes"],
)
self.assertEqual(stats["backup_data"]["incomplete"]["measured_count"], 0)
self.assertEqual(stats["backup_data"]["incomplete"]["unknown_count"], 1)
self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], 0)
self.assertEqual(stats["backup_data"]["incomplete"]["apparent_size_bytes"], 0)
self.assertEqual(stats["backup_data"]["total"]["unknown_count"], 1)
def test_collect_host_stats_measures_incomplete_data_from_disk_even_with_stale_metadata(self) -> None:
def test_collect_host_stats_uses_recorded_zero_storage_without_rescanning(self) -> None:
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
with TemporaryDirectory() as tmp:
incomplete_dir = Path(tmp) / host.host / ".incomplete" / "20260519-051500Z__BROKEN1"
data_dir = incomplete_dir / "data"
data_dir.mkdir(parents=True)
data_dir.joinpath("partial-file").write_text("interrupted backup data\n", encoding="utf-8")
expected_usage = tree_usage(data_dir)
SnapshotRecord.objects.create(
host=host,
kind=SnapshotRecord.Kind.INCOMPLETE,
dirname=incomplete_dir.name,
path=str(incomplete_dir),
status="failed",
metadata={
"stats": {
"storage": {
"snapshot": {
"apparent_size_bytes": 0,
"allocated_size_bytes": 0,
}
SnapshotRecord.objects.create(
host=host,
kind=SnapshotRecord.Kind.INCOMPLETE,
dirname="20260519-051500Z__BROKEN1",
path="/backups/web-01/.incomplete/20260519-051500Z__BROKEN1",
status="failed",
metadata={
"stats": {
"storage": {
"snapshot": {
"apparent_size_bytes": 0,
"allocated_size_bytes": 0,
}
}
},
)
}
},
)
with patch("pobsync_backend.stats_summary.tree_usage", create=True) as tree_usage:
stats = collect_host_stats(host=host)
self.assertEqual(
stats["backup_data"]["incomplete"]["allocated_size_bytes"],
expected_usage["allocated_size_bytes"],
tree_usage.assert_not_called()
self.assertEqual(stats["backup_data"]["incomplete"]["measured_count"], 1)
self.assertEqual(stats["backup_data"]["incomplete"]["unknown_count"], 0)
self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], 0)
self.assertEqual(stats["backup_data"]["incomplete"]["apparent_size_bytes"], 0)
def test_collect_dashboard_stats_does_not_scan_filesystem_for_missing_snapshot_metadata(self) -> None:
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
SnapshotRecord.objects.create(
host=host,
kind=SnapshotRecord.Kind.SCHEDULED,
dirname="20260519-051500Z__SCHED01",
path="/backups/web-01/scheduled/20260519-051500Z__SCHED01",
status="success",
metadata={},
)
self.assertGreater(stats["backup_data"]["incomplete"]["apparent_size_bytes"], 0)
with patch("pobsync_backend.stats_summary.tree_usage", create=True) as tree_usage:
stats = collect_dashboard_stats(hosts=[host], global_config=None)
tree_usage.assert_not_called()
self.assertEqual(stats["backup_data"]["scheduled"]["count"], 1)
self.assertEqual(stats["backup_data"]["scheduled"]["measured_count"], 0)
self.assertEqual(stats["backup_data"]["scheduled"]["unknown_count"], 1)
self.assertEqual(stats["backup_data"]["scheduled"]["allocated_size_bytes"], 0)
def test_collect_host_stats_reports_non_hardlinked_snapshot_data(self) -> None:
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
@@ -147,21 +140,6 @@ class StatsSummaryTests(TestCase):
def _snapshot(self, host: HostConfig, dirname: str, kind: str, *, allocated: int) -> SnapshotRecord:
return self._snapshot_with_sizes(host, dirname, kind, allocated=allocated)
def _incomplete_snapshot_on_disk(self, host: HostConfig, root: Path, dirname: str) -> dict:
incomplete_dir = root / host.host / ".incomplete" / dirname
data_dir = incomplete_dir / "data"
data_dir.mkdir(parents=True)
data_dir.joinpath("partial-file").write_text("interrupted backup data\n", encoding="utf-8")
usage = tree_usage(data_dir)
SnapshotRecord.objects.create(
host=host,
kind=SnapshotRecord.Kind.INCOMPLETE,
dirname=dirname,
path=str(incomplete_dir),
status="failed",
)
return usage
def _snapshot_with_sizes(
self,
host: HostConfig,