(bugfix) Avoid live backup data scans in web views
Use stored snapshot storage metadata for dashboard and host backup data summaries instead of walking snapshot directories during request rendering. Snapshots without recorded storage metadata are counted as not measured, so large backup targets cannot trigger unbounded filesystem scans from live-refresh views. Closes #97.
This commit is contained in:
@@ -1,12 +1,10 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from tempfile import TemporaryDirectory
|
||||
from unittest.mock import patch
|
||||
|
||||
from django.test import TestCase
|
||||
|
||||
from pobsync.run_stats import tree_usage
|
||||
from pobsync_backend.models import HostConfig, SnapshotRecord
|
||||
from pobsync_backend.stats_summary import collect_dashboard_stats, collect_host_stats
|
||||
|
||||
@@ -18,114 +16,109 @@ class StatsSummaryTests(TestCase):
|
||||
self._snapshot(web, "20260519-021500Z__SCHED01", SnapshotRecord.Kind.SCHEDULED, allocated=100)
|
||||
self._snapshot(web, "20260519-031500Z__MANUAL1", SnapshotRecord.Kind.MANUAL, allocated=200)
|
||||
self._snapshot(db, "20260519-041500Z__SCHED02", SnapshotRecord.Kind.SCHEDULED, allocated=300)
|
||||
with TemporaryDirectory() as tmp:
|
||||
incomplete_usage = self._incomplete_snapshot_on_disk(
|
||||
db,
|
||||
Path(tmp),
|
||||
"20260519-051500Z__BROKEN1",
|
||||
)
|
||||
self._snapshot(db, "20260519-051500Z__BROKEN1", SnapshotRecord.Kind.INCOMPLETE, allocated=400)
|
||||
|
||||
stats = collect_dashboard_stats(hosts=[web, db], global_config=None)
|
||||
stats = collect_dashboard_stats(hosts=[web, db], global_config=None)
|
||||
|
||||
self.assertEqual(stats["backup_data"]["scheduled"]["count"], 2)
|
||||
self.assertEqual(stats["backup_data"]["scheduled"]["allocated_size_bytes"], 400)
|
||||
self.assertEqual(stats["backup_data"]["manual"]["allocated_size_bytes"], 200)
|
||||
self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], incomplete_usage["allocated_size_bytes"])
|
||||
self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], 400)
|
||||
self.assertEqual(stats["backup_data"]["total"]["count"], 4)
|
||||
self.assertEqual(stats["backup_data"]["total"]["allocated_size_bytes"], 600 + incomplete_usage["allocated_size_bytes"])
|
||||
self.assertEqual(stats["backup_data"]["total"]["unique_apparent_size_bytes"], 1200 + incomplete_usage["apparent_size_bytes"])
|
||||
self.assertEqual(stats["backup_data"]["total"]["measured_count"], 4)
|
||||
self.assertEqual(stats["backup_data"]["total"]["unknown_count"], 0)
|
||||
self.assertEqual(stats["backup_data"]["total"]["allocated_size_bytes"], 1000)
|
||||
self.assertEqual(stats["backup_data"]["total"]["unique_apparent_size_bytes"], 2000)
|
||||
|
||||
def test_collect_host_stats_sums_backup_data_by_snapshot_kind(self) -> None:
|
||||
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
|
||||
self._snapshot(host, "20260519-021500Z__SCHED01", SnapshotRecord.Kind.SCHEDULED, allocated=100)
|
||||
self._snapshot(host, "20260519-031500Z__SCHED02", SnapshotRecord.Kind.SCHEDULED, allocated=200)
|
||||
self._snapshot(host, "20260519-041500Z__MANUAL1", SnapshotRecord.Kind.MANUAL, allocated=300)
|
||||
with TemporaryDirectory() as tmp:
|
||||
incomplete_usage = self._incomplete_snapshot_on_disk(
|
||||
host,
|
||||
Path(tmp),
|
||||
"20260519-051500Z__BROKEN1",
|
||||
)
|
||||
self._snapshot(host, "20260519-051500Z__BROKEN1", SnapshotRecord.Kind.INCOMPLETE, allocated=400)
|
||||
|
||||
stats = collect_host_stats(host=host)
|
||||
stats = collect_host_stats(host=host)
|
||||
|
||||
self.assertEqual(stats["backup_data"]["scheduled"]["count"], 2)
|
||||
self.assertEqual(stats["backup_data"]["scheduled"]["allocated_size_bytes"], 300)
|
||||
self.assertEqual(stats["backup_data"]["manual"]["allocated_size_bytes"], 300)
|
||||
self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], incomplete_usage["allocated_size_bytes"])
|
||||
self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], 400)
|
||||
self.assertEqual(stats["backup_data"]["total"]["count"], 4)
|
||||
self.assertEqual(stats["backup_data"]["total"]["allocated_size_bytes"], 600 + incomplete_usage["allocated_size_bytes"])
|
||||
self.assertEqual(stats["backup_data"]["total"]["unique_apparent_size_bytes"], 1200 + incomplete_usage["apparent_size_bytes"])
|
||||
self.assertEqual(stats["backup_data"]["total"]["measured_count"], 4)
|
||||
self.assertEqual(stats["backup_data"]["total"]["unknown_count"], 0)
|
||||
self.assertEqual(stats["backup_data"]["total"]["allocated_size_bytes"], 1000)
|
||||
self.assertEqual(stats["backup_data"]["total"]["unique_apparent_size_bytes"], 2000)
|
||||
|
||||
def test_collect_host_stats_falls_back_to_filesystem_usage_for_snapshots_without_metadata(self) -> None:
|
||||
def test_collect_host_stats_marks_snapshots_without_storage_metadata_unknown(self) -> None:
|
||||
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
|
||||
with TemporaryDirectory() as tmp:
|
||||
incomplete_dir = Path(tmp) / host.host / ".incomplete" / "20260519-051500Z__BROKEN1"
|
||||
data_dir = incomplete_dir / "data"
|
||||
meta_dir = incomplete_dir / "meta"
|
||||
data_dir.mkdir(parents=True)
|
||||
meta_dir.mkdir()
|
||||
data_dir.joinpath("partial-file").write_text("interrupted backup data\n", encoding="utf-8")
|
||||
meta_dir.joinpath("rsync.log").write_text("not part of the backup data total\n", encoding="utf-8")
|
||||
expected_usage = tree_usage(data_dir)
|
||||
SnapshotRecord.objects.create(
|
||||
host=host,
|
||||
kind=SnapshotRecord.Kind.INCOMPLETE,
|
||||
dirname=incomplete_dir.name,
|
||||
path=str(incomplete_dir),
|
||||
status="failed",
|
||||
metadata={},
|
||||
)
|
||||
SnapshotRecord.objects.create(
|
||||
host=host,
|
||||
kind=SnapshotRecord.Kind.INCOMPLETE,
|
||||
dirname="20260519-051500Z__BROKEN1",
|
||||
path="/backups/web-01/.incomplete/20260519-051500Z__BROKEN1",
|
||||
status="failed",
|
||||
metadata={},
|
||||
)
|
||||
|
||||
with patch("pobsync_backend.stats_summary.tree_usage", create=True) as tree_usage:
|
||||
stats = collect_host_stats(host=host)
|
||||
|
||||
tree_usage.assert_not_called()
|
||||
self.assertEqual(stats["backup_data"]["incomplete"]["count"], 1)
|
||||
self.assertEqual(
|
||||
stats["backup_data"]["incomplete"]["allocated_size_bytes"],
|
||||
expected_usage["allocated_size_bytes"],
|
||||
)
|
||||
self.assertEqual(
|
||||
stats["backup_data"]["incomplete"]["apparent_size_bytes"],
|
||||
expected_usage["apparent_size_bytes"],
|
||||
)
|
||||
self.assertEqual(
|
||||
stats["backup_data"]["total"]["allocated_size_bytes"],
|
||||
expected_usage["allocated_size_bytes"],
|
||||
)
|
||||
self.assertEqual(stats["backup_data"]["incomplete"]["measured_count"], 0)
|
||||
self.assertEqual(stats["backup_data"]["incomplete"]["unknown_count"], 1)
|
||||
self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], 0)
|
||||
self.assertEqual(stats["backup_data"]["incomplete"]["apparent_size_bytes"], 0)
|
||||
self.assertEqual(stats["backup_data"]["total"]["unknown_count"], 1)
|
||||
|
||||
def test_collect_host_stats_measures_incomplete_data_from_disk_even_with_stale_metadata(self) -> None:
|
||||
def test_collect_host_stats_uses_recorded_zero_storage_without_rescanning(self) -> None:
|
||||
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
|
||||
with TemporaryDirectory() as tmp:
|
||||
incomplete_dir = Path(tmp) / host.host / ".incomplete" / "20260519-051500Z__BROKEN1"
|
||||
data_dir = incomplete_dir / "data"
|
||||
data_dir.mkdir(parents=True)
|
||||
data_dir.joinpath("partial-file").write_text("interrupted backup data\n", encoding="utf-8")
|
||||
expected_usage = tree_usage(data_dir)
|
||||
SnapshotRecord.objects.create(
|
||||
host=host,
|
||||
kind=SnapshotRecord.Kind.INCOMPLETE,
|
||||
dirname=incomplete_dir.name,
|
||||
path=str(incomplete_dir),
|
||||
status="failed",
|
||||
metadata={
|
||||
"stats": {
|
||||
"storage": {
|
||||
"snapshot": {
|
||||
"apparent_size_bytes": 0,
|
||||
"allocated_size_bytes": 0,
|
||||
}
|
||||
SnapshotRecord.objects.create(
|
||||
host=host,
|
||||
kind=SnapshotRecord.Kind.INCOMPLETE,
|
||||
dirname="20260519-051500Z__BROKEN1",
|
||||
path="/backups/web-01/.incomplete/20260519-051500Z__BROKEN1",
|
||||
status="failed",
|
||||
metadata={
|
||||
"stats": {
|
||||
"storage": {
|
||||
"snapshot": {
|
||||
"apparent_size_bytes": 0,
|
||||
"allocated_size_bytes": 0,
|
||||
}
|
||||
}
|
||||
},
|
||||
)
|
||||
}
|
||||
},
|
||||
)
|
||||
|
||||
with patch("pobsync_backend.stats_summary.tree_usage", create=True) as tree_usage:
|
||||
stats = collect_host_stats(host=host)
|
||||
|
||||
self.assertEqual(
|
||||
stats["backup_data"]["incomplete"]["allocated_size_bytes"],
|
||||
expected_usage["allocated_size_bytes"],
|
||||
tree_usage.assert_not_called()
|
||||
self.assertEqual(stats["backup_data"]["incomplete"]["measured_count"], 1)
|
||||
self.assertEqual(stats["backup_data"]["incomplete"]["unknown_count"], 0)
|
||||
self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], 0)
|
||||
self.assertEqual(stats["backup_data"]["incomplete"]["apparent_size_bytes"], 0)
|
||||
|
||||
def test_collect_dashboard_stats_does_not_scan_filesystem_for_missing_snapshot_metadata(self) -> None:
|
||||
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
|
||||
SnapshotRecord.objects.create(
|
||||
host=host,
|
||||
kind=SnapshotRecord.Kind.SCHEDULED,
|
||||
dirname="20260519-051500Z__SCHED01",
|
||||
path="/backups/web-01/scheduled/20260519-051500Z__SCHED01",
|
||||
status="success",
|
||||
metadata={},
|
||||
)
|
||||
self.assertGreater(stats["backup_data"]["incomplete"]["apparent_size_bytes"], 0)
|
||||
|
||||
with patch("pobsync_backend.stats_summary.tree_usage", create=True) as tree_usage:
|
||||
stats = collect_dashboard_stats(hosts=[host], global_config=None)
|
||||
|
||||
tree_usage.assert_not_called()
|
||||
self.assertEqual(stats["backup_data"]["scheduled"]["count"], 1)
|
||||
self.assertEqual(stats["backup_data"]["scheduled"]["measured_count"], 0)
|
||||
self.assertEqual(stats["backup_data"]["scheduled"]["unknown_count"], 1)
|
||||
self.assertEqual(stats["backup_data"]["scheduled"]["allocated_size_bytes"], 0)
|
||||
|
||||
def test_collect_host_stats_reports_non_hardlinked_snapshot_data(self) -> None:
|
||||
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
|
||||
@@ -147,21 +140,6 @@ class StatsSummaryTests(TestCase):
|
||||
def _snapshot(self, host: HostConfig, dirname: str, kind: str, *, allocated: int) -> SnapshotRecord:
|
||||
return self._snapshot_with_sizes(host, dirname, kind, allocated=allocated)
|
||||
|
||||
def _incomplete_snapshot_on_disk(self, host: HostConfig, root: Path, dirname: str) -> dict:
|
||||
incomplete_dir = root / host.host / ".incomplete" / dirname
|
||||
data_dir = incomplete_dir / "data"
|
||||
data_dir.mkdir(parents=True)
|
||||
data_dir.joinpath("partial-file").write_text("interrupted backup data\n", encoding="utf-8")
|
||||
usage = tree_usage(data_dir)
|
||||
SnapshotRecord.objects.create(
|
||||
host=host,
|
||||
kind=SnapshotRecord.Kind.INCOMPLETE,
|
||||
dirname=dirname,
|
||||
path=str(incomplete_dir),
|
||||
status="failed",
|
||||
)
|
||||
return usage
|
||||
|
||||
def _snapshot_with_sizes(
|
||||
self,
|
||||
host: HostConfig,
|
||||
|
||||
Reference in New Issue
Block a user