(bugfix) Avoid live backup data scans in web views #99

Merged
parkel merged 1 commit from issue-97-avoid-live-storage-scans into master 2026-06-08 23:11:55 +02:00
6 changed files with 163 additions and 183 deletions

View File

@@ -5,7 +5,7 @@ from typing import Any, Iterable
from django.utils import timezone from django.utils import timezone
from pobsync.run_stats import filesystem_capacity, tree_usage from pobsync.run_stats import filesystem_capacity
from .models import BackupRun, GlobalConfig, HostConfig, SnapshotRecord from .models import BackupRun, GlobalConfig, HostConfig, SnapshotRecord
@@ -118,14 +118,26 @@ def _backup_data_by_kind(host: HostConfig) -> dict[str, Any]:
for snapshot in host.snapshots.all(): for snapshot in host.snapshots.all():
summary = _snapshot_summary(snapshot) summary = _snapshot_summary(snapshot)
row = rows.setdefault(snapshot.kind, _empty_snapshot_data_row()) row = rows.setdefault(snapshot.kind, _empty_snapshot_data_row())
allocated = summary.get("allocated_size_bytes") or summary.get("apparent_size_bytes") or 0
apparent = summary.get("apparent_size_bytes") or 0
unique_apparent = summary.get("unique_apparent_size_bytes") or 0
row["count"] += 1 row["count"] += 1
total["count"] += 1
if not summary.get("storage_measured"):
row["unknown_count"] += 1
total["unknown_count"] += 1
continue
allocated = summary.get("allocated_size_bytes")
if allocated is None:
allocated = summary.get("apparent_size_bytes")
apparent = summary.get("apparent_size_bytes")
unique_apparent = summary.get("unique_apparent_size_bytes")
allocated = int(allocated or 0)
apparent = int(apparent or 0)
unique_apparent = int(unique_apparent or 0)
row["measured_count"] += 1
row["allocated_size_bytes"] += int(allocated) row["allocated_size_bytes"] += int(allocated)
row["apparent_size_bytes"] += int(apparent) row["apparent_size_bytes"] += int(apparent)
row["unique_apparent_size_bytes"] += int(unique_apparent) row["unique_apparent_size_bytes"] += int(unique_apparent)
total["count"] += 1 total["measured_count"] += 1
total["allocated_size_bytes"] += int(allocated) total["allocated_size_bytes"] += int(allocated)
total["apparent_size_bytes"] += int(apparent) total["apparent_size_bytes"] += int(apparent)
total["unique_apparent_size_bytes"] += int(unique_apparent) total["unique_apparent_size_bytes"] += int(unique_apparent)
@@ -141,6 +153,8 @@ def _backup_data_by_kind(host: HostConfig) -> dict[str, Any]:
def _empty_snapshot_data_row() -> dict[str, int]: def _empty_snapshot_data_row() -> dict[str, int]:
return { return {
"count": 0, "count": 0,
"measured_count": 0,
"unknown_count": 0,
"allocated_size_bytes": 0, "allocated_size_bytes": 0,
"apparent_size_bytes": 0, "apparent_size_bytes": 0,
"unique_apparent_size_bytes": 0, "unique_apparent_size_bytes": 0,
@@ -159,6 +173,8 @@ def _sum_backup_data_by_kind(rows: Iterable[dict[str, dict[str, int]]]) -> dict[
for kind, values in row.items(): for kind, values in row.items():
total_row = total_rows.setdefault(kind, _empty_snapshot_data_row()) total_row = total_rows.setdefault(kind, _empty_snapshot_data_row())
total_row["count"] += values.get("count", 0) total_row["count"] += values.get("count", 0)
total_row["measured_count"] += values.get("measured_count", 0)
total_row["unknown_count"] += values.get("unknown_count", 0)
total_row["allocated_size_bytes"] += values.get("allocated_size_bytes", 0) total_row["allocated_size_bytes"] += values.get("allocated_size_bytes", 0)
total_row["apparent_size_bytes"] += values.get("apparent_size_bytes", 0) total_row["apparent_size_bytes"] += values.get("apparent_size_bytes", 0)
total_row["unique_apparent_size_bytes"] += values.get("unique_apparent_size_bytes", 0) total_row["unique_apparent_size_bytes"] += values.get("unique_apparent_size_bytes", 0)
@@ -173,15 +189,7 @@ def _snapshot_summary(snapshot: SnapshotRecord | None) -> dict[str, Any]:
stats = metadata.get("stats") if isinstance(metadata.get("stats"), dict) else {} stats = metadata.get("stats") if isinstance(metadata.get("stats"), dict) else {}
storage = stats.get("storage") if isinstance(stats.get("storage"), dict) else {} storage = stats.get("storage") if isinstance(stats.get("storage"), dict) else {}
snapshot_storage = storage.get("snapshot") if isinstance(storage.get("snapshot"), dict) else {} snapshot_storage = storage.get("snapshot") if isinstance(storage.get("snapshot"), dict) else {}
if snapshot.kind == SnapshotRecord.Kind.INCOMPLETE: storage_measured = _has_recorded_snapshot_storage(snapshot_storage)
snapshot_storage = _snapshot_storage_from_filesystem(snapshot)
else:
has_recorded_size = (
_int_at(snapshot_storage, "allocated_size_bytes") is not None
or _int_at(snapshot_storage, "apparent_size_bytes") is not None
)
if not has_recorded_size:
snapshot_storage = _snapshot_storage_from_filesystem(snapshot)
apparent_size = _int_at(snapshot_storage, "apparent_size_bytes") apparent_size = _int_at(snapshot_storage, "apparent_size_bytes")
hardlinked_apparent = _int_at(snapshot_storage, "hardlinked_apparent_size_bytes") or 0 hardlinked_apparent = _int_at(snapshot_storage, "hardlinked_apparent_size_bytes") or 0
return { return {
@@ -195,19 +203,18 @@ def _snapshot_summary(snapshot: SnapshotRecord | None) -> dict[str, Any]:
"hardlinked_files": _int_at(snapshot_storage, "hardlinked_files"), "hardlinked_files": _int_at(snapshot_storage, "hardlinked_files"),
"hardlinked_apparent_size_bytes": hardlinked_apparent, "hardlinked_apparent_size_bytes": hardlinked_apparent,
"unique_apparent_size_bytes": max((apparent_size or 0) - hardlinked_apparent, 0), "unique_apparent_size_bytes": max((apparent_size or 0) - hardlinked_apparent, 0),
"storage_measured": storage_measured,
} }
def _snapshot_storage_from_filesystem(snapshot: SnapshotRecord) -> dict[str, Any]: def _has_recorded_snapshot_storage(snapshot_storage: dict[str, Any]) -> bool:
if not snapshot.path: return any(
return {} _int_at(snapshot_storage, key) is not None
snapshot_path = Path(snapshot.path) for key in (
data_path = snapshot_path / "data" "allocated_size_bytes",
if snapshot_path.name == "data": "apparent_size_bytes",
return tree_usage(snapshot_path) )
if data_path.exists(): )
return tree_usage(data_path)
return tree_usage(snapshot_path)
def _is_real_run(run: BackupRun) -> bool: def _is_real_run(run: BackupRun) -> bool:

View File

@@ -190,27 +190,35 @@
<div class="metric"> <div class="metric">
<div class="label">Scheduled</div> <div class="label">Scheduled</div>
<div class="value">{{ stats_summary.backup_data.scheduled.allocated_size_bytes|filesizeformat }}</div> <div class="value">{{ stats_summary.backup_data.scheduled.allocated_size_bytes|filesizeformat }}</div>
<div class="muted">unique {{ stats_summary.backup_data.scheduled.unique_apparent_size_bytes|filesizeformat }}</div> <div class="muted">
unique {{ stats_summary.backup_data.scheduled.unique_apparent_size_bytes|filesizeformat }}{% if stats_summary.backup_data.scheduled.unknown_count %}; {{ stats_summary.backup_data.scheduled.unknown_count }} not measured{% endif %}
</div>
</div> </div>
<div class="metric"> <div class="metric">
<div class="label">Manual</div> <div class="label">Manual</div>
<div class="value">{{ stats_summary.backup_data.manual.allocated_size_bytes|filesizeformat }}</div> <div class="value">{{ stats_summary.backup_data.manual.allocated_size_bytes|filesizeformat }}</div>
<div class="muted">unique {{ stats_summary.backup_data.manual.unique_apparent_size_bytes|filesizeformat }}</div> <div class="muted">
unique {{ stats_summary.backup_data.manual.unique_apparent_size_bytes|filesizeformat }}{% if stats_summary.backup_data.manual.unknown_count %}; {{ stats_summary.backup_data.manual.unknown_count }} not measured{% endif %}
</div>
</div> </div>
<div class="metric"> <div class="metric">
<div class="label">Incomplete</div> <div class="label">Incomplete</div>
<div class="value">{{ stats_summary.backup_data.incomplete.allocated_size_bytes|filesizeformat }}</div> <div class="value">{{ stats_summary.backup_data.incomplete.allocated_size_bytes|filesizeformat }}</div>
<div class="muted">measured from disk</div> <div class="muted">
stored metadata{% if stats_summary.backup_data.incomplete.unknown_count %}; {{ stats_summary.backup_data.incomplete.unknown_count }} not measured{% endif %}
</div>
</div> </div>
<div class="metric"> <div class="metric">
<div class="label">Total</div> <div class="label">Total</div>
<div class="value">{{ stats_summary.backup_data.total.allocated_size_bytes|filesizeformat }}</div> <div class="value">{{ stats_summary.backup_data.total.allocated_size_bytes|filesizeformat }}</div>
<div class="muted">unique {{ stats_summary.backup_data.total.unique_apparent_size_bytes|filesizeformat }}</div> <div class="muted">
unique {{ stats_summary.backup_data.total.unique_apparent_size_bytes|filesizeformat }}{% if stats_summary.backup_data.total.unknown_count %}; {{ stats_summary.backup_data.total.unknown_count }} not measured{% endif %}
</div>
</div> </div>
</section> </section>
<p class="muted"> <p class="muted">
Main totals use allocated snapshot size. Unique values estimate non-hardlinked visible data; incomplete Main totals use stored snapshot metadata. Unique values estimate non-hardlinked visible data; snapshots without
snapshots are measured from disk because their metadata can be stale. recorded storage metadata are shown as not measured until a backup or metrics refresh records them.
</p> </p>
</section> </section>

View File

@@ -105,22 +105,30 @@
<div class="host-card-stat"> <div class="host-card-stat">
<div class="label">Scheduled data</div> <div class="label">Scheduled data</div>
<div class="value">{{ host.stats_summary.backup_data.scheduled.allocated_size_bytes|filesizeformat }}</div> <div class="value">{{ host.stats_summary.backup_data.scheduled.allocated_size_bytes|filesizeformat }}</div>
<div class="muted">unique {{ host.stats_summary.backup_data.scheduled.unique_apparent_size_bytes|filesizeformat }}</div> <div class="muted">
unique {{ host.stats_summary.backup_data.scheduled.unique_apparent_size_bytes|filesizeformat }}{% if host.stats_summary.backup_data.scheduled.unknown_count %}; {{ host.stats_summary.backup_data.scheduled.unknown_count }} not measured{% endif %}
</div>
</div> </div>
<div class="host-card-stat"> <div class="host-card-stat">
<div class="label">Manual data</div> <div class="label">Manual data</div>
<div class="value">{{ host.stats_summary.backup_data.manual.allocated_size_bytes|filesizeformat }}</div> <div class="value">{{ host.stats_summary.backup_data.manual.allocated_size_bytes|filesizeformat }}</div>
<div class="muted">unique {{ host.stats_summary.backup_data.manual.unique_apparent_size_bytes|filesizeformat }}</div> <div class="muted">
unique {{ host.stats_summary.backup_data.manual.unique_apparent_size_bytes|filesizeformat }}{% if host.stats_summary.backup_data.manual.unknown_count %}; {{ host.stats_summary.backup_data.manual.unknown_count }} not measured{% endif %}
</div>
</div> </div>
<div class="host-card-stat"> <div class="host-card-stat">
<div class="label">Incomplete data</div> <div class="label">Incomplete data</div>
<div class="value">{{ host.stats_summary.backup_data.incomplete.allocated_size_bytes|filesizeformat }}</div> <div class="value">{{ host.stats_summary.backup_data.incomplete.allocated_size_bytes|filesizeformat }}</div>
<div class="muted">measured from disk</div> <div class="muted">
stored metadata{% if host.stats_summary.backup_data.incomplete.unknown_count %}; {{ host.stats_summary.backup_data.incomplete.unknown_count }} not measured{% endif %}
</div>
</div> </div>
<div class="host-card-stat"> <div class="host-card-stat">
<div class="label">Total data</div> <div class="label">Total data</div>
<div class="value">{{ host.stats_summary.backup_data.total.allocated_size_bytes|filesizeformat }}</div> <div class="value">{{ host.stats_summary.backup_data.total.allocated_size_bytes|filesizeformat }}</div>
<div class="muted">unique {{ host.stats_summary.backup_data.total.unique_apparent_size_bytes|filesizeformat }}</div> <div class="muted">
unique {{ host.stats_summary.backup_data.total.unique_apparent_size_bytes|filesizeformat }}{% if host.stats_summary.backup_data.total.unknown_count %}; {{ host.stats_summary.backup_data.total.unknown_count }} not measured{% endif %}
</div>
</div> </div>
</div> </div>
</div> </div>

View File

@@ -130,22 +130,30 @@
<div> <div>
<span class="label">Scheduled data</span> <span class="label">Scheduled data</span>
<strong>{{ stats_summary.backup_data.scheduled.allocated_size_bytes|filesizeformat }}</strong> <strong>{{ stats_summary.backup_data.scheduled.allocated_size_bytes|filesizeformat }}</strong>
<span class="muted">unique {{ stats_summary.backup_data.scheduled.unique_apparent_size_bytes|filesizeformat }}</span> <span class="muted">
unique {{ stats_summary.backup_data.scheduled.unique_apparent_size_bytes|filesizeformat }}{% if stats_summary.backup_data.scheduled.unknown_count %}; {{ stats_summary.backup_data.scheduled.unknown_count }} not measured{% endif %}
</span>
</div> </div>
<div> <div>
<span class="label">Manual data</span> <span class="label">Manual data</span>
<strong>{{ stats_summary.backup_data.manual.allocated_size_bytes|filesizeformat }}</strong> <strong>{{ stats_summary.backup_data.manual.allocated_size_bytes|filesizeformat }}</strong>
<span class="muted">unique {{ stats_summary.backup_data.manual.unique_apparent_size_bytes|filesizeformat }}</span> <span class="muted">
unique {{ stats_summary.backup_data.manual.unique_apparent_size_bytes|filesizeformat }}{% if stats_summary.backup_data.manual.unknown_count %}; {{ stats_summary.backup_data.manual.unknown_count }} not measured{% endif %}
</span>
</div> </div>
<div> <div>
<span class="label">Incomplete data</span> <span class="label">Incomplete data</span>
<strong>{{ stats_summary.backup_data.incomplete.allocated_size_bytes|filesizeformat }}</strong> <strong>{{ stats_summary.backup_data.incomplete.allocated_size_bytes|filesizeformat }}</strong>
<span class="muted">measured from disk</span> <span class="muted">
stored metadata{% if stats_summary.backup_data.incomplete.unknown_count %}; {{ stats_summary.backup_data.incomplete.unknown_count }} not measured{% endif %}
</span>
</div> </div>
<div> <div>
<span class="label">Total snapshot data</span> <span class="label">Total snapshot data</span>
<strong>{{ stats_summary.backup_data.total.allocated_size_bytes|filesizeformat }}</strong> <strong>{{ stats_summary.backup_data.total.allocated_size_bytes|filesizeformat }}</strong>
<span class="muted">unique {{ stats_summary.backup_data.total.unique_apparent_size_bytes|filesizeformat }}</span> <span class="muted">
unique {{ stats_summary.backup_data.total.unique_apparent_size_bytes|filesizeformat }}{% if stats_summary.backup_data.total.unknown_count %}; {{ stats_summary.backup_data.total.unknown_count }} not measured{% endif %}
</span>
</div> </div>
</div> </div>
</article> </article>

View File

@@ -1,12 +1,10 @@
from __future__ import annotations from __future__ import annotations
from datetime import datetime, timezone from datetime import datetime, timezone
from pathlib import Path from unittest.mock import patch
from tempfile import TemporaryDirectory
from django.test import TestCase from django.test import TestCase
from pobsync.run_stats import tree_usage
from pobsync_backend.models import HostConfig, SnapshotRecord from pobsync_backend.models import HostConfig, SnapshotRecord
from pobsync_backend.stats_summary import collect_dashboard_stats, collect_host_stats from pobsync_backend.stats_summary import collect_dashboard_stats, collect_host_stats
@@ -18,114 +16,109 @@ class StatsSummaryTests(TestCase):
self._snapshot(web, "20260519-021500Z__SCHED01", SnapshotRecord.Kind.SCHEDULED, allocated=100) self._snapshot(web, "20260519-021500Z__SCHED01", SnapshotRecord.Kind.SCHEDULED, allocated=100)
self._snapshot(web, "20260519-031500Z__MANUAL1", SnapshotRecord.Kind.MANUAL, allocated=200) self._snapshot(web, "20260519-031500Z__MANUAL1", SnapshotRecord.Kind.MANUAL, allocated=200)
self._snapshot(db, "20260519-041500Z__SCHED02", SnapshotRecord.Kind.SCHEDULED, allocated=300) self._snapshot(db, "20260519-041500Z__SCHED02", SnapshotRecord.Kind.SCHEDULED, allocated=300)
with TemporaryDirectory() as tmp: self._snapshot(db, "20260519-051500Z__BROKEN1", SnapshotRecord.Kind.INCOMPLETE, allocated=400)
incomplete_usage = self._incomplete_snapshot_on_disk(
db,
Path(tmp),
"20260519-051500Z__BROKEN1",
)
stats = collect_dashboard_stats(hosts=[web, db], global_config=None) stats = collect_dashboard_stats(hosts=[web, db], global_config=None)
self.assertEqual(stats["backup_data"]["scheduled"]["count"], 2) self.assertEqual(stats["backup_data"]["scheduled"]["count"], 2)
self.assertEqual(stats["backup_data"]["scheduled"]["allocated_size_bytes"], 400) self.assertEqual(stats["backup_data"]["scheduled"]["allocated_size_bytes"], 400)
self.assertEqual(stats["backup_data"]["manual"]["allocated_size_bytes"], 200) self.assertEqual(stats["backup_data"]["manual"]["allocated_size_bytes"], 200)
self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], incomplete_usage["allocated_size_bytes"]) self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], 400)
self.assertEqual(stats["backup_data"]["total"]["count"], 4) self.assertEqual(stats["backup_data"]["total"]["count"], 4)
self.assertEqual(stats["backup_data"]["total"]["allocated_size_bytes"], 600 + incomplete_usage["allocated_size_bytes"]) self.assertEqual(stats["backup_data"]["total"]["measured_count"], 4)
self.assertEqual(stats["backup_data"]["total"]["unique_apparent_size_bytes"], 1200 + incomplete_usage["apparent_size_bytes"]) self.assertEqual(stats["backup_data"]["total"]["unknown_count"], 0)
self.assertEqual(stats["backup_data"]["total"]["allocated_size_bytes"], 1000)
self.assertEqual(stats["backup_data"]["total"]["unique_apparent_size_bytes"], 2000)
def test_collect_host_stats_sums_backup_data_by_snapshot_kind(self) -> None: def test_collect_host_stats_sums_backup_data_by_snapshot_kind(self) -> None:
host = HostConfig.objects.create(host="web-01", address="web-01.example.test") host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
self._snapshot(host, "20260519-021500Z__SCHED01", SnapshotRecord.Kind.SCHEDULED, allocated=100) self._snapshot(host, "20260519-021500Z__SCHED01", SnapshotRecord.Kind.SCHEDULED, allocated=100)
self._snapshot(host, "20260519-031500Z__SCHED02", SnapshotRecord.Kind.SCHEDULED, allocated=200) self._snapshot(host, "20260519-031500Z__SCHED02", SnapshotRecord.Kind.SCHEDULED, allocated=200)
self._snapshot(host, "20260519-041500Z__MANUAL1", SnapshotRecord.Kind.MANUAL, allocated=300) self._snapshot(host, "20260519-041500Z__MANUAL1", SnapshotRecord.Kind.MANUAL, allocated=300)
with TemporaryDirectory() as tmp: self._snapshot(host, "20260519-051500Z__BROKEN1", SnapshotRecord.Kind.INCOMPLETE, allocated=400)
incomplete_usage = self._incomplete_snapshot_on_disk(
host,
Path(tmp),
"20260519-051500Z__BROKEN1",
)
stats = collect_host_stats(host=host) stats = collect_host_stats(host=host)
self.assertEqual(stats["backup_data"]["scheduled"]["count"], 2) self.assertEqual(stats["backup_data"]["scheduled"]["count"], 2)
self.assertEqual(stats["backup_data"]["scheduled"]["allocated_size_bytes"], 300) self.assertEqual(stats["backup_data"]["scheduled"]["allocated_size_bytes"], 300)
self.assertEqual(stats["backup_data"]["manual"]["allocated_size_bytes"], 300) self.assertEqual(stats["backup_data"]["manual"]["allocated_size_bytes"], 300)
self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], incomplete_usage["allocated_size_bytes"]) self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], 400)
self.assertEqual(stats["backup_data"]["total"]["count"], 4) self.assertEqual(stats["backup_data"]["total"]["count"], 4)
self.assertEqual(stats["backup_data"]["total"]["allocated_size_bytes"], 600 + incomplete_usage["allocated_size_bytes"]) self.assertEqual(stats["backup_data"]["total"]["measured_count"], 4)
self.assertEqual(stats["backup_data"]["total"]["unique_apparent_size_bytes"], 1200 + incomplete_usage["apparent_size_bytes"]) self.assertEqual(stats["backup_data"]["total"]["unknown_count"], 0)
self.assertEqual(stats["backup_data"]["total"]["allocated_size_bytes"], 1000)
self.assertEqual(stats["backup_data"]["total"]["unique_apparent_size_bytes"], 2000)
def test_collect_host_stats_falls_back_to_filesystem_usage_for_snapshots_without_metadata(self) -> None: def test_collect_host_stats_marks_snapshots_without_storage_metadata_unknown(self) -> None:
host = HostConfig.objects.create(host="web-01", address="web-01.example.test") host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
with TemporaryDirectory() as tmp: SnapshotRecord.objects.create(
incomplete_dir = Path(tmp) / host.host / ".incomplete" / "20260519-051500Z__BROKEN1" host=host,
data_dir = incomplete_dir / "data" kind=SnapshotRecord.Kind.INCOMPLETE,
meta_dir = incomplete_dir / "meta" dirname="20260519-051500Z__BROKEN1",
data_dir.mkdir(parents=True) path="/backups/web-01/.incomplete/20260519-051500Z__BROKEN1",
meta_dir.mkdir() status="failed",
data_dir.joinpath("partial-file").write_text("interrupted backup data\n", encoding="utf-8") metadata={},
meta_dir.joinpath("rsync.log").write_text("not part of the backup data total\n", encoding="utf-8") )
expected_usage = tree_usage(data_dir)
SnapshotRecord.objects.create(
host=host,
kind=SnapshotRecord.Kind.INCOMPLETE,
dirname=incomplete_dir.name,
path=str(incomplete_dir),
status="failed",
metadata={},
)
with patch("pobsync_backend.stats_summary.tree_usage", create=True) as tree_usage:
stats = collect_host_stats(host=host) stats = collect_host_stats(host=host)
tree_usage.assert_not_called()
self.assertEqual(stats["backup_data"]["incomplete"]["count"], 1) self.assertEqual(stats["backup_data"]["incomplete"]["count"], 1)
self.assertEqual( self.assertEqual(stats["backup_data"]["incomplete"]["measured_count"], 0)
stats["backup_data"]["incomplete"]["allocated_size_bytes"], self.assertEqual(stats["backup_data"]["incomplete"]["unknown_count"], 1)
expected_usage["allocated_size_bytes"], self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], 0)
) self.assertEqual(stats["backup_data"]["incomplete"]["apparent_size_bytes"], 0)
self.assertEqual( self.assertEqual(stats["backup_data"]["total"]["unknown_count"], 1)
stats["backup_data"]["incomplete"]["apparent_size_bytes"],
expected_usage["apparent_size_bytes"],
)
self.assertEqual(
stats["backup_data"]["total"]["allocated_size_bytes"],
expected_usage["allocated_size_bytes"],
)
def test_collect_host_stats_measures_incomplete_data_from_disk_even_with_stale_metadata(self) -> None: def test_collect_host_stats_uses_recorded_zero_storage_without_rescanning(self) -> None:
host = HostConfig.objects.create(host="web-01", address="web-01.example.test") host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
with TemporaryDirectory() as tmp: SnapshotRecord.objects.create(
incomplete_dir = Path(tmp) / host.host / ".incomplete" / "20260519-051500Z__BROKEN1" host=host,
data_dir = incomplete_dir / "data" kind=SnapshotRecord.Kind.INCOMPLETE,
data_dir.mkdir(parents=True) dirname="20260519-051500Z__BROKEN1",
data_dir.joinpath("partial-file").write_text("interrupted backup data\n", encoding="utf-8") path="/backups/web-01/.incomplete/20260519-051500Z__BROKEN1",
expected_usage = tree_usage(data_dir) status="failed",
SnapshotRecord.objects.create( metadata={
host=host, "stats": {
kind=SnapshotRecord.Kind.INCOMPLETE, "storage": {
dirname=incomplete_dir.name, "snapshot": {
path=str(incomplete_dir), "apparent_size_bytes": 0,
status="failed", "allocated_size_bytes": 0,
metadata={
"stats": {
"storage": {
"snapshot": {
"apparent_size_bytes": 0,
"allocated_size_bytes": 0,
}
} }
} }
}, }
) },
)
with patch("pobsync_backend.stats_summary.tree_usage", create=True) as tree_usage:
stats = collect_host_stats(host=host) stats = collect_host_stats(host=host)
self.assertEqual( tree_usage.assert_not_called()
stats["backup_data"]["incomplete"]["allocated_size_bytes"], self.assertEqual(stats["backup_data"]["incomplete"]["measured_count"], 1)
expected_usage["allocated_size_bytes"], self.assertEqual(stats["backup_data"]["incomplete"]["unknown_count"], 0)
self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], 0)
self.assertEqual(stats["backup_data"]["incomplete"]["apparent_size_bytes"], 0)
def test_collect_dashboard_stats_does_not_scan_filesystem_for_missing_snapshot_metadata(self) -> None:
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
SnapshotRecord.objects.create(
host=host,
kind=SnapshotRecord.Kind.SCHEDULED,
dirname="20260519-051500Z__SCHED01",
path="/backups/web-01/scheduled/20260519-051500Z__SCHED01",
status="success",
metadata={},
) )
self.assertGreater(stats["backup_data"]["incomplete"]["apparent_size_bytes"], 0)
with patch("pobsync_backend.stats_summary.tree_usage", create=True) as tree_usage:
stats = collect_dashboard_stats(hosts=[host], global_config=None)
tree_usage.assert_not_called()
self.assertEqual(stats["backup_data"]["scheduled"]["count"], 1)
self.assertEqual(stats["backup_data"]["scheduled"]["measured_count"], 0)
self.assertEqual(stats["backup_data"]["scheduled"]["unknown_count"], 1)
self.assertEqual(stats["backup_data"]["scheduled"]["allocated_size_bytes"], 0)
def test_collect_host_stats_reports_non_hardlinked_snapshot_data(self) -> None: def test_collect_host_stats_reports_non_hardlinked_snapshot_data(self) -> None:
host = HostConfig.objects.create(host="web-01", address="web-01.example.test") host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
@@ -147,21 +140,6 @@ class StatsSummaryTests(TestCase):
def _snapshot(self, host: HostConfig, dirname: str, kind: str, *, allocated: int) -> SnapshotRecord: def _snapshot(self, host: HostConfig, dirname: str, kind: str, *, allocated: int) -> SnapshotRecord:
return self._snapshot_with_sizes(host, dirname, kind, allocated=allocated) return self._snapshot_with_sizes(host, dirname, kind, allocated=allocated)
def _incomplete_snapshot_on_disk(self, host: HostConfig, root: Path, dirname: str) -> dict:
incomplete_dir = root / host.host / ".incomplete" / dirname
data_dir = incomplete_dir / "data"
data_dir.mkdir(parents=True)
data_dir.joinpath("partial-file").write_text("interrupted backup data\n", encoding="utf-8")
usage = tree_usage(data_dir)
SnapshotRecord.objects.create(
host=host,
kind=SnapshotRecord.Kind.INCOMPLETE,
dirname=dirname,
path=str(incomplete_dir),
status="failed",
)
return usage
def _snapshot_with_sizes( def _snapshot_with_sizes(
self, self,
host: HostConfig, host: HostConfig,

View File

@@ -8,11 +8,9 @@ from unittest.mock import patch
from django.contrib.auth import get_user_model from django.contrib.auth import get_user_model
from django.core.files.uploadedfile import SimpleUploadedFile from django.core.files.uploadedfile import SimpleUploadedFile
from django.template.defaultfilters import filesizeformat
from django.test import TestCase, override_settings from django.test import TestCase, override_settings
from django.urls import reverse from django.urls import reverse
from pobsync.run_stats import tree_usage
from pobsync.util import write_yaml_atomic from pobsync.util import write_yaml_atomic
from pobsync_backend.models import ( from pobsync_backend.models import (
BackupRun, BackupRun,
@@ -464,23 +462,12 @@ class ViewTests(TestCase):
db = HostConfig.objects.create(host="db-01", address="db-01.example.test") db = HostConfig.objects.create(host="db-01", address="db-01.example.test")
scheduled = self._snapshot(web, "20260519-021500Z__SCHED01", kind=SnapshotRecord.Kind.SCHEDULED) scheduled = self._snapshot(web, "20260519-021500Z__SCHED01", kind=SnapshotRecord.Kind.SCHEDULED)
manual = self._snapshot(web, "20260519-031500Z__MANUAL1", kind=SnapshotRecord.Kind.MANUAL) manual = self._snapshot(web, "20260519-031500Z__MANUAL1", kind=SnapshotRecord.Kind.MANUAL)
incomplete = self._snapshot(db, "20260519-041500Z__BROKEN1", kind=SnapshotRecord.Kind.INCOMPLETE)
self._set_snapshot_storage(scheduled, allocated=100) self._set_snapshot_storage(scheduled, allocated=100)
self._set_snapshot_storage(manual, allocated=200) self._set_snapshot_storage(manual, allocated=200)
with TemporaryDirectory() as tmp: self._set_snapshot_storage(incomplete, allocated=400)
incomplete_dir = Path(tmp) / db.host / ".incomplete" / "20260519-041500Z__BROKEN1"
data_dir = incomplete_dir / "data"
data_dir.mkdir(parents=True)
data_dir.joinpath("partial-file").write_text("interrupted backup data\n", encoding="utf-8")
expected_usage = tree_usage(data_dir)
SnapshotRecord.objects.create(
host=db,
kind=SnapshotRecord.Kind.INCOMPLETE,
dirname=incomplete_dir.name,
path=str(incomplete_dir),
status="failed",
)
response = self.client.get(reverse("dashboard_priority_live")) response = self.client.get(reverse("dashboard_priority_live"))
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
self.assertContains(response, "Scheduled data") self.assertContains(response, "Scheduled data")
@@ -489,8 +476,8 @@ class ViewTests(TestCase):
self.assertContains(response, "Total snapshot data") self.assertContains(response, "Total snapshot data")
self.assertContains(response, "100&nbsp;bytes", html=True) self.assertContains(response, "100&nbsp;bytes", html=True)
self.assertContains(response, "200&nbsp;bytes", html=True) self.assertContains(response, "200&nbsp;bytes", html=True)
self.assertContains(response, filesizeformat(expected_usage["allocated_size_bytes"])) self.assertContains(response, "400&nbsp;bytes", html=True)
self.assertContains(response, filesizeformat(300 + expected_usage["allocated_size_bytes"])) self.assertContains(response, "700&nbsp;bytes", html=True)
def test_dashboard_hosts_live_returns_hosts_partial(self) -> None: def test_dashboard_hosts_live_returns_hosts_partial(self) -> None:
self.client.force_login(self.staff_user) self.client.force_login(self.staff_user)
@@ -510,23 +497,12 @@ class ViewTests(TestCase):
host = HostConfig.objects.create(host="web-01", address="web-01.example.test") host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
scheduled = self._snapshot(host, "20260519-021500Z__SCHED01", kind=SnapshotRecord.Kind.SCHEDULED) scheduled = self._snapshot(host, "20260519-021500Z__SCHED01", kind=SnapshotRecord.Kind.SCHEDULED)
manual = self._snapshot(host, "20260519-031500Z__MANUAL1", kind=SnapshotRecord.Kind.MANUAL) manual = self._snapshot(host, "20260519-031500Z__MANUAL1", kind=SnapshotRecord.Kind.MANUAL)
incomplete = self._snapshot(host, "20260519-041500Z__BROKEN1", kind=SnapshotRecord.Kind.INCOMPLETE)
self._set_snapshot_storage(scheduled, allocated=100) self._set_snapshot_storage(scheduled, allocated=100)
self._set_snapshot_storage(manual, allocated=200) self._set_snapshot_storage(manual, allocated=200)
with TemporaryDirectory() as tmp: self._set_snapshot_storage(incomplete, allocated=400)
incomplete_dir = Path(tmp) / host.host / ".incomplete" / "20260519-041500Z__BROKEN1"
data_dir = incomplete_dir / "data"
data_dir.mkdir(parents=True)
data_dir.joinpath("partial-file").write_text("interrupted backup data\n", encoding="utf-8")
expected_usage = tree_usage(data_dir)
SnapshotRecord.objects.create(
host=host,
kind=SnapshotRecord.Kind.INCOMPLETE,
dirname=incomplete_dir.name,
path=str(incomplete_dir),
status="failed",
)
response = self.client.get(reverse("dashboard_hosts_live")) response = self.client.get(reverse("dashboard_hosts_live"))
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
self.assertContains(response, "Scheduled data") self.assertContains(response, "Scheduled data")
@@ -535,32 +511,27 @@ class ViewTests(TestCase):
self.assertContains(response, "Total data") self.assertContains(response, "Total data")
self.assertContains(response, "100&nbsp;bytes", html=True) self.assertContains(response, "100&nbsp;bytes", html=True)
self.assertContains(response, "200&nbsp;bytes", html=True) self.assertContains(response, "200&nbsp;bytes", html=True)
self.assertContains(response, filesizeformat(expected_usage["allocated_size_bytes"])) self.assertContains(response, "400&nbsp;bytes", html=True)
self.assertContains(response, filesizeformat(300 + expected_usage["allocated_size_bytes"])) self.assertContains(response, "700&nbsp;bytes", html=True)
def test_dashboard_host_cards_measure_incomplete_data_without_snapshot_metadata(self) -> None: def test_dashboard_host_cards_mark_incomplete_data_without_snapshot_metadata_unmeasured(self) -> None:
self.client.force_login(self.staff_user) self.client.force_login(self.staff_user)
host = HostConfig.objects.create(host="web-01", address="web-01.example.test") host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
with TemporaryDirectory() as tmp: SnapshotRecord.objects.create(
incomplete_dir = Path(tmp) / host.host / ".incomplete" / "20260519-041500Z__BROKEN1" host=host,
data_dir = incomplete_dir / "data" kind=SnapshotRecord.Kind.INCOMPLETE,
data_dir.mkdir(parents=True) dirname="20260519-041500Z__BROKEN1",
data_dir.joinpath("partial-file").write_text("interrupted backup data\n", encoding="utf-8") path="/backups/web-01/.incomplete/20260519-041500Z__BROKEN1",
expected_usage = tree_usage(data_dir) status="failed",
SnapshotRecord.objects.create( metadata={},
host=host, )
kind=SnapshotRecord.Kind.INCOMPLETE,
dirname=incomplete_dir.name,
path=str(incomplete_dir),
status="failed",
metadata={},
)
response = self.client.get(reverse("dashboard_hosts_live")) response = self.client.get(reverse("dashboard_hosts_live"))
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
self.assertContains(response, "Incomplete data") self.assertContains(response, "Incomplete data")
self.assertContains(response, filesizeformat(expected_usage["allocated_size_bytes"])) self.assertContains(response, "0&nbsp;bytes", html=True)
self.assertContains(response, "1 not measured")
def test_hosts_list_renders_host_cards_and_controls(self) -> None: def test_hosts_list_renders_host_cards_and_controls(self) -> None:
self.client.force_login(self.staff_user) self.client.force_login(self.staff_user)