(bugfix) Measure incomplete snapshot data from disk
Use filesystem usage for incomplete snapshots instead of trusting potentially stale metadata, and expose unique non-hardlinked data totals for completed snapshots. Update dashboard and host storage summaries so incomplete data is visible and complete snapshot totals distinguish allocated and unique data.
This commit is contained in:
@@ -120,12 +120,15 @@ def _backup_data_by_kind(host: HostConfig) -> dict[str, Any]:
|
||||
row = rows.setdefault(snapshot.kind, _empty_snapshot_data_row())
|
||||
allocated = summary.get("allocated_size_bytes") or summary.get("apparent_size_bytes") or 0
|
||||
apparent = summary.get("apparent_size_bytes") or 0
|
||||
unique_apparent = summary.get("unique_apparent_size_bytes") or 0
|
||||
row["count"] += 1
|
||||
row["allocated_size_bytes"] += int(allocated)
|
||||
row["apparent_size_bytes"] += int(apparent)
|
||||
row["unique_apparent_size_bytes"] += int(unique_apparent)
|
||||
total["count"] += 1
|
||||
total["allocated_size_bytes"] += int(allocated)
|
||||
total["apparent_size_bytes"] += int(apparent)
|
||||
total["unique_apparent_size_bytes"] += int(unique_apparent)
|
||||
|
||||
return {
|
||||
"scheduled": rows[SnapshotRecord.Kind.SCHEDULED],
|
||||
@@ -140,6 +143,7 @@ def _empty_snapshot_data_row() -> dict[str, int]:
|
||||
"count": 0,
|
||||
"allocated_size_bytes": 0,
|
||||
"apparent_size_bytes": 0,
|
||||
"unique_apparent_size_bytes": 0,
|
||||
}
|
||||
|
||||
|
||||
@@ -157,6 +161,7 @@ def _sum_backup_data_by_kind(rows: Iterable[dict[str, dict[str, int]]]) -> dict[
|
||||
total_row["count"] += values.get("count", 0)
|
||||
total_row["allocated_size_bytes"] += values.get("allocated_size_bytes", 0)
|
||||
total_row["apparent_size_bytes"] += values.get("apparent_size_bytes", 0)
|
||||
total_row["unique_apparent_size_bytes"] += values.get("unique_apparent_size_bytes", 0)
|
||||
|
||||
return total_rows
|
||||
|
||||
@@ -168,21 +173,28 @@ def _snapshot_summary(snapshot: SnapshotRecord | None) -> dict[str, Any]:
|
||||
stats = metadata.get("stats") if isinstance(metadata.get("stats"), dict) else {}
|
||||
storage = stats.get("storage") if isinstance(stats.get("storage"), dict) else {}
|
||||
snapshot_storage = storage.get("snapshot") if isinstance(storage.get("snapshot"), dict) else {}
|
||||
has_recorded_size = (
|
||||
_int_at(snapshot_storage, "allocated_size_bytes") is not None
|
||||
or _int_at(snapshot_storage, "apparent_size_bytes") is not None
|
||||
)
|
||||
if not has_recorded_size:
|
||||
if snapshot.kind == SnapshotRecord.Kind.INCOMPLETE:
|
||||
snapshot_storage = _snapshot_storage_from_filesystem(snapshot)
|
||||
else:
|
||||
has_recorded_size = (
|
||||
_int_at(snapshot_storage, "allocated_size_bytes") is not None
|
||||
or _int_at(snapshot_storage, "apparent_size_bytes") is not None
|
||||
)
|
||||
if not has_recorded_size:
|
||||
snapshot_storage = _snapshot_storage_from_filesystem(snapshot)
|
||||
apparent_size = _int_at(snapshot_storage, "apparent_size_bytes")
|
||||
hardlinked_apparent = _int_at(snapshot_storage, "hardlinked_apparent_size_bytes") or 0
|
||||
return {
|
||||
"id": snapshot.id,
|
||||
"dirname": snapshot.dirname,
|
||||
"kind": snapshot.kind,
|
||||
"status": snapshot.status,
|
||||
"started_at": snapshot.started_at,
|
||||
"apparent_size_bytes": _int_at(snapshot_storage, "apparent_size_bytes"),
|
||||
"apparent_size_bytes": apparent_size,
|
||||
"allocated_size_bytes": _int_at(snapshot_storage, "allocated_size_bytes"),
|
||||
"hardlinked_files": _int_at(snapshot_storage, "hardlinked_files"),
|
||||
"hardlinked_apparent_size_bytes": hardlinked_apparent,
|
||||
"unique_apparent_size_bytes": max((apparent_size or 0) - hardlinked_apparent, 0),
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -178,21 +178,28 @@
|
||||
<div class="metric">
|
||||
<div class="label">Scheduled</div>
|
||||
<div class="value">{{ stats_summary.backup_data.scheduled.allocated_size_bytes|filesizeformat }}</div>
|
||||
<div class="muted">unique {{ stats_summary.backup_data.scheduled.unique_apparent_size_bytes|filesizeformat }}</div>
|
||||
</div>
|
||||
<div class="metric">
|
||||
<div class="label">Manual</div>
|
||||
<div class="value">{{ stats_summary.backup_data.manual.allocated_size_bytes|filesizeformat }}</div>
|
||||
<div class="muted">unique {{ stats_summary.backup_data.manual.unique_apparent_size_bytes|filesizeformat }}</div>
|
||||
</div>
|
||||
<div class="metric">
|
||||
<div class="label">Incomplete</div>
|
||||
<div class="value">{{ stats_summary.backup_data.incomplete.allocated_size_bytes|filesizeformat }}</div>
|
||||
<div class="muted">measured from disk</div>
|
||||
</div>
|
||||
<div class="metric">
|
||||
<div class="label">Total</div>
|
||||
<div class="value">{{ stats_summary.backup_data.total.allocated_size_bytes|filesizeformat }}</div>
|
||||
<div class="muted">unique {{ stats_summary.backup_data.total.unique_apparent_size_bytes|filesizeformat }}</div>
|
||||
</div>
|
||||
</section>
|
||||
<p class="muted">Totals use the allocated snapshot size recorded in backup metadata, grouped by snapshot kind.</p>
|
||||
<p class="muted">
|
||||
Main totals use allocated snapshot size. Unique values estimate non-hardlinked visible data; incomplete
|
||||
snapshots are measured from disk because their metadata can be stale.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
{% if stats_summary.runs %}
|
||||
|
||||
@@ -105,18 +105,22 @@
|
||||
<div class="host-card-stat">
|
||||
<div class="label">Scheduled data</div>
|
||||
<div class="value">{{ host.stats_summary.backup_data.scheduled.allocated_size_bytes|filesizeformat }}</div>
|
||||
<div class="muted">unique {{ host.stats_summary.backup_data.scheduled.unique_apparent_size_bytes|filesizeformat }}</div>
|
||||
</div>
|
||||
<div class="host-card-stat">
|
||||
<div class="label">Manual data</div>
|
||||
<div class="value">{{ host.stats_summary.backup_data.manual.allocated_size_bytes|filesizeformat }}</div>
|
||||
<div class="muted">unique {{ host.stats_summary.backup_data.manual.unique_apparent_size_bytes|filesizeformat }}</div>
|
||||
</div>
|
||||
<div class="host-card-stat">
|
||||
<div class="label">Incomplete data</div>
|
||||
<div class="value">{{ host.stats_summary.backup_data.incomplete.allocated_size_bytes|filesizeformat }}</div>
|
||||
<div class="muted">measured from disk</div>
|
||||
</div>
|
||||
<div class="host-card-stat">
|
||||
<div class="label">Total data</div>
|
||||
<div class="value">{{ host.stats_summary.backup_data.total.allocated_size_bytes|filesizeformat }}</div>
|
||||
<div class="muted">unique {{ host.stats_summary.backup_data.total.unique_apparent_size_bytes|filesizeformat }}</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -130,18 +130,22 @@
|
||||
<div>
|
||||
<span class="label">Scheduled data</span>
|
||||
<strong>{{ stats_summary.backup_data.scheduled.allocated_size_bytes|filesizeformat }}</strong>
|
||||
<span class="muted">unique {{ stats_summary.backup_data.scheduled.unique_apparent_size_bytes|filesizeformat }}</span>
|
||||
</div>
|
||||
<div>
|
||||
<span class="label">Manual data</span>
|
||||
<strong>{{ stats_summary.backup_data.manual.allocated_size_bytes|filesizeformat }}</strong>
|
||||
<span class="muted">unique {{ stats_summary.backup_data.manual.unique_apparent_size_bytes|filesizeformat }}</span>
|
||||
</div>
|
||||
<div>
|
||||
<span class="label">Incomplete data</span>
|
||||
<strong>{{ stats_summary.backup_data.incomplete.allocated_size_bytes|filesizeformat }}</strong>
|
||||
<span class="muted">measured from disk</span>
|
||||
</div>
|
||||
<div>
|
||||
<span class="label">Total snapshot data</span>
|
||||
<strong>{{ stats_summary.backup_data.total.allocated_size_bytes|filesizeformat }}</strong>
|
||||
<span class="muted">unique {{ stats_summary.backup_data.total.unique_apparent_size_bytes|filesizeformat }}</span>
|
||||
</div>
|
||||
</div>
|
||||
</article>
|
||||
|
||||
@@ -18,32 +18,44 @@ class StatsSummaryTests(TestCase):
|
||||
self._snapshot(web, "20260519-021500Z__SCHED01", SnapshotRecord.Kind.SCHEDULED, allocated=100)
|
||||
self._snapshot(web, "20260519-031500Z__MANUAL1", SnapshotRecord.Kind.MANUAL, allocated=200)
|
||||
self._snapshot(db, "20260519-041500Z__SCHED02", SnapshotRecord.Kind.SCHEDULED, allocated=300)
|
||||
self._snapshot(db, "20260519-051500Z__BROKEN1", SnapshotRecord.Kind.INCOMPLETE, allocated=400)
|
||||
with TemporaryDirectory() as tmp:
|
||||
incomplete_usage = self._incomplete_snapshot_on_disk(
|
||||
db,
|
||||
Path(tmp),
|
||||
"20260519-051500Z__BROKEN1",
|
||||
)
|
||||
|
||||
stats = collect_dashboard_stats(hosts=[web, db], global_config=None)
|
||||
stats = collect_dashboard_stats(hosts=[web, db], global_config=None)
|
||||
|
||||
self.assertEqual(stats["backup_data"]["scheduled"]["count"], 2)
|
||||
self.assertEqual(stats["backup_data"]["scheduled"]["allocated_size_bytes"], 400)
|
||||
self.assertEqual(stats["backup_data"]["manual"]["allocated_size_bytes"], 200)
|
||||
self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], 400)
|
||||
self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], incomplete_usage["allocated_size_bytes"])
|
||||
self.assertEqual(stats["backup_data"]["total"]["count"], 4)
|
||||
self.assertEqual(stats["backup_data"]["total"]["allocated_size_bytes"], 1000)
|
||||
self.assertEqual(stats["backup_data"]["total"]["allocated_size_bytes"], 600 + incomplete_usage["allocated_size_bytes"])
|
||||
self.assertEqual(stats["backup_data"]["total"]["unique_apparent_size_bytes"], 1200 + incomplete_usage["apparent_size_bytes"])
|
||||
|
||||
def test_collect_host_stats_sums_backup_data_by_snapshot_kind(self) -> None:
|
||||
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
|
||||
self._snapshot(host, "20260519-021500Z__SCHED01", SnapshotRecord.Kind.SCHEDULED, allocated=100)
|
||||
self._snapshot(host, "20260519-031500Z__SCHED02", SnapshotRecord.Kind.SCHEDULED, allocated=200)
|
||||
self._snapshot(host, "20260519-041500Z__MANUAL1", SnapshotRecord.Kind.MANUAL, allocated=300)
|
||||
self._snapshot(host, "20260519-051500Z__BROKEN1", SnapshotRecord.Kind.INCOMPLETE, allocated=400)
|
||||
with TemporaryDirectory() as tmp:
|
||||
incomplete_usage = self._incomplete_snapshot_on_disk(
|
||||
host,
|
||||
Path(tmp),
|
||||
"20260519-051500Z__BROKEN1",
|
||||
)
|
||||
|
||||
stats = collect_host_stats(host=host)
|
||||
stats = collect_host_stats(host=host)
|
||||
|
||||
self.assertEqual(stats["backup_data"]["scheduled"]["count"], 2)
|
||||
self.assertEqual(stats["backup_data"]["scheduled"]["allocated_size_bytes"], 300)
|
||||
self.assertEqual(stats["backup_data"]["manual"]["allocated_size_bytes"], 300)
|
||||
self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], 400)
|
||||
self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], incomplete_usage["allocated_size_bytes"])
|
||||
self.assertEqual(stats["backup_data"]["total"]["count"], 4)
|
||||
self.assertEqual(stats["backup_data"]["total"]["allocated_size_bytes"], 1000)
|
||||
self.assertEqual(stats["backup_data"]["total"]["allocated_size_bytes"], 600 + incomplete_usage["allocated_size_bytes"])
|
||||
self.assertEqual(stats["backup_data"]["total"]["unique_apparent_size_bytes"], 1200 + incomplete_usage["apparent_size_bytes"])
|
||||
|
||||
def test_collect_host_stats_falls_back_to_filesystem_usage_for_snapshots_without_metadata(self) -> None:
|
||||
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
|
||||
@@ -81,8 +93,87 @@ class StatsSummaryTests(TestCase):
|
||||
expected_usage["allocated_size_bytes"],
|
||||
)
|
||||
|
||||
def test_collect_host_stats_measures_incomplete_data_from_disk_even_with_stale_metadata(self) -> None:
|
||||
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
|
||||
with TemporaryDirectory() as tmp:
|
||||
incomplete_dir = Path(tmp) / host.host / ".incomplete" / "20260519-051500Z__BROKEN1"
|
||||
data_dir = incomplete_dir / "data"
|
||||
data_dir.mkdir(parents=True)
|
||||
data_dir.joinpath("partial-file").write_text("interrupted backup data\n", encoding="utf-8")
|
||||
expected_usage = tree_usage(data_dir)
|
||||
SnapshotRecord.objects.create(
|
||||
host=host,
|
||||
kind=SnapshotRecord.Kind.INCOMPLETE,
|
||||
dirname=incomplete_dir.name,
|
||||
path=str(incomplete_dir),
|
||||
status="failed",
|
||||
metadata={
|
||||
"stats": {
|
||||
"storage": {
|
||||
"snapshot": {
|
||||
"apparent_size_bytes": 0,
|
||||
"allocated_size_bytes": 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
)
|
||||
|
||||
stats = collect_host_stats(host=host)
|
||||
|
||||
self.assertEqual(
|
||||
stats["backup_data"]["incomplete"]["allocated_size_bytes"],
|
||||
expected_usage["allocated_size_bytes"],
|
||||
)
|
||||
self.assertGreater(stats["backup_data"]["incomplete"]["apparent_size_bytes"], 0)
|
||||
|
||||
def test_collect_host_stats_reports_non_hardlinked_snapshot_data(self) -> None:
|
||||
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
|
||||
self._snapshot_with_sizes(
|
||||
host,
|
||||
"20260519-021500Z__SCHED01",
|
||||
SnapshotRecord.Kind.SCHEDULED,
|
||||
allocated=1_200,
|
||||
apparent=2_000,
|
||||
hardlinked_apparent=1_500,
|
||||
)
|
||||
|
||||
stats = collect_host_stats(host=host)
|
||||
|
||||
self.assertEqual(stats["backup_data"]["scheduled"]["apparent_size_bytes"], 2_000)
|
||||
self.assertEqual(stats["backup_data"]["scheduled"]["unique_apparent_size_bytes"], 500)
|
||||
self.assertEqual(stats["backup_data"]["total"]["unique_apparent_size_bytes"], 500)
|
||||
|
||||
def _snapshot(self, host: HostConfig, dirname: str, kind: str, *, allocated: int) -> SnapshotRecord:
|
||||
return self._snapshot_with_sizes(host, dirname, kind, allocated=allocated)
|
||||
|
||||
def _incomplete_snapshot_on_disk(self, host: HostConfig, root: Path, dirname: str) -> dict:
|
||||
incomplete_dir = root / host.host / ".incomplete" / dirname
|
||||
data_dir = incomplete_dir / "data"
|
||||
data_dir.mkdir(parents=True)
|
||||
data_dir.joinpath("partial-file").write_text("interrupted backup data\n", encoding="utf-8")
|
||||
usage = tree_usage(data_dir)
|
||||
SnapshotRecord.objects.create(
|
||||
host=host,
|
||||
kind=SnapshotRecord.Kind.INCOMPLETE,
|
||||
dirname=dirname,
|
||||
path=str(incomplete_dir),
|
||||
status="failed",
|
||||
)
|
||||
return usage
|
||||
|
||||
def _snapshot_with_sizes(
|
||||
self,
|
||||
host: HostConfig,
|
||||
dirname: str,
|
||||
kind: str,
|
||||
*,
|
||||
allocated: int,
|
||||
apparent: int | None = None,
|
||||
hardlinked_apparent: int = 0,
|
||||
) -> SnapshotRecord:
|
||||
started_at = datetime.strptime(dirname.split("__", 1)[0], "%Y%m%d-%H%M%SZ").replace(tzinfo=timezone.utc)
|
||||
apparent_size = apparent if apparent is not None else allocated * 2
|
||||
return SnapshotRecord.objects.create(
|
||||
host=host,
|
||||
kind=kind,
|
||||
@@ -94,8 +185,9 @@ class StatsSummaryTests(TestCase):
|
||||
"stats": {
|
||||
"storage": {
|
||||
"snapshot": {
|
||||
"apparent_size_bytes": allocated * 2,
|
||||
"apparent_size_bytes": apparent_size,
|
||||
"allocated_size_bytes": allocated,
|
||||
"hardlinked_apparent_size_bytes": hardlinked_apparent,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -265,12 +265,23 @@ class ViewTests(TestCase):
|
||||
db = HostConfig.objects.create(host="db-01", address="db-01.example.test")
|
||||
scheduled = self._snapshot(web, "20260519-021500Z__SCHED01", kind=SnapshotRecord.Kind.SCHEDULED)
|
||||
manual = self._snapshot(web, "20260519-031500Z__MANUAL1", kind=SnapshotRecord.Kind.MANUAL)
|
||||
incomplete = self._snapshot(db, "20260519-041500Z__BROKEN1", kind=SnapshotRecord.Kind.INCOMPLETE)
|
||||
self._set_snapshot_storage(scheduled, allocated=100)
|
||||
self._set_snapshot_storage(manual, allocated=200)
|
||||
self._set_snapshot_storage(incomplete, allocated=300)
|
||||
with TemporaryDirectory() as tmp:
|
||||
incomplete_dir = Path(tmp) / db.host / ".incomplete" / "20260519-041500Z__BROKEN1"
|
||||
data_dir = incomplete_dir / "data"
|
||||
data_dir.mkdir(parents=True)
|
||||
data_dir.joinpath("partial-file").write_text("interrupted backup data\n", encoding="utf-8")
|
||||
expected_usage = tree_usage(data_dir)
|
||||
SnapshotRecord.objects.create(
|
||||
host=db,
|
||||
kind=SnapshotRecord.Kind.INCOMPLETE,
|
||||
dirname=incomplete_dir.name,
|
||||
path=str(incomplete_dir),
|
||||
status="failed",
|
||||
)
|
||||
|
||||
response = self.client.get(reverse("dashboard_priority_live"))
|
||||
response = self.client.get(reverse("dashboard_priority_live"))
|
||||
|
||||
self.assertEqual(response.status_code, 200)
|
||||
self.assertContains(response, "Scheduled data")
|
||||
@@ -279,8 +290,8 @@ class ViewTests(TestCase):
|
||||
self.assertContains(response, "Total snapshot data")
|
||||
self.assertContains(response, "100 bytes", html=True)
|
||||
self.assertContains(response, "200 bytes", html=True)
|
||||
self.assertContains(response, "300 bytes", html=True)
|
||||
self.assertContains(response, "600 bytes", html=True)
|
||||
self.assertContains(response, filesizeformat(expected_usage["allocated_size_bytes"]))
|
||||
self.assertContains(response, filesizeformat(300 + expected_usage["allocated_size_bytes"]))
|
||||
|
||||
def test_dashboard_hosts_live_returns_hosts_partial(self) -> None:
|
||||
self.client.force_login(self.staff_user)
|
||||
@@ -300,12 +311,23 @@ class ViewTests(TestCase):
|
||||
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
|
||||
scheduled = self._snapshot(host, "20260519-021500Z__SCHED01", kind=SnapshotRecord.Kind.SCHEDULED)
|
||||
manual = self._snapshot(host, "20260519-031500Z__MANUAL1", kind=SnapshotRecord.Kind.MANUAL)
|
||||
incomplete = self._snapshot(host, "20260519-041500Z__BROKEN1", kind=SnapshotRecord.Kind.INCOMPLETE)
|
||||
self._set_snapshot_storage(scheduled, allocated=100)
|
||||
self._set_snapshot_storage(manual, allocated=200)
|
||||
self._set_snapshot_storage(incomplete, allocated=300)
|
||||
with TemporaryDirectory() as tmp:
|
||||
incomplete_dir = Path(tmp) / host.host / ".incomplete" / "20260519-041500Z__BROKEN1"
|
||||
data_dir = incomplete_dir / "data"
|
||||
data_dir.mkdir(parents=True)
|
||||
data_dir.joinpath("partial-file").write_text("interrupted backup data\n", encoding="utf-8")
|
||||
expected_usage = tree_usage(data_dir)
|
||||
SnapshotRecord.objects.create(
|
||||
host=host,
|
||||
kind=SnapshotRecord.Kind.INCOMPLETE,
|
||||
dirname=incomplete_dir.name,
|
||||
path=str(incomplete_dir),
|
||||
status="failed",
|
||||
)
|
||||
|
||||
response = self.client.get(reverse("dashboard_hosts_live"))
|
||||
response = self.client.get(reverse("dashboard_hosts_live"))
|
||||
|
||||
self.assertEqual(response.status_code, 200)
|
||||
self.assertContains(response, "Scheduled data")
|
||||
@@ -314,8 +336,8 @@ class ViewTests(TestCase):
|
||||
self.assertContains(response, "Total data")
|
||||
self.assertContains(response, "100 bytes", html=True)
|
||||
self.assertContains(response, "200 bytes", html=True)
|
||||
self.assertContains(response, "300 bytes", html=True)
|
||||
self.assertContains(response, "600 bytes", html=True)
|
||||
self.assertContains(response, filesizeformat(expected_usage["allocated_size_bytes"]))
|
||||
self.assertContains(response, filesizeformat(300 + expected_usage["allocated_size_bytes"]))
|
||||
|
||||
def test_dashboard_host_cards_measure_incomplete_data_without_snapshot_metadata(self) -> None:
|
||||
self.client.force_login(self.staff_user)
|
||||
|
||||
Reference in New Issue
Block a user