Merge pull request '(bugfix) Measure incomplete snapshot data from disk' (#70) from issue-69-data-overview-incomplete-sizes into master
Reviewed-on: #70
This commit was merged in pull request #70.
This commit is contained in:
@@ -120,12 +120,15 @@ def _backup_data_by_kind(host: HostConfig) -> dict[str, Any]:
|
||||
row = rows.setdefault(snapshot.kind, _empty_snapshot_data_row())
|
||||
allocated = summary.get("allocated_size_bytes") or summary.get("apparent_size_bytes") or 0
|
||||
apparent = summary.get("apparent_size_bytes") or 0
|
||||
unique_apparent = summary.get("unique_apparent_size_bytes") or 0
|
||||
row["count"] += 1
|
||||
row["allocated_size_bytes"] += int(allocated)
|
||||
row["apparent_size_bytes"] += int(apparent)
|
||||
row["unique_apparent_size_bytes"] += int(unique_apparent)
|
||||
total["count"] += 1
|
||||
total["allocated_size_bytes"] += int(allocated)
|
||||
total["apparent_size_bytes"] += int(apparent)
|
||||
total["unique_apparent_size_bytes"] += int(unique_apparent)
|
||||
|
||||
return {
|
||||
"scheduled": rows[SnapshotRecord.Kind.SCHEDULED],
|
||||
@@ -140,6 +143,7 @@ def _empty_snapshot_data_row() -> dict[str, int]:
|
||||
"count": 0,
|
||||
"allocated_size_bytes": 0,
|
||||
"apparent_size_bytes": 0,
|
||||
"unique_apparent_size_bytes": 0,
|
||||
}
|
||||
|
||||
|
||||
@@ -157,6 +161,7 @@ def _sum_backup_data_by_kind(rows: Iterable[dict[str, dict[str, int]]]) -> dict[
|
||||
total_row["count"] += values.get("count", 0)
|
||||
total_row["allocated_size_bytes"] += values.get("allocated_size_bytes", 0)
|
||||
total_row["apparent_size_bytes"] += values.get("apparent_size_bytes", 0)
|
||||
total_row["unique_apparent_size_bytes"] += values.get("unique_apparent_size_bytes", 0)
|
||||
|
||||
return total_rows
|
||||
|
||||
@@ -168,21 +173,28 @@ def _snapshot_summary(snapshot: SnapshotRecord | None) -> dict[str, Any]:
|
||||
stats = metadata.get("stats") if isinstance(metadata.get("stats"), dict) else {}
|
||||
storage = stats.get("storage") if isinstance(stats.get("storage"), dict) else {}
|
||||
snapshot_storage = storage.get("snapshot") if isinstance(storage.get("snapshot"), dict) else {}
|
||||
has_recorded_size = (
|
||||
_int_at(snapshot_storage, "allocated_size_bytes") is not None
|
||||
or _int_at(snapshot_storage, "apparent_size_bytes") is not None
|
||||
)
|
||||
if not has_recorded_size:
|
||||
if snapshot.kind == SnapshotRecord.Kind.INCOMPLETE:
|
||||
snapshot_storage = _snapshot_storage_from_filesystem(snapshot)
|
||||
else:
|
||||
has_recorded_size = (
|
||||
_int_at(snapshot_storage, "allocated_size_bytes") is not None
|
||||
or _int_at(snapshot_storage, "apparent_size_bytes") is not None
|
||||
)
|
||||
if not has_recorded_size:
|
||||
snapshot_storage = _snapshot_storage_from_filesystem(snapshot)
|
||||
apparent_size = _int_at(snapshot_storage, "apparent_size_bytes")
|
||||
hardlinked_apparent = _int_at(snapshot_storage, "hardlinked_apparent_size_bytes") or 0
|
||||
return {
|
||||
"id": snapshot.id,
|
||||
"dirname": snapshot.dirname,
|
||||
"kind": snapshot.kind,
|
||||
"status": snapshot.status,
|
||||
"started_at": snapshot.started_at,
|
||||
"apparent_size_bytes": _int_at(snapshot_storage, "apparent_size_bytes"),
|
||||
"apparent_size_bytes": apparent_size,
|
||||
"allocated_size_bytes": _int_at(snapshot_storage, "allocated_size_bytes"),
|
||||
"hardlinked_files": _int_at(snapshot_storage, "hardlinked_files"),
|
||||
"hardlinked_apparent_size_bytes": hardlinked_apparent,
|
||||
"unique_apparent_size_bytes": max((apparent_size or 0) - hardlinked_apparent, 0),
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -178,21 +178,28 @@
|
||||
<div class="metric">
|
||||
<div class="label">Scheduled</div>
|
||||
<div class="value">{{ stats_summary.backup_data.scheduled.allocated_size_bytes|filesizeformat }}</div>
|
||||
<div class="muted">unique {{ stats_summary.backup_data.scheduled.unique_apparent_size_bytes|filesizeformat }}</div>
|
||||
</div>
|
||||
<div class="metric">
|
||||
<div class="label">Manual</div>
|
||||
<div class="value">{{ stats_summary.backup_data.manual.allocated_size_bytes|filesizeformat }}</div>
|
||||
<div class="muted">unique {{ stats_summary.backup_data.manual.unique_apparent_size_bytes|filesizeformat }}</div>
|
||||
</div>
|
||||
<div class="metric">
|
||||
<div class="label">Incomplete</div>
|
||||
<div class="value">{{ stats_summary.backup_data.incomplete.allocated_size_bytes|filesizeformat }}</div>
|
||||
<div class="muted">measured from disk</div>
|
||||
</div>
|
||||
<div class="metric">
|
||||
<div class="label">Total</div>
|
||||
<div class="value">{{ stats_summary.backup_data.total.allocated_size_bytes|filesizeformat }}</div>
|
||||
<div class="muted">unique {{ stats_summary.backup_data.total.unique_apparent_size_bytes|filesizeformat }}</div>
|
||||
</div>
|
||||
</section>
|
||||
<p class="muted">Totals use the allocated snapshot size recorded in backup metadata, grouped by snapshot kind.</p>
|
||||
<p class="muted">
|
||||
Main totals use allocated snapshot size. Unique values estimate non-hardlinked visible data; incomplete
|
||||
snapshots are measured from disk because their metadata can be stale.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
{% if stats_summary.runs %}
|
||||
|
||||
@@ -105,18 +105,22 @@
|
||||
<div class="host-card-stat">
|
||||
<div class="label">Scheduled data</div>
|
||||
<div class="value">{{ host.stats_summary.backup_data.scheduled.allocated_size_bytes|filesizeformat }}</div>
|
||||
<div class="muted">unique {{ host.stats_summary.backup_data.scheduled.unique_apparent_size_bytes|filesizeformat }}</div>
|
||||
</div>
|
||||
<div class="host-card-stat">
|
||||
<div class="label">Manual data</div>
|
||||
<div class="value">{{ host.stats_summary.backup_data.manual.allocated_size_bytes|filesizeformat }}</div>
|
||||
<div class="muted">unique {{ host.stats_summary.backup_data.manual.unique_apparent_size_bytes|filesizeformat }}</div>
|
||||
</div>
|
||||
<div class="host-card-stat">
|
||||
<div class="label">Incomplete data</div>
|
||||
<div class="value">{{ host.stats_summary.backup_data.incomplete.allocated_size_bytes|filesizeformat }}</div>
|
||||
<div class="muted">measured from disk</div>
|
||||
</div>
|
||||
<div class="host-card-stat">
|
||||
<div class="label">Total data</div>
|
||||
<div class="value">{{ host.stats_summary.backup_data.total.allocated_size_bytes|filesizeformat }}</div>
|
||||
<div class="muted">unique {{ host.stats_summary.backup_data.total.unique_apparent_size_bytes|filesizeformat }}</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -130,18 +130,22 @@
|
||||
<div>
|
||||
<span class="label">Scheduled data</span>
|
||||
<strong>{{ stats_summary.backup_data.scheduled.allocated_size_bytes|filesizeformat }}</strong>
|
||||
<span class="muted">unique {{ stats_summary.backup_data.scheduled.unique_apparent_size_bytes|filesizeformat }}</span>
|
||||
</div>
|
||||
<div>
|
||||
<span class="label">Manual data</span>
|
||||
<strong>{{ stats_summary.backup_data.manual.allocated_size_bytes|filesizeformat }}</strong>
|
||||
<span class="muted">unique {{ stats_summary.backup_data.manual.unique_apparent_size_bytes|filesizeformat }}</span>
|
||||
</div>
|
||||
<div>
|
||||
<span class="label">Incomplete data</span>
|
||||
<strong>{{ stats_summary.backup_data.incomplete.allocated_size_bytes|filesizeformat }}</strong>
|
||||
<span class="muted">measured from disk</span>
|
||||
</div>
|
||||
<div>
|
||||
<span class="label">Total snapshot data</span>
|
||||
<strong>{{ stats_summary.backup_data.total.allocated_size_bytes|filesizeformat }}</strong>
|
||||
<span class="muted">unique {{ stats_summary.backup_data.total.unique_apparent_size_bytes|filesizeformat }}</span>
|
||||
</div>
|
||||
</div>
|
||||
</article>
|
||||
|
||||
@@ -18,32 +18,44 @@ class StatsSummaryTests(TestCase):
|
||||
self._snapshot(web, "20260519-021500Z__SCHED01", SnapshotRecord.Kind.SCHEDULED, allocated=100)
|
||||
self._snapshot(web, "20260519-031500Z__MANUAL1", SnapshotRecord.Kind.MANUAL, allocated=200)
|
||||
self._snapshot(db, "20260519-041500Z__SCHED02", SnapshotRecord.Kind.SCHEDULED, allocated=300)
|
||||
self._snapshot(db, "20260519-051500Z__BROKEN1", SnapshotRecord.Kind.INCOMPLETE, allocated=400)
|
||||
with TemporaryDirectory() as tmp:
|
||||
incomplete_usage = self._incomplete_snapshot_on_disk(
|
||||
db,
|
||||
Path(tmp),
|
||||
"20260519-051500Z__BROKEN1",
|
||||
)
|
||||
|
||||
stats = collect_dashboard_stats(hosts=[web, db], global_config=None)
|
||||
stats = collect_dashboard_stats(hosts=[web, db], global_config=None)
|
||||
|
||||
self.assertEqual(stats["backup_data"]["scheduled"]["count"], 2)
|
||||
self.assertEqual(stats["backup_data"]["scheduled"]["allocated_size_bytes"], 400)
|
||||
self.assertEqual(stats["backup_data"]["manual"]["allocated_size_bytes"], 200)
|
||||
self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], 400)
|
||||
self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], incomplete_usage["allocated_size_bytes"])
|
||||
self.assertEqual(stats["backup_data"]["total"]["count"], 4)
|
||||
self.assertEqual(stats["backup_data"]["total"]["allocated_size_bytes"], 1000)
|
||||
self.assertEqual(stats["backup_data"]["total"]["allocated_size_bytes"], 600 + incomplete_usage["allocated_size_bytes"])
|
||||
self.assertEqual(stats["backup_data"]["total"]["unique_apparent_size_bytes"], 1200 + incomplete_usage["apparent_size_bytes"])
|
||||
|
||||
def test_collect_host_stats_sums_backup_data_by_snapshot_kind(self) -> None:
|
||||
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
|
||||
self._snapshot(host, "20260519-021500Z__SCHED01", SnapshotRecord.Kind.SCHEDULED, allocated=100)
|
||||
self._snapshot(host, "20260519-031500Z__SCHED02", SnapshotRecord.Kind.SCHEDULED, allocated=200)
|
||||
self._snapshot(host, "20260519-041500Z__MANUAL1", SnapshotRecord.Kind.MANUAL, allocated=300)
|
||||
self._snapshot(host, "20260519-051500Z__BROKEN1", SnapshotRecord.Kind.INCOMPLETE, allocated=400)
|
||||
with TemporaryDirectory() as tmp:
|
||||
incomplete_usage = self._incomplete_snapshot_on_disk(
|
||||
host,
|
||||
Path(tmp),
|
||||
"20260519-051500Z__BROKEN1",
|
||||
)
|
||||
|
||||
stats = collect_host_stats(host=host)
|
||||
stats = collect_host_stats(host=host)
|
||||
|
||||
self.assertEqual(stats["backup_data"]["scheduled"]["count"], 2)
|
||||
self.assertEqual(stats["backup_data"]["scheduled"]["allocated_size_bytes"], 300)
|
||||
self.assertEqual(stats["backup_data"]["manual"]["allocated_size_bytes"], 300)
|
||||
self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], 400)
|
||||
self.assertEqual(stats["backup_data"]["incomplete"]["allocated_size_bytes"], incomplete_usage["allocated_size_bytes"])
|
||||
self.assertEqual(stats["backup_data"]["total"]["count"], 4)
|
||||
self.assertEqual(stats["backup_data"]["total"]["allocated_size_bytes"], 1000)
|
||||
self.assertEqual(stats["backup_data"]["total"]["allocated_size_bytes"], 600 + incomplete_usage["allocated_size_bytes"])
|
||||
self.assertEqual(stats["backup_data"]["total"]["unique_apparent_size_bytes"], 1200 + incomplete_usage["apparent_size_bytes"])
|
||||
|
||||
def test_collect_host_stats_falls_back_to_filesystem_usage_for_snapshots_without_metadata(self) -> None:
|
||||
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
|
||||
@@ -81,8 +93,87 @@ class StatsSummaryTests(TestCase):
|
||||
expected_usage["allocated_size_bytes"],
|
||||
)
|
||||
|
||||
def test_collect_host_stats_measures_incomplete_data_from_disk_even_with_stale_metadata(self) -> None:
    """Check that zeroed metadata sizes do not hide an incomplete snapshot's data.

    The record's metadata reports 0 bytes for both sizes, while the snapshot's
    ``data`` directory holds one small file. The stats are expected to match
    ``tree_usage`` of that directory rather than the recorded zeros.
    """
    host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
    # Recorded sizes are deliberately zero; only a fresh measurement can yield
    # the non-zero values asserted at the end.
    stale_metadata = {
        "stats": {
            "storage": {
                "snapshot": {
                    "apparent_size_bytes": 0,
                    "allocated_size_bytes": 0,
                }
            }
        }
    }

    with TemporaryDirectory() as tmp:
        snapshot_dir = Path(tmp) / host.host / ".incomplete" / "20260519-051500Z__BROKEN1"
        payload_dir = snapshot_dir / "data"
        payload_dir.mkdir(parents=True)
        (payload_dir / "partial-file").write_text("interrupted backup data\n", encoding="utf-8")
        expected_usage = tree_usage(payload_dir)
        SnapshotRecord.objects.create(
            host=host,
            kind=SnapshotRecord.Kind.INCOMPLETE,
            dirname=snapshot_dir.name,
            path=str(snapshot_dir),
            status="failed",
            metadata=stale_metadata,
        )

        # Collected while the temporary tree still exists on disk.
        stats = collect_host_stats(host=host)

    incomplete_row = stats["backup_data"]["incomplete"]
    self.assertEqual(
        incomplete_row["allocated_size_bytes"],
        expected_usage["allocated_size_bytes"],
    )
    self.assertGreater(incomplete_row["apparent_size_bytes"], 0)
|
||||
|
||||
def test_collect_host_stats_reports_non_hardlinked_snapshot_data(self) -> None:
    """Check the unique (non-hardlinked) size reported for a scheduled snapshot.

    One snapshot is recorded with 2,000 apparent bytes, of which 1,500 are
    hardlinked; both the scheduled row and the total row are expected to
    report 500 unique apparent bytes.
    """
    host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
    self._snapshot_with_sizes(
        host,
        "20260519-021500Z__SCHED01",
        SnapshotRecord.Kind.SCHEDULED,
        allocated=1_200,
        apparent=2_000,
        hardlinked_apparent=1_500,
    )

    backup_data = collect_host_stats(host=host)["backup_data"]
    scheduled_row = backup_data["scheduled"]

    self.assertEqual(scheduled_row["apparent_size_bytes"], 2_000)
    self.assertEqual(scheduled_row["unique_apparent_size_bytes"], 500)
    self.assertEqual(backup_data["total"]["unique_apparent_size_bytes"], 500)
|
||||
|
||||
def _snapshot(self, host: HostConfig, dirname: str, kind: str, *, allocated: int) -> SnapshotRecord:
    """Create a snapshot record for which only the allocated size is specified.

    Thin wrapper around ``_snapshot_with_sizes``; the remaining size arguments
    are left at that helper's defaults.
    """
    record = self._snapshot_with_sizes(host, dirname, kind, allocated=allocated)
    return record
|
||||
|
||||
def _incomplete_snapshot_on_disk(self, host: HostConfig, root: Path, dirname: str) -> dict:
    """Materialise an incomplete snapshot on disk together with its database record.

    Creates ``<root>/<host.host>/.incomplete/<dirname>/data/partial-file`` and a
    failed ``INCOMPLETE`` ``SnapshotRecord`` whose ``path`` is the snapshot
    directory. No metadata is passed when creating the record.

    Returns the ``tree_usage`` result for the ``data`` directory, taken before
    the record is created.
    """
    snapshot_dir = root / host.host / ".incomplete" / dirname
    payload_dir = snapshot_dir / "data"
    payload_dir.mkdir(parents=True)
    (payload_dir / "partial-file").write_text("interrupted backup data\n", encoding="utf-8")

    measured = tree_usage(payload_dir)

    SnapshotRecord.objects.create(
        host=host,
        kind=SnapshotRecord.Kind.INCOMPLETE,
        dirname=dirname,
        path=str(snapshot_dir),
        status="failed",
    )
    return measured
|
||||
|
||||
def _snapshot_with_sizes(
|
||||
self,
|
||||
host: HostConfig,
|
||||
dirname: str,
|
||||
kind: str,
|
||||
*,
|
||||
allocated: int,
|
||||
apparent: int | None = None,
|
||||
hardlinked_apparent: int = 0,
|
||||
) -> SnapshotRecord:
|
||||
started_at = datetime.strptime(dirname.split("__", 1)[0], "%Y%m%d-%H%M%SZ").replace(tzinfo=timezone.utc)
|
||||
apparent_size = apparent if apparent is not None else allocated * 2
|
||||
return SnapshotRecord.objects.create(
|
||||
host=host,
|
||||
kind=kind,
|
||||
@@ -94,8 +185,9 @@ class StatsSummaryTests(TestCase):
|
||||
"stats": {
|
||||
"storage": {
|
||||
"snapshot": {
|
||||
"apparent_size_bytes": allocated * 2,
|
||||
"apparent_size_bytes": apparent_size,
|
||||
"allocated_size_bytes": allocated,
|
||||
"hardlinked_apparent_size_bytes": hardlinked_apparent,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -265,12 +265,23 @@ class ViewTests(TestCase):
|
||||
db = HostConfig.objects.create(host="db-01", address="db-01.example.test")
|
||||
scheduled = self._snapshot(web, "20260519-021500Z__SCHED01", kind=SnapshotRecord.Kind.SCHEDULED)
|
||||
manual = self._snapshot(web, "20260519-031500Z__MANUAL1", kind=SnapshotRecord.Kind.MANUAL)
|
||||
incomplete = self._snapshot(db, "20260519-041500Z__BROKEN1", kind=SnapshotRecord.Kind.INCOMPLETE)
|
||||
self._set_snapshot_storage(scheduled, allocated=100)
|
||||
self._set_snapshot_storage(manual, allocated=200)
|
||||
self._set_snapshot_storage(incomplete, allocated=300)
|
||||
with TemporaryDirectory() as tmp:
|
||||
incomplete_dir = Path(tmp) / db.host / ".incomplete" / "20260519-041500Z__BROKEN1"
|
||||
data_dir = incomplete_dir / "data"
|
||||
data_dir.mkdir(parents=True)
|
||||
data_dir.joinpath("partial-file").write_text("interrupted backup data\n", encoding="utf-8")
|
||||
expected_usage = tree_usage(data_dir)
|
||||
SnapshotRecord.objects.create(
|
||||
host=db,
|
||||
kind=SnapshotRecord.Kind.INCOMPLETE,
|
||||
dirname=incomplete_dir.name,
|
||||
path=str(incomplete_dir),
|
||||
status="failed",
|
||||
)
|
||||
|
||||
response = self.client.get(reverse("dashboard_priority_live"))
|
||||
response = self.client.get(reverse("dashboard_priority_live"))
|
||||
|
||||
self.assertEqual(response.status_code, 200)
|
||||
self.assertContains(response, "Scheduled data")
|
||||
@@ -279,8 +290,8 @@ class ViewTests(TestCase):
|
||||
self.assertContains(response, "Total snapshot data")
|
||||
self.assertContains(response, "100 bytes", html=True)
|
||||
self.assertContains(response, "200 bytes", html=True)
|
||||
self.assertContains(response, "300 bytes", html=True)
|
||||
self.assertContains(response, "600 bytes", html=True)
|
||||
self.assertContains(response, filesizeformat(expected_usage["allocated_size_bytes"]))
|
||||
self.assertContains(response, filesizeformat(300 + expected_usage["allocated_size_bytes"]))
|
||||
|
||||
def test_dashboard_hosts_live_returns_hosts_partial(self) -> None:
|
||||
self.client.force_login(self.staff_user)
|
||||
@@ -300,12 +311,23 @@ class ViewTests(TestCase):
|
||||
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
|
||||
scheduled = self._snapshot(host, "20260519-021500Z__SCHED01", kind=SnapshotRecord.Kind.SCHEDULED)
|
||||
manual = self._snapshot(host, "20260519-031500Z__MANUAL1", kind=SnapshotRecord.Kind.MANUAL)
|
||||
incomplete = self._snapshot(host, "20260519-041500Z__BROKEN1", kind=SnapshotRecord.Kind.INCOMPLETE)
|
||||
self._set_snapshot_storage(scheduled, allocated=100)
|
||||
self._set_snapshot_storage(manual, allocated=200)
|
||||
self._set_snapshot_storage(incomplete, allocated=300)
|
||||
with TemporaryDirectory() as tmp:
|
||||
incomplete_dir = Path(tmp) / host.host / ".incomplete" / "20260519-041500Z__BROKEN1"
|
||||
data_dir = incomplete_dir / "data"
|
||||
data_dir.mkdir(parents=True)
|
||||
data_dir.joinpath("partial-file").write_text("interrupted backup data\n", encoding="utf-8")
|
||||
expected_usage = tree_usage(data_dir)
|
||||
SnapshotRecord.objects.create(
|
||||
host=host,
|
||||
kind=SnapshotRecord.Kind.INCOMPLETE,
|
||||
dirname=incomplete_dir.name,
|
||||
path=str(incomplete_dir),
|
||||
status="failed",
|
||||
)
|
||||
|
||||
response = self.client.get(reverse("dashboard_hosts_live"))
|
||||
response = self.client.get(reverse("dashboard_hosts_live"))
|
||||
|
||||
self.assertEqual(response.status_code, 200)
|
||||
self.assertContains(response, "Scheduled data")
|
||||
@@ -314,8 +336,8 @@ class ViewTests(TestCase):
|
||||
self.assertContains(response, "Total data")
|
||||
self.assertContains(response, "100 bytes", html=True)
|
||||
self.assertContains(response, "200 bytes", html=True)
|
||||
self.assertContains(response, "300 bytes", html=True)
|
||||
self.assertContains(response, "600 bytes", html=True)
|
||||
self.assertContains(response, filesizeformat(expected_usage["allocated_size_bytes"]))
|
||||
self.assertContains(response, filesizeformat(300 + expected_usage["allocated_size_bytes"]))
|
||||
|
||||
def test_dashboard_host_cards_measure_incomplete_data_without_snapshot_metadata(self) -> None:
|
||||
self.client.force_login(self.staff_user)
|
||||
|
||||
Reference in New Issue
Block a user