(feature) Capture structured backup statistics

Parse rsync --stats output into structured run metrics for file counts,
transferred bytes, literal data, matched data, speedup, and estimated
link-dest savings.

Store collected stats on backup run results and successful snapshot metadata,
including snapshot data usage and backup-root capacity details for future
dashboard graphs and disk-full projections.

Render the collected metrics on run and snapshot detail pages, with tests
covering parsing, metadata persistence, and UI output.
This commit is contained in:
2026-05-19 22:25:04 +02:00
parent 728e5c740a
commit 6940dc55b7
9 changed files with 484 additions and 2 deletions

View File

@@ -54,6 +54,28 @@
</section>
{% endif %}
{% if stats %}
{# Stats panel; rendered only when the "stats" context value is non-empty. #}
{# Counters use default_if_none instead of default: default replaces every falsy value, #}
{# so a genuine 0 would render as an empty field and a 0-second duration as a bare "s". #}
<section class="panel">
  <h2>Stats</h2>
  <div class="two-col">
    <div class="stack">
      <div><strong>Duration:</strong> {{ stats.duration_seconds|default_if_none:"" }}{% if stats.duration_seconds is not None %}s{% endif %}</div>
      <div><strong>Files seen:</strong> {{ stats.rsync.files_total|default_if_none:"" }}</div>
      <div><strong>Files transferred:</strong> {{ stats.rsync.files_transferred|default_if_none:"" }}</div>
      <div><strong>Files created:</strong> {{ stats.rsync.files_created|default_if_none:"" }}</div>
      <div><strong>Files deleted:</strong> {{ stats.rsync.files_deleted|default_if_none:"" }}</div>
    </div>
    <div class="stack">
      <div><strong>Total file size:</strong> {{ stats.rsync.total_file_size_bytes|filesizeformat }}</div>
      <div><strong>Transferred file size:</strong> {{ stats.rsync.total_transferred_file_size_bytes|filesizeformat }}</div>
      <div><strong>Literal data:</strong> {{ stats.rsync.literal_data_bytes|filesizeformat }}</div>
      <div><strong>Matched data:</strong> {{ stats.rsync.matched_data_bytes|filesizeformat }}</div>
      <div><strong>Estimated link-dest saving:</strong> {{ stats.rsync.link_dest_estimated_savings_bytes|filesizeformat }}</div>
    </div>
  </div>
</section>
{% endif %}
<section class="panel">
<h2>Result</h2>
<pre>{{ result_json }}</pre>

View File

@@ -38,6 +38,28 @@
</section>
</div>
{% if stats %}
{# Stats panel; rendered only when the "stats" context value is non-empty. #}
{# Counters use default_if_none instead of default: default replaces every falsy value, #}
{# so a genuine 0 would render as an empty field and a 0-second duration as a bare "s". #}
<section class="panel">
  <h2>Stats</h2>
  <div class="two-col">
    <div class="stack">
      <div><strong>Duration:</strong> {{ stats.duration_seconds|default_if_none:"" }}{% if stats.duration_seconds is not None %}s{% endif %}</div>
      <div><strong>Files seen:</strong> {{ stats.rsync.files_total|default_if_none:"" }}</div>
      <div><strong>Files transferred:</strong> {{ stats.rsync.files_transferred|default_if_none:"" }}</div>
      <div><strong>Total file size:</strong> {{ stats.rsync.total_file_size_bytes|filesizeformat }}</div>
      <div><strong>Estimated link-dest saving:</strong> {{ stats.rsync.link_dest_estimated_savings_bytes|filesizeformat }}</div>
    </div>
    <div class="stack">
      <div><strong>Snapshot apparent size:</strong> {{ stats.storage.snapshot.apparent_size_bytes|filesizeformat }}</div>
      <div><strong>Snapshot allocated size:</strong> {{ stats.storage.snapshot.allocated_size_bytes|filesizeformat }}</div>
      <div><strong>Hardlinked files:</strong> {{ stats.storage.snapshot.hardlinked_files|default_if_none:"" }}</div>
      {# The percent sign is only emitted when a value exists, mirroring the "s" suffix on the duration. #}
      <div><strong>Backup root used:</strong> {{ stats.storage.capacity.used_percent|default_if_none:"" }}{% if stats.storage.capacity.used_percent is not None %}%{% endif %}</div>
      <div><strong>Backup root available:</strong> {{ stats.storage.capacity.available_bytes|filesizeformat }}</div>
    </div>
  </div>
</section>
{% endif %}
<section class="panel">
<h2>Backup Runs</h2>
<table>

View File

@@ -119,8 +119,16 @@ class RunScheduledConfigSourceTests(SimpleTestCase):
self.assertEqual(timeout_seconds, 900)
self.assertIn("--itemize-changes", command)
self.assertIn("--info=flist2,progress2,stats2", command)
self.assertIn("--stats", command)
log_path.parent.mkdir(parents=True, exist_ok=True)
log_path.write_text("run 42\n", encoding="utf-8")
log_path.write_text(
"Number of files: 42\n"
"Number of regular files transferred: 3\n"
"Total file size: 1,000 bytes\n"
"Literal data: 100 bytes\n"
"Matched data: 900 bytes\n",
encoding="utf-8",
)
return RsyncResult(exit_code=0, command=command)
with patch("pobsync.commands.run_scheduled.run_rsync", side_effect=fake_run_rsync):
@@ -135,6 +143,8 @@ class RunScheduledConfigSourceTests(SimpleTestCase):
self.assertTrue(result["ok"])
self.assertEqual(result["log"], "/tmp/pobsync-dryrun/web-01/run-42/rsync.log")
self.assertEqual(result["timeout_seconds"], 900)
self.assertEqual(result["stats"]["rsync"]["files_total"], 42)
self.assertEqual(result["stats"]["rsync"]["link_dest_estimated_savings_ratio"], 0.9)
def test_dry_run_does_not_duplicate_custom_output_args(self) -> None:
config_source = FakeConfigSource()
@@ -176,6 +186,7 @@ class RunScheduledConfigSourceTests(SimpleTestCase):
command = run_rsync.call_args.args[0]
self.assertTrue(result["ok"])
self.assertIn("--stats", command)
self.assertIn("--itemize-changes", command)
self.assertIn("--info=flist2,progress2,stats2", command)
self.assertTrue(result["verbose_output"])
@@ -195,10 +206,50 @@ class RunScheduledConfigSourceTests(SimpleTestCase):
command = run_rsync.call_args.args[0]
self.assertTrue(result["ok"])
self.assertIn("--stats", command)
self.assertNotIn("--itemize-changes", command)
self.assertNotIn("--info=flist2,progress2,stats2", command)
self.assertFalse(result["verbose_output"])
def test_successful_real_run_records_stats_in_result_and_metadata(self) -> None:
    # A non-dry run whose (faked) rsync log contains a stats summary must expose
    # the parsed numbers both in the returned result and in the snapshot's meta.yaml.
    rsync_log = (
        "Number of files: 10\n"
        "Number of regular files transferred: 2\n"
        "Total file size: 2,000 bytes\n"
        "Total transferred file size: 500 bytes\n"
        "Literal data: 500 bytes\n"
        "Matched data: 1,500 bytes\n"
    )

    def fake_run_rsync(command, log_path, timeout_seconds, cancel_check=None):
        # Stand-in for rsync: write the canned log and drop one payload file
        # into the sibling "data" directory, then report success.
        log_dir = log_path.parent
        log_dir.mkdir(parents=True, exist_ok=True)
        log_path.write_text(rsync_log, encoding="utf-8")
        payload_dir = log_dir.parent / "data"
        payload_dir.mkdir(parents=True, exist_ok=True)
        (payload_dir / "payload.txt").write_text("payload", encoding="utf-8")
        return RsyncResult(exit_code=0, command=command)

    with TemporaryDirectory() as tmp:
        workspace = Path(tmp)
        backup_root = workspace / "backups"
        with patch("pobsync.commands.run_scheduled.run_rsync", side_effect=fake_run_rsync):
            result = run_scheduled(
                prefix=workspace / "home",
                host="web-01",
                dry_run=False,
                config_source=FakeConfigSource(backup_root=str(backup_root)),
            )
        # Read the metadata while the temporary directory still exists
        # (presumably the snapshot lives under backup_root -- confirm).
        meta_text = (Path(result["snapshot"]) / "meta" / "meta.yaml").read_text(encoding="utf-8")

    self.assertTrue(result["ok"])

    rsync_stats = result["stats"]["rsync"]
    self.assertEqual(rsync_stats["files_total"], 10)
    self.assertEqual(rsync_stats["files_transferred"], 2)
    self.assertEqual(rsync_stats["link_dest_estimated_savings_bytes"], 1500)

    storage_stats = result["stats"]["storage"]
    self.assertIn("snapshot", storage_stats)
    self.assertIn("capacity", storage_stats)

    self.assertIn("stats:", meta_text)
    self.assertIn("files_total: 10", meta_text)
def test_dry_run_reports_cancelled_rsync(self) -> None:
def fake_run_rsync(command, log_path, timeout_seconds, cancel_check=None):
self.assertTrue(cancel_check())

View File

@@ -0,0 +1,60 @@
from __future__ import annotations
import os
from pathlib import Path
from tempfile import TemporaryDirectory
from django.test import SimpleTestCase
from pobsync.run_stats import parse_rsync_stats, tree_usage
class RunStatsTests(SimpleTestCase):
    """Tests for ``parse_rsync_stats`` and ``tree_usage`` from ``pobsync.run_stats``."""

    def test_parse_rsync_stats_extracts_counts_bytes_and_savings(self) -> None:
        # Representative rsync statistics summary using thousands separators
        # and K/M size suffixes.
        rsync_output = """
Number of files: 1,234 (reg: 1,200, dir: 34)
Number of created files: 12 (reg: 10, dir: 2)
Number of deleted files: 3
Number of regular files transferred: 8
Total file size: 1.50M bytes
Total transferred file size: 24.00K bytes
Literal data: 24.00K bytes
Matched data: 976.00K bytes
File list size: 8.00K
Total bytes sent: 10.00K
Total bytes received: 2.00K
sent 10.00K bytes received 2.00K bytes 1.20K bytes/sec
total size is 1.50M speedup is 125.00
"""
        parsed = parse_rsync_stats(rsync_output)

        # Checked in this order so the first mismatch reported is the same
        # one a sequence of individual assertions would hit.
        expected = {
            "files_total": 1234,
            "files_created": 12,
            "files_deleted": 3,
            "files_transferred": 8,
            "total_file_size_bytes": 1_500_000,
            "total_transferred_file_size_bytes": 24_000,
            "literal_data_bytes": 24_000,
            "matched_data_bytes": 976_000,
            "bytes_sent_received": 12_000,
            "bytes_per_second": 1_200,
            "speedup": 125.0,
            "link_dest_estimated_savings_bytes": 976_000,
            "link_dest_estimated_savings_ratio": 0.976,
        }
        for field, value in expected.items():
            self.assertEqual(parsed[field], value, msg=field)

    def test_tree_usage_reports_hardlinked_files(self) -> None:
        # One 3-byte file plus a hard link to it: two directory entries,
        # both of which are expected to count as hardlinked.
        with TemporaryDirectory() as tmp:
            tree_root = Path(tmp)
            original = tree_root / "source"
            original.write_bytes(b"abc")
            os.link(original, tree_root / "linked")
            usage = tree_usage(tree_root)

        self.assertEqual(usage["files"], 2)
        self.assertEqual(usage["apparent_size_bytes"], 6)
        self.assertEqual(usage["hardlinked_files"], 2)
        self.assertEqual(usage["hardlinked_apparent_size_bytes"], 6)
        self.assertEqual(usage["hardlink_apparent_ratio"], 1.0)

View File

@@ -753,6 +753,18 @@ class ViewTests(TestCase):
"prune_max_delete": 10,
"prune_protect_bases": False,
},
"stats": {
"duration_seconds": 12,
"rsync": {
"files_total": 10,
"files_transferred": 2,
"total_file_size_bytes": 2000,
"total_transferred_file_size_bytes": 500,
"literal_data_bytes": 500,
"matched_data_bytes": 1500,
"link_dest_estimated_savings_bytes": 1500,
},
},
},
)
@@ -766,6 +778,9 @@ class ViewTests(TestCase):
self.assertContains(response, "Requested Options")
self.assertContains(response, "Dry run:</strong> yes")
self.assertContains(response, "Verbose rsync output:</strong> yes")
self.assertContains(response, "Stats")
self.assertContains(response, "Files seen:</strong> 10")
self.assertContains(response, "Estimated link-dest saving")
self.assertContains(response, "&quot;ok&quot;: true")
self.assertContains(response, reverse("snapshot_detail", args=[snapshot.id]))
@@ -811,7 +826,30 @@ class ViewTests(TestCase):
self.client.force_login(self.staff_user)
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
base = self._snapshot(host, "20260518-021500Z__BASESNAP")
base.metadata = {"status": "success", "snapshot_id": "BASESNAP"}
base.metadata = {
"status": "success",
"snapshot_id": "BASESNAP",
"stats": {
"duration_seconds": 20,
"rsync": {
"files_total": 100,
"files_transferred": 4,
"total_file_size_bytes": 10_000,
"link_dest_estimated_savings_bytes": 7_000,
},
"storage": {
"snapshot": {
"apparent_size_bytes": 10_000,
"allocated_size_bytes": 3_000,
"hardlinked_files": 9,
},
"capacity": {
"used_percent": 30.5,
"available_bytes": 1_000_000,
},
},
},
}
base.save(update_fields=["metadata"])
child = self._snapshot(host, "20260519-021500Z__CHILDSNP")
child.base = base
@@ -824,6 +862,9 @@ class ViewTests(TestCase):
self.assertEqual(response.status_code, 200)
self.assertContains(response, base.dirname)
self.assertContains(response, "BASESNAP")
self.assertContains(response, "Stats")
self.assertContains(response, "Files seen:</strong> 100")
self.assertContains(response, "Hardlinked files:</strong> 9")
self.assertContains(response, child.dirname)
self.assertContains(response, f"Run {run.id}")
self.assertContains(response, reverse("run_detail", args=[run.id]))

View File

@@ -348,10 +348,12 @@ def queue_manual_backup(request, host: str):
@staff_member_required
def run_detail(request, run_id: int):
    """Render the detail page for a single backup run.

    The run is fetched by ``run_id`` via ``get_object_or_404``. The template
    receives the run itself, a flag telling whether it can still be cancelled,
    the ``"requested"`` and ``"stats"`` entries of the run result, and the
    pretty-printed result.
    """
    run = get_object_or_404(
        BackupRun.objects.select_related("host", "snapshot"),
        id=run_id,
    )

    payload = run.result
    if isinstance(payload, dict):
        requested = payload.get("requested")
        stats = payload.get("stats")
    else:
        requested = {}
        stats = {}
    # A missing or non-mapping "stats" entry is normalised to an empty dict.
    if not isinstance(stats, dict):
        stats = {}

    cancellable_states = {BackupRun.Status.QUEUED, BackupRun.Status.RUNNING}
    return render(
        request,
        "pobsync_backend/run_detail.html",
        {
            "run": run,
            "can_cancel": run.status in cancellable_states,
            "requested": requested,
            "stats": stats,
            "result_json": _pretty_json(run.result),
        },
    )
@@ -389,6 +391,7 @@ def snapshot_detail(request, snapshot_id: int):
)
context = {
"snapshot": snapshot,
"stats": snapshot.metadata.get("stats") if isinstance(snapshot.metadata, dict) else {},
"metadata_json": _pretty_json(snapshot.metadata),
"backup_runs": snapshot.backup_runs.select_related("host").order_by("-created_at"),
"derived_snapshots": snapshot.derived_snapshots.select_related("host").order_by("-started_at", "dirname"),