(feature) Capture structured backup statistics
Parse rsync --stats output into structured run metrics for file counts, transferred bytes, literal data, matched data, speedup, and estimated link-dest savings. Store collected stats on backup run results and successful snapshot metadata, including snapshot data usage and backup-root capacity details for future dashboard graphs and disk-full projections. Render the collected metrics on run and snapshot detail pages, with tests covering parsing, metadata persistence, and UI output.
This commit is contained in:
@@ -8,6 +8,7 @@ from ..errors import ConfigError
|
||||
from ..lock import acquire_host_lock
|
||||
from ..paths import PobsyncPaths
|
||||
from ..rsync import build_rsync_command, build_ssh_command, run_rsync
|
||||
from ..run_stats import collect_storage_stats, read_rsync_stats
|
||||
from ..snapshot import (
|
||||
HostBackupDirs,
|
||||
extract_ts_and_id_from_dirname,
|
||||
@@ -71,6 +72,22 @@ def classify_rsync_failure(exit_code: int | None, log_tail: list[str]) -> dict[s
|
||||
}
|
||||
|
||||
|
||||
def _collect_run_stats(
|
||||
*,
|
||||
log_path: Path,
|
||||
backup_root: Path,
|
||||
duration_seconds: int | None = None,
|
||||
snapshot_dir: Path | None = None,
|
||||
) -> dict[str, Any]:
|
||||
stats: dict[str, Any] = {
|
||||
"rsync": read_rsync_stats(log_path),
|
||||
"storage": collect_storage_stats(backup_root=backup_root, snapshot_dir=snapshot_dir),
|
||||
}
|
||||
if duration_seconds is not None:
|
||||
stats["duration_seconds"] = int(duration_seconds)
|
||||
return stats
|
||||
|
||||
|
||||
def _host_backup_dirs(backup_root: str, host: str) -> HostBackupDirs:
|
||||
return HostBackupDirs(root=Path(backup_root) / host)
|
||||
|
||||
@@ -220,6 +237,10 @@ def run_scheduled(
|
||||
cancel_check=cancel_check,
|
||||
)
|
||||
log_tail = _read_log_tail(dryrun_log)
|
||||
stats = _collect_run_stats(
|
||||
log_path=dryrun_log,
|
||||
backup_root=Path(backup_root),
|
||||
)
|
||||
|
||||
response = {
|
||||
"ok": result.exit_code == 0,
|
||||
@@ -231,6 +252,7 @@ def run_scheduled(
|
||||
"timeout_seconds": effective_timeout_seconds,
|
||||
"verbose_output": True,
|
||||
"ssh_credential": cfg.get("ssh_credential"),
|
||||
"stats": stats,
|
||||
"rsync": {
|
||||
"exit_code": result.exit_code,
|
||||
"command": result.command,
|
||||
@@ -308,6 +330,11 @@ def run_scheduled(
|
||||
meta["duration_seconds"] = int((end_ts - ts).total_seconds())
|
||||
meta["rsync"]["exit_code"] = result.exit_code
|
||||
meta["status"] = "cancelled" if result.cancelled else ("success" if result.exit_code == 0 else "failed")
|
||||
meta["stats"] = _collect_run_stats(
|
||||
log_path=log_path,
|
||||
backup_root=Path(backup_root),
|
||||
duration_seconds=meta["duration_seconds"],
|
||||
)
|
||||
write_yaml_atomic(meta_path, meta)
|
||||
|
||||
if not log_path.exists():
|
||||
@@ -334,6 +361,7 @@ def run_scheduled(
|
||||
"log": str(log_path),
|
||||
"verbose_output": bool(verbose_output),
|
||||
"ssh_credential": cfg.get("ssh_credential"),
|
||||
"stats": meta["stats"],
|
||||
"rsync": {
|
||||
"exit_code": result.exit_code,
|
||||
"command": result.command,
|
||||
@@ -344,6 +372,15 @@ def run_scheduled(
|
||||
|
||||
final_dir = dirs.scheduled / snap_name
|
||||
incomplete_dir.rename(final_dir)
|
||||
final_log_path = final_dir / "meta" / "rsync.log"
|
||||
final_meta_path = final_dir / "meta" / "meta.yaml"
|
||||
meta["stats"] = _collect_run_stats(
|
||||
log_path=final_log_path,
|
||||
backup_root=Path(backup_root),
|
||||
duration_seconds=meta["duration_seconds"],
|
||||
snapshot_dir=final_dir / "data",
|
||||
)
|
||||
write_yaml_atomic(final_meta_path, meta)
|
||||
|
||||
prune_result = None
|
||||
if prune:
|
||||
@@ -368,5 +405,7 @@ def run_scheduled(
|
||||
"base": str(base_dir) if base_dir else None,
|
||||
"rsync": {"exit_code": result.exit_code},
|
||||
"verbose_output": bool(verbose_output),
|
||||
"duration_seconds": meta["duration_seconds"],
|
||||
"stats": meta["stats"],
|
||||
"prune": prune_result,
|
||||
}
|
||||
|
||||
@@ -48,6 +48,7 @@ def build_rsync_command(
|
||||
cmd: list[str] = [rsync_binary]
|
||||
|
||||
cmd.extend(list(rsync_args))
|
||||
_append_stats_arg(cmd)
|
||||
if dry_run or verbose_output:
|
||||
_append_default_verbose_output_args(cmd)
|
||||
|
||||
@@ -131,6 +132,11 @@ def _append_default_verbose_output_args(command: list[str]) -> None:
|
||||
command.append("--info=flist2,progress2,stats2")
|
||||
|
||||
|
||||
def _append_stats_arg(command: list[str]) -> None:
|
||||
if "--stats" not in command:
|
||||
command.append("--stats")
|
||||
|
||||
|
||||
def _has_itemize_arg(command: list[str]) -> bool:
|
||||
for arg in command:
|
||||
if arg == "--itemize-changes":
|
||||
|
||||
238
src/pobsync/run_stats.py
Normal file
238
src/pobsync/run_stats.py
Normal file
@@ -0,0 +1,238 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
_COUNT_KEYS = {
|
||||
"Number of files": "files_total",
|
||||
"Number of created files": "files_created",
|
||||
"Number of deleted files": "files_deleted",
|
||||
"Number of regular files transferred": "files_transferred",
|
||||
}
|
||||
|
||||
_BYTE_KEYS = {
|
||||
"Total file size": "total_file_size_bytes",
|
||||
"Total transferred file size": "total_transferred_file_size_bytes",
|
||||
"Literal data": "literal_data_bytes",
|
||||
"Matched data": "matched_data_bytes",
|
||||
"File list size": "file_list_size_bytes",
|
||||
"Total bytes sent": "bytes_sent",
|
||||
"Total bytes received": "bytes_received",
|
||||
}
|
||||
|
||||
_SIZE_UNITS = {
|
||||
"": 1,
|
||||
"b": 1,
|
||||
"k": 1000,
|
||||
"kb": 1000,
|
||||
"m": 1000**2,
|
||||
"mb": 1000**2,
|
||||
"g": 1000**3,
|
||||
"gb": 1000**3,
|
||||
"t": 1000**4,
|
||||
"tb": 1000**4,
|
||||
"p": 1000**5,
|
||||
"pb": 1000**5,
|
||||
}
|
||||
|
||||
|
||||
def parse_rsync_stats(text: str) -> dict[str, Any]:
|
||||
stats: dict[str, Any] = {}
|
||||
for raw_line in text.splitlines():
|
||||
line = raw_line.strip()
|
||||
if not line:
|
||||
continue
|
||||
|
||||
_parse_colon_stat(line, stats)
|
||||
_parse_sent_received(line, stats)
|
||||
_parse_total_size_speedup(line, stats)
|
||||
|
||||
_add_derived_stats(stats)
|
||||
return stats
|
||||
|
||||
|
||||
def read_rsync_stats(log_path: Path) -> dict[str, Any]:
|
||||
try:
|
||||
text = log_path.read_text(encoding="utf-8", errors="replace")
|
||||
except OSError:
|
||||
return {}
|
||||
return parse_rsync_stats(text)
|
||||
|
||||
|
||||
def collect_storage_stats(*, backup_root: Path, snapshot_dir: Path | None = None) -> dict[str, Any]:
|
||||
stats: dict[str, Any] = {
|
||||
"backup_root": str(backup_root),
|
||||
}
|
||||
capacity = filesystem_capacity(backup_root)
|
||||
if capacity:
|
||||
stats["capacity"] = capacity
|
||||
|
||||
if snapshot_dir is not None:
|
||||
snapshot_usage = tree_usage(snapshot_dir)
|
||||
if snapshot_usage:
|
||||
stats["snapshot"] = {
|
||||
"path": str(snapshot_dir),
|
||||
**snapshot_usage,
|
||||
}
|
||||
|
||||
return stats
|
||||
|
||||
|
||||
def filesystem_capacity(path: Path) -> dict[str, Any]:
|
||||
try:
|
||||
stat = path.stat()
|
||||
statvfs = os.statvfs(path)
|
||||
except OSError:
|
||||
return {}
|
||||
|
||||
total = int(statvfs.f_frsize * statvfs.f_blocks)
|
||||
available = int(statvfs.f_frsize * statvfs.f_bavail)
|
||||
free = int(statvfs.f_frsize * statvfs.f_bfree)
|
||||
used = max(total - free, 0)
|
||||
return {
|
||||
"path": str(path),
|
||||
"total_bytes": total,
|
||||
"used_bytes": used,
|
||||
"free_bytes": free,
|
||||
"available_bytes": available,
|
||||
"used_percent": round((used / total) * 100, 2) if total else 0.0,
|
||||
"device": getattr(stat, "st_dev", None),
|
||||
}
|
||||
|
||||
|
||||
def tree_usage(path: Path) -> dict[str, Any]:
|
||||
apparent_size = 0
|
||||
allocated_size = 0
|
||||
files = 0
|
||||
directories = 0
|
||||
hardlinked_files = 0
|
||||
hardlinked_apparent_size = 0
|
||||
seen_allocated_inodes: set[tuple[int, int]] = set()
|
||||
|
||||
try:
|
||||
root_stat = path.lstat()
|
||||
except OSError:
|
||||
return {}
|
||||
|
||||
if path.is_file():
|
||||
files = 1
|
||||
apparent_size = root_stat.st_size
|
||||
allocated_size = int(getattr(root_stat, "st_blocks", 0) * 512)
|
||||
if root_stat.st_nlink > 1:
|
||||
hardlinked_files = 1
|
||||
hardlinked_apparent_size = root_stat.st_size
|
||||
else:
|
||||
for current_root, dirnames, filenames in path.walk():
|
||||
directories += len(dirnames)
|
||||
for filename in filenames:
|
||||
file_path = current_root / filename
|
||||
try:
|
||||
file_stat = file_path.lstat()
|
||||
except OSError:
|
||||
continue
|
||||
files += 1
|
||||
apparent_size += file_stat.st_size
|
||||
inode_key = (file_stat.st_dev, file_stat.st_ino)
|
||||
if inode_key not in seen_allocated_inodes:
|
||||
allocated_size += int(getattr(file_stat, "st_blocks", 0) * 512)
|
||||
seen_allocated_inodes.add(inode_key)
|
||||
if file_stat.st_nlink > 1:
|
||||
hardlinked_files += 1
|
||||
hardlinked_apparent_size += file_stat.st_size
|
||||
|
||||
return {
|
||||
"path": str(path),
|
||||
"apparent_size_bytes": int(apparent_size),
|
||||
"allocated_size_bytes": int(allocated_size),
|
||||
"files": files,
|
||||
"directories": directories,
|
||||
"hardlinked_files": hardlinked_files,
|
||||
"hardlinked_apparent_size_bytes": int(hardlinked_apparent_size),
|
||||
"hardlink_apparent_ratio": round(hardlinked_apparent_size / apparent_size, 4) if apparent_size else 0.0,
|
||||
}
|
||||
|
||||
|
||||
def _parse_colon_stat(line: str, stats: dict[str, Any]) -> None:
|
||||
if ":" not in line:
|
||||
return
|
||||
label, value = line.split(":", 1)
|
||||
label = label.strip()
|
||||
value = value.strip()
|
||||
|
||||
if label in _COUNT_KEYS:
|
||||
parsed = _parse_int_prefix(value)
|
||||
if parsed is not None:
|
||||
stats[_COUNT_KEYS[label]] = parsed
|
||||
elif label in _BYTE_KEYS:
|
||||
parsed = _parse_byte_value(value)
|
||||
if parsed is not None:
|
||||
stats[_BYTE_KEYS[label]] = parsed
|
||||
|
||||
|
||||
def _parse_sent_received(line: str, stats: dict[str, Any]) -> None:
|
||||
match = re.search(
|
||||
r"sent\s+(?P<sent>[\d,]+(?:\.\d+)?\s*[A-Za-z]*)\s+bytes\s+received\s+"
|
||||
r"(?P<received>[\d,]+(?:\.\d+)?\s*[A-Za-z]*)\s+bytes\s+"
|
||||
r"(?P<rate>[\d,]+(?:\.\d+)?\s*[A-Za-z]*)\s+bytes/sec",
|
||||
line,
|
||||
)
|
||||
if not match:
|
||||
return
|
||||
sent = _parse_byte_value(match.group("sent"))
|
||||
received = _parse_byte_value(match.group("received"))
|
||||
rate = _parse_byte_value(match.group("rate"))
|
||||
if sent is not None:
|
||||
stats["bytes_sent"] = sent
|
||||
if received is not None:
|
||||
stats["bytes_received"] = received
|
||||
if rate is not None:
|
||||
stats["bytes_per_second"] = rate
|
||||
|
||||
|
||||
def _parse_total_size_speedup(line: str, stats: dict[str, Any]) -> None:
|
||||
match = re.search(
|
||||
r"total size is\s+(?P<size>[\d,]+(?:\.\d+)?\s*[A-Za-z]*)\s+speedup is\s+(?P<speedup>[\d,]+(?:\.\d+)?)",
|
||||
line,
|
||||
)
|
||||
if not match:
|
||||
return
|
||||
total_size = _parse_byte_value(match.group("size"))
|
||||
if total_size is not None:
|
||||
stats["total_file_size_bytes"] = total_size
|
||||
stats["speedup"] = float(match.group("speedup").replace(",", ""))
|
||||
|
||||
|
||||
def _add_derived_stats(stats: dict[str, Any]) -> None:
|
||||
sent = stats.get("bytes_sent")
|
||||
received = stats.get("bytes_received")
|
||||
if isinstance(sent, int) and isinstance(received, int):
|
||||
stats["bytes_sent_received"] = sent + received
|
||||
|
||||
literal = stats.get("literal_data_bytes")
|
||||
matched = stats.get("matched_data_bytes")
|
||||
if isinstance(literal, int) and isinstance(matched, int):
|
||||
basis_total = literal + matched
|
||||
stats["link_dest_estimated_savings_bytes"] = matched
|
||||
stats["link_dest_estimated_savings_ratio"] = round(matched / basis_total, 4) if basis_total else 0.0
|
||||
|
||||
|
||||
def _parse_int_prefix(value: str) -> int | None:
|
||||
match = re.match(r"([\d,]+)", value)
|
||||
if not match:
|
||||
return None
|
||||
return int(match.group(1).replace(",", ""))
|
||||
|
||||
|
||||
def _parse_byte_value(value: str) -> int | None:
|
||||
match = re.match(r"([\d,]+(?:\.\d+)?)\s*([A-Za-z]*)", value.strip())
|
||||
if not match:
|
||||
return None
|
||||
number = float(match.group(1).replace(",", ""))
|
||||
unit = match.group(2).lower()
|
||||
multiplier = _SIZE_UNITS.get(unit)
|
||||
if multiplier is None:
|
||||
return int(number)
|
||||
return int(number * multiplier)
|
||||
@@ -54,6 +54,28 @@
|
||||
</section>
|
||||
{% endif %}
|
||||
|
||||
{% if stats %}
|
||||
<section class="panel">
|
||||
<h2>Stats</h2>
|
||||
<div class="two-col">
|
||||
<div class="stack">
|
||||
<div><strong>Duration:</strong> {{ stats.duration_seconds|default:"" }}{% if stats.duration_seconds is not None %}s{% endif %}</div>
|
||||
<div><strong>Files seen:</strong> {{ stats.rsync.files_total|default:"" }}</div>
|
||||
<div><strong>Files transferred:</strong> {{ stats.rsync.files_transferred|default:"" }}</div>
|
||||
<div><strong>Files created:</strong> {{ stats.rsync.files_created|default:"" }}</div>
|
||||
<div><strong>Files deleted:</strong> {{ stats.rsync.files_deleted|default:"" }}</div>
|
||||
</div>
|
||||
<div class="stack">
|
||||
<div><strong>Total file size:</strong> {{ stats.rsync.total_file_size_bytes|filesizeformat }}</div>
|
||||
<div><strong>Transferred file size:</strong> {{ stats.rsync.total_transferred_file_size_bytes|filesizeformat }}</div>
|
||||
<div><strong>Literal data:</strong> {{ stats.rsync.literal_data_bytes|filesizeformat }}</div>
|
||||
<div><strong>Matched data:</strong> {{ stats.rsync.matched_data_bytes|filesizeformat }}</div>
|
||||
<div><strong>Estimated link-dest saving:</strong> {{ stats.rsync.link_dest_estimated_savings_bytes|filesizeformat }}</div>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
{% endif %}
|
||||
|
||||
<section class="panel">
|
||||
<h2>Result</h2>
|
||||
<pre>{{ result_json }}</pre>
|
||||
|
||||
@@ -38,6 +38,28 @@
|
||||
</section>
|
||||
</div>
|
||||
|
||||
{% if stats %}
|
||||
<section class="panel">
|
||||
<h2>Stats</h2>
|
||||
<div class="two-col">
|
||||
<div class="stack">
|
||||
<div><strong>Duration:</strong> {{ stats.duration_seconds|default:"" }}{% if stats.duration_seconds is not None %}s{% endif %}</div>
|
||||
<div><strong>Files seen:</strong> {{ stats.rsync.files_total|default:"" }}</div>
|
||||
<div><strong>Files transferred:</strong> {{ stats.rsync.files_transferred|default:"" }}</div>
|
||||
<div><strong>Total file size:</strong> {{ stats.rsync.total_file_size_bytes|filesizeformat }}</div>
|
||||
<div><strong>Estimated link-dest saving:</strong> {{ stats.rsync.link_dest_estimated_savings_bytes|filesizeformat }}</div>
|
||||
</div>
|
||||
<div class="stack">
|
||||
<div><strong>Snapshot apparent size:</strong> {{ stats.storage.snapshot.apparent_size_bytes|filesizeformat }}</div>
|
||||
<div><strong>Snapshot allocated size:</strong> {{ stats.storage.snapshot.allocated_size_bytes|filesizeformat }}</div>
|
||||
<div><strong>Hardlinked files:</strong> {{ stats.storage.snapshot.hardlinked_files|default:"" }}</div>
|
||||
<div><strong>Backup root used:</strong> {{ stats.storage.capacity.used_percent|default:"" }}%</div>
|
||||
<div><strong>Backup root available:</strong> {{ stats.storage.capacity.available_bytes|filesizeformat }}</div>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
{% endif %}
|
||||
|
||||
<section class="panel">
|
||||
<h2>Backup Runs</h2>
|
||||
<table>
|
||||
|
||||
@@ -119,8 +119,16 @@ class RunScheduledConfigSourceTests(SimpleTestCase):
|
||||
self.assertEqual(timeout_seconds, 900)
|
||||
self.assertIn("--itemize-changes", command)
|
||||
self.assertIn("--info=flist2,progress2,stats2", command)
|
||||
self.assertIn("--stats", command)
|
||||
log_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
log_path.write_text("run 42\n", encoding="utf-8")
|
||||
log_path.write_text(
|
||||
"Number of files: 42\n"
|
||||
"Number of regular files transferred: 3\n"
|
||||
"Total file size: 1,000 bytes\n"
|
||||
"Literal data: 100 bytes\n"
|
||||
"Matched data: 900 bytes\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
return RsyncResult(exit_code=0, command=command)
|
||||
|
||||
with patch("pobsync.commands.run_scheduled.run_rsync", side_effect=fake_run_rsync):
|
||||
@@ -135,6 +143,8 @@ class RunScheduledConfigSourceTests(SimpleTestCase):
|
||||
self.assertTrue(result["ok"])
|
||||
self.assertEqual(result["log"], "/tmp/pobsync-dryrun/web-01/run-42/rsync.log")
|
||||
self.assertEqual(result["timeout_seconds"], 900)
|
||||
self.assertEqual(result["stats"]["rsync"]["files_total"], 42)
|
||||
self.assertEqual(result["stats"]["rsync"]["link_dest_estimated_savings_ratio"], 0.9)
|
||||
|
||||
def test_dry_run_does_not_duplicate_custom_output_args(self) -> None:
|
||||
config_source = FakeConfigSource()
|
||||
@@ -176,6 +186,7 @@ class RunScheduledConfigSourceTests(SimpleTestCase):
|
||||
|
||||
command = run_rsync.call_args.args[0]
|
||||
self.assertTrue(result["ok"])
|
||||
self.assertIn("--stats", command)
|
||||
self.assertIn("--itemize-changes", command)
|
||||
self.assertIn("--info=flist2,progress2,stats2", command)
|
||||
self.assertTrue(result["verbose_output"])
|
||||
@@ -195,10 +206,50 @@ class RunScheduledConfigSourceTests(SimpleTestCase):
|
||||
|
||||
command = run_rsync.call_args.args[0]
|
||||
self.assertTrue(result["ok"])
|
||||
self.assertIn("--stats", command)
|
||||
self.assertNotIn("--itemize-changes", command)
|
||||
self.assertNotIn("--info=flist2,progress2,stats2", command)
|
||||
self.assertFalse(result["verbose_output"])
|
||||
|
||||
def test_successful_real_run_records_stats_in_result_and_metadata(self) -> None:
|
||||
def fake_run_rsync(command, log_path, timeout_seconds, cancel_check=None):
|
||||
log_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
log_path.write_text(
|
||||
"Number of files: 10\n"
|
||||
"Number of regular files transferred: 2\n"
|
||||
"Total file size: 2,000 bytes\n"
|
||||
"Total transferred file size: 500 bytes\n"
|
||||
"Literal data: 500 bytes\n"
|
||||
"Matched data: 1,500 bytes\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
data_dir = log_path.parent.parent / "data"
|
||||
data_dir.mkdir(parents=True, exist_ok=True)
|
||||
(data_dir / "payload.txt").write_text("payload", encoding="utf-8")
|
||||
return RsyncResult(exit_code=0, command=command)
|
||||
|
||||
with TemporaryDirectory() as tmp:
|
||||
backup_root = Path(tmp) / "backups"
|
||||
with patch("pobsync.commands.run_scheduled.run_rsync", side_effect=fake_run_rsync):
|
||||
result = run_scheduled(
|
||||
prefix=Path(tmp) / "home",
|
||||
host="web-01",
|
||||
dry_run=False,
|
||||
config_source=FakeConfigSource(backup_root=str(backup_root)),
|
||||
)
|
||||
|
||||
meta_path = Path(result["snapshot"]) / "meta" / "meta.yaml"
|
||||
meta_text = meta_path.read_text(encoding="utf-8")
|
||||
|
||||
self.assertTrue(result["ok"])
|
||||
self.assertEqual(result["stats"]["rsync"]["files_total"], 10)
|
||||
self.assertEqual(result["stats"]["rsync"]["files_transferred"], 2)
|
||||
self.assertEqual(result["stats"]["rsync"]["link_dest_estimated_savings_bytes"], 1500)
|
||||
self.assertIn("snapshot", result["stats"]["storage"])
|
||||
self.assertIn("capacity", result["stats"]["storage"])
|
||||
self.assertIn("stats:", meta_text)
|
||||
self.assertIn("files_total: 10", meta_text)
|
||||
|
||||
def test_dry_run_reports_cancelled_rsync(self) -> None:
|
||||
def fake_run_rsync(command, log_path, timeout_seconds, cancel_check=None):
|
||||
self.assertTrue(cancel_check())
|
||||
|
||||
60
src/pobsync_backend/tests/test_run_stats.py
Normal file
60
src/pobsync_backend/tests/test_run_stats.py
Normal file
@@ -0,0 +1,60 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from tempfile import TemporaryDirectory
|
||||
|
||||
from django.test import SimpleTestCase
|
||||
|
||||
from pobsync.run_stats import parse_rsync_stats, tree_usage
|
||||
|
||||
|
||||
class RunStatsTests(SimpleTestCase):
|
||||
def test_parse_rsync_stats_extracts_counts_bytes_and_savings(self) -> None:
|
||||
stats = parse_rsync_stats(
|
||||
"""
|
||||
Number of files: 1,234 (reg: 1,200, dir: 34)
|
||||
Number of created files: 12 (reg: 10, dir: 2)
|
||||
Number of deleted files: 3
|
||||
Number of regular files transferred: 8
|
||||
Total file size: 1.50M bytes
|
||||
Total transferred file size: 24.00K bytes
|
||||
Literal data: 24.00K bytes
|
||||
Matched data: 976.00K bytes
|
||||
File list size: 8.00K
|
||||
Total bytes sent: 10.00K
|
||||
Total bytes received: 2.00K
|
||||
sent 10.00K bytes received 2.00K bytes 1.20K bytes/sec
|
||||
total size is 1.50M speedup is 125.00
|
||||
"""
|
||||
)
|
||||
|
||||
self.assertEqual(stats["files_total"], 1234)
|
||||
self.assertEqual(stats["files_created"], 12)
|
||||
self.assertEqual(stats["files_deleted"], 3)
|
||||
self.assertEqual(stats["files_transferred"], 8)
|
||||
self.assertEqual(stats["total_file_size_bytes"], 1_500_000)
|
||||
self.assertEqual(stats["total_transferred_file_size_bytes"], 24_000)
|
||||
self.assertEqual(stats["literal_data_bytes"], 24_000)
|
||||
self.assertEqual(stats["matched_data_bytes"], 976_000)
|
||||
self.assertEqual(stats["bytes_sent_received"], 12_000)
|
||||
self.assertEqual(stats["bytes_per_second"], 1_200)
|
||||
self.assertEqual(stats["speedup"], 125.0)
|
||||
self.assertEqual(stats["link_dest_estimated_savings_bytes"], 976_000)
|
||||
self.assertEqual(stats["link_dest_estimated_savings_ratio"], 0.976)
|
||||
|
||||
def test_tree_usage_reports_hardlinked_files(self) -> None:
|
||||
with TemporaryDirectory() as tmp:
|
||||
root = Path(tmp)
|
||||
source = root / "source"
|
||||
linked = root / "linked"
|
||||
source.write_bytes(b"abc")
|
||||
os.link(source, linked)
|
||||
|
||||
stats = tree_usage(root)
|
||||
|
||||
self.assertEqual(stats["files"], 2)
|
||||
self.assertEqual(stats["apparent_size_bytes"], 6)
|
||||
self.assertEqual(stats["hardlinked_files"], 2)
|
||||
self.assertEqual(stats["hardlinked_apparent_size_bytes"], 6)
|
||||
self.assertEqual(stats["hardlink_apparent_ratio"], 1.0)
|
||||
@@ -753,6 +753,18 @@ class ViewTests(TestCase):
|
||||
"prune_max_delete": 10,
|
||||
"prune_protect_bases": False,
|
||||
},
|
||||
"stats": {
|
||||
"duration_seconds": 12,
|
||||
"rsync": {
|
||||
"files_total": 10,
|
||||
"files_transferred": 2,
|
||||
"total_file_size_bytes": 2000,
|
||||
"total_transferred_file_size_bytes": 500,
|
||||
"literal_data_bytes": 500,
|
||||
"matched_data_bytes": 1500,
|
||||
"link_dest_estimated_savings_bytes": 1500,
|
||||
},
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
@@ -766,6 +778,9 @@ class ViewTests(TestCase):
|
||||
self.assertContains(response, "Requested Options")
|
||||
self.assertContains(response, "Dry run:</strong> yes")
|
||||
self.assertContains(response, "Verbose rsync output:</strong> yes")
|
||||
self.assertContains(response, "Stats")
|
||||
self.assertContains(response, "Files seen:</strong> 10")
|
||||
self.assertContains(response, "Estimated link-dest saving")
|
||||
self.assertContains(response, ""ok": true")
|
||||
self.assertContains(response, reverse("snapshot_detail", args=[snapshot.id]))
|
||||
|
||||
@@ -811,7 +826,30 @@ class ViewTests(TestCase):
|
||||
self.client.force_login(self.staff_user)
|
||||
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
|
||||
base = self._snapshot(host, "20260518-021500Z__BASESNAP")
|
||||
base.metadata = {"status": "success", "snapshot_id": "BASESNAP"}
|
||||
base.metadata = {
|
||||
"status": "success",
|
||||
"snapshot_id": "BASESNAP",
|
||||
"stats": {
|
||||
"duration_seconds": 20,
|
||||
"rsync": {
|
||||
"files_total": 100,
|
||||
"files_transferred": 4,
|
||||
"total_file_size_bytes": 10_000,
|
||||
"link_dest_estimated_savings_bytes": 7_000,
|
||||
},
|
||||
"storage": {
|
||||
"snapshot": {
|
||||
"apparent_size_bytes": 10_000,
|
||||
"allocated_size_bytes": 3_000,
|
||||
"hardlinked_files": 9,
|
||||
},
|
||||
"capacity": {
|
||||
"used_percent": 30.5,
|
||||
"available_bytes": 1_000_000,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
base.save(update_fields=["metadata"])
|
||||
child = self._snapshot(host, "20260519-021500Z__CHILDSNP")
|
||||
child.base = base
|
||||
@@ -824,6 +862,9 @@ class ViewTests(TestCase):
|
||||
self.assertEqual(response.status_code, 200)
|
||||
self.assertContains(response, base.dirname)
|
||||
self.assertContains(response, "BASESNAP")
|
||||
self.assertContains(response, "Stats")
|
||||
self.assertContains(response, "Files seen:</strong> 100")
|
||||
self.assertContains(response, "Hardlinked files:</strong> 9")
|
||||
self.assertContains(response, child.dirname)
|
||||
self.assertContains(response, f"Run {run.id}")
|
||||
self.assertContains(response, reverse("run_detail", args=[run.id]))
|
||||
|
||||
@@ -348,10 +348,12 @@ def queue_manual_backup(request, host: str):
|
||||
@staff_member_required
|
||||
def run_detail(request, run_id: int):
|
||||
run = get_object_or_404(BackupRun.objects.select_related("host", "snapshot"), id=run_id)
|
||||
run_stats = run.result.get("stats") if isinstance(run.result, dict) else {}
|
||||
context = {
|
||||
"run": run,
|
||||
"can_cancel": run.status in {BackupRun.Status.QUEUED, BackupRun.Status.RUNNING},
|
||||
"requested": run.result.get("requested") if isinstance(run.result, dict) else {},
|
||||
"stats": run_stats if isinstance(run_stats, dict) else {},
|
||||
"result_json": _pretty_json(run.result),
|
||||
}
|
||||
return render(request, "pobsync_backend/run_detail.html", context)
|
||||
@@ -389,6 +391,7 @@ def snapshot_detail(request, snapshot_id: int):
|
||||
)
|
||||
context = {
|
||||
"snapshot": snapshot,
|
||||
"stats": snapshot.metadata.get("stats") if isinstance(snapshot.metadata, dict) else {},
|
||||
"metadata_json": _pretty_json(snapshot.metadata),
|
||||
"backup_runs": snapshot.backup_runs.select_related("host").order_by("-created_at"),
|
||||
"derived_snapshots": snapshot.derived_snapshots.select_related("host").order_by("-started_at", "dirname"),
|
||||
|
||||
Reference in New Issue
Block a user