Limit snapshot storage scans recorded by backup workers so very large backup targets cannot make run finalization walk unbounded file trees. Limited scans now record scan_limited, entries_scanned, and max_entries in snapshot storage metadata. Closes #100
95 lines
3.5 KiB
Python
95 lines
3.5 KiB
Python
from __future__ import annotations
|
|
|
|
import os
|
|
from pathlib import Path
|
|
from tempfile import TemporaryDirectory
|
|
|
|
from django.test import SimpleTestCase
|
|
|
|
from pobsync.run_stats import collect_storage_stats, parse_rsync_stats, tree_usage
|
|
|
|
|
|
class RunStatsTests(SimpleTestCase):
    """Tests for the rsync-report parser and the storage-tree scanners.

    Covers ``parse_rsync_stats``, ``tree_usage`` and ``collect_storage_stats``
    from ``pobsync.run_stats``, including the entry-limited scan mode that
    reports ``scan_limited``, ``entries_scanned`` and ``max_entries``.
    """

    def test_parse_rsync_stats_extracts_counts_bytes_and_savings(self) -> None:
        """Parse a sample rsync statistics report and check every derived field."""
        # The sample is kept flush-left, exactly as it appears in the source.
        # NOTE(review): whether parse_rsync_stats tolerates indented report
        # lines is not visible from this file, so do not re-indent the text.
        stats = parse_rsync_stats(
            """
Number of files: 1,234 (reg: 1,200, dir: 34)
Number of created files: 12 (reg: 10, dir: 2)
Number of deleted files: 3
Number of regular files transferred: 8
Total file size: 1.50M bytes
Total transferred file size: 24.00K bytes
Literal data: 24.00K bytes
Matched data: 976.00K bytes
File list size: 8.00K
Total bytes sent: 10.00K
Total bytes received: 2.00K
sent 10.00K bytes received 2.00K bytes 1.20K bytes/sec
total size is 1.50M speedup is 125.00
"""
        )

        # File counters: thousands separators in the report must be handled.
        self.assertEqual(stats["files_total"], 1234)
        self.assertEqual(stats["files_created"], 12)
        self.assertEqual(stats["files_deleted"], 3)
        self.assertEqual(stats["files_transferred"], 8)

        # Size fields: the expected values treat K/M suffixes as powers of 1000.
        self.assertEqual(stats["total_file_size_bytes"], 1_500_000)
        self.assertEqual(stats["total_transferred_file_size_bytes"], 24_000)
        self.assertEqual(stats["literal_data_bytes"], 24_000)
        self.assertEqual(stats["matched_data_bytes"], 976_000)

        # Transfer summary line: sent (10.00K) plus received (2.00K).
        self.assertEqual(stats["bytes_sent_received"], 12_000)
        self.assertEqual(stats["bytes_per_second"], 1_200)
        self.assertEqual(stats["speedup"], 125.0)

        # The savings estimate is expected to equal the matched-data figure.
        self.assertEqual(stats["link_dest_estimated_savings_bytes"], 976_000)
        # The ratio is a derived float, so compare with a tolerance rather
        # than relying on exact binary floating-point equality.
        self.assertAlmostEqual(stats["link_dest_estimated_savings_ratio"], 0.976)

    def test_tree_usage_reports_hardlinked_files(self) -> None:
        """Two names for one 3-byte inode are both counted as hardlinked files."""
        with TemporaryDirectory() as tmp:
            root = Path(tmp)
            source = root / "source"
            linked = root / "linked"
            source.write_bytes(b"abc")
            # Second directory entry pointing at the same file data.
            os.link(source, linked)

            # Scan while the temporary tree still exists.
            stats = tree_usage(root)

        self.assertEqual(stats["files"], 2)
        # Apparent size counts each name separately: 2 entries * 3 bytes.
        self.assertEqual(stats["apparent_size_bytes"], 6)
        self.assertEqual(stats["hardlinked_files"], 2)
        self.assertEqual(stats["hardlinked_apparent_size_bytes"], 6)
        self.assertEqual(stats["hardlink_apparent_ratio"], 1.0)
        # No max_entries was given, so the scan must not be marked as limited.
        self.assertFalse(stats["scan_limited"])

    def test_tree_usage_can_limit_large_scans(self) -> None:
        """A scan capped at two entries stops early and flags itself as limited."""
        with TemporaryDirectory() as tmp:
            root = Path(tmp)
            # Five one-byte files: more entries than the cap allows.
            for index in range(5):
                (root / f"file-{index}").write_bytes(b"x")

            stats = tree_usage(root, max_entries=2)

        self.assertEqual(stats["files"], 2)
        self.assertEqual(stats["entries_scanned"], 2)
        self.assertEqual(stats["max_entries"], 2)
        self.assertTrue(stats["scan_limited"])
        # Only the scanned entries contribute to the size: 2 files * 1 byte.
        self.assertEqual(stats["apparent_size_bytes"], 2)

    def test_collect_storage_stats_marks_limited_snapshot_scan(self) -> None:
        """``snapshot_max_entries`` caps the snapshot scan and is reported back."""
        with TemporaryDirectory() as tmp:
            root = Path(tmp)
            snapshot = root / "snapshot"
            snapshot.mkdir()
            # Four one-byte files: more entries than the cap of one.
            for index in range(4):
                (snapshot / f"file-{index}").write_bytes(b"x")

            stats = collect_storage_stats(
                backup_root=root,
                snapshot_dir=snapshot,
                snapshot_max_entries=1,
            )

        # The limit metadata is expected under the "snapshot" section.
        snapshot_stats = stats["snapshot"]
        self.assertEqual(snapshot_stats["files"], 1)
        self.assertEqual(snapshot_stats["entries_scanned"], 1)
        self.assertEqual(snapshot_stats["max_entries"], 1)
        self.assertTrue(snapshot_stats["scan_limited"])
|