(bugfix) Bound snapshot storage metadata scans #101
@@ -24,6 +24,7 @@ from ..util import ensure_dir, realpath_startswith, sanitize_host, write_yaml_at
|
|||||||
|
|
||||||
DEFAULT_DRY_RUN_TIMEOUT_SECONDS = 900
|
DEFAULT_DRY_RUN_TIMEOUT_SECONDS = 900
|
||||||
RSYNC_PARTIAL_VANISHED_EXIT_CODE = 24
|
RSYNC_PARTIAL_VANISHED_EXIT_CODE = 24
|
||||||
|
SNAPSHOT_STORAGE_SCAN_MAX_ENTRIES = 200_000
|
||||||
|
|
||||||
|
|
||||||
def dry_run_log_path(host: str, run_id: int | None = None) -> Path:
|
def dry_run_log_path(host: str, run_id: int | None = None) -> Path:
|
||||||
@@ -100,7 +101,11 @@ def _collect_run_stats(
|
|||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
stats: dict[str, Any] = {
|
stats: dict[str, Any] = {
|
||||||
"rsync": read_rsync_stats(log_path),
|
"rsync": read_rsync_stats(log_path),
|
||||||
"storage": collect_storage_stats(backup_root=backup_root, snapshot_dir=snapshot_dir),
|
"storage": collect_storage_stats(
|
||||||
|
backup_root=backup_root,
|
||||||
|
snapshot_dir=snapshot_dir,
|
||||||
|
snapshot_max_entries=SNAPSHOT_STORAGE_SCAN_MAX_ENTRIES if snapshot_dir is not None else None,
|
||||||
|
),
|
||||||
}
|
}
|
||||||
if duration_seconds is not None:
|
if duration_seconds is not None:
|
||||||
stats["duration_seconds"] = int(duration_seconds)
|
stats["duration_seconds"] = int(duration_seconds)
|
||||||
|
|||||||
@@ -62,7 +62,12 @@ def read_rsync_stats(log_path: Path) -> dict[str, Any]:
|
|||||||
return parse_rsync_stats(text)
|
return parse_rsync_stats(text)
|
||||||
|
|
||||||
|
|
||||||
def collect_storage_stats(*, backup_root: Path, snapshot_dir: Path | None = None) -> dict[str, Any]:
|
def collect_storage_stats(
|
||||||
|
*,
|
||||||
|
backup_root: Path,
|
||||||
|
snapshot_dir: Path | None = None,
|
||||||
|
snapshot_max_entries: int | None = None,
|
||||||
|
) -> dict[str, Any]:
|
||||||
stats: dict[str, Any] = {
|
stats: dict[str, Any] = {
|
||||||
"backup_root": str(backup_root),
|
"backup_root": str(backup_root),
|
||||||
}
|
}
|
||||||
@@ -71,7 +76,7 @@ def collect_storage_stats(*, backup_root: Path, snapshot_dir: Path | None = None
|
|||||||
stats["capacity"] = capacity
|
stats["capacity"] = capacity
|
||||||
|
|
||||||
if snapshot_dir is not None:
|
if snapshot_dir is not None:
|
||||||
snapshot_usage = tree_usage(snapshot_dir)
|
snapshot_usage = tree_usage(snapshot_dir, max_entries=snapshot_max_entries)
|
||||||
if snapshot_usage:
|
if snapshot_usage:
|
||||||
stats["snapshot"] = {
|
stats["snapshot"] = {
|
||||||
"path": str(snapshot_dir),
|
"path": str(snapshot_dir),
|
||||||
@@ -103,13 +108,15 @@ def filesystem_capacity(path: Path) -> dict[str, Any]:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def tree_usage(path: Path) -> dict[str, Any]:
|
def tree_usage(path: Path, *, max_entries: int | None = None) -> dict[str, Any]:
|
||||||
apparent_size = 0
|
apparent_size = 0
|
||||||
allocated_size = 0
|
allocated_size = 0
|
||||||
files = 0
|
files = 0
|
||||||
directories = 0
|
directories = 0
|
||||||
hardlinked_files = 0
|
hardlinked_files = 0
|
||||||
hardlinked_apparent_size = 0
|
hardlinked_apparent_size = 0
|
||||||
|
entries_scanned = 0
|
||||||
|
scan_limited = False
|
||||||
seen_allocated_inodes: set[tuple[int, int]] = set()
|
seen_allocated_inodes: set[tuple[int, int]] = set()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -119,6 +126,7 @@ def tree_usage(path: Path) -> dict[str, Any]:
|
|||||||
|
|
||||||
if path.is_file():
|
if path.is_file():
|
||||||
files = 1
|
files = 1
|
||||||
|
entries_scanned = 1
|
||||||
apparent_size = root_stat.st_size
|
apparent_size = root_stat.st_size
|
||||||
allocated_size = int(getattr(root_stat, "st_blocks", 0) * 512)
|
allocated_size = int(getattr(root_stat, "st_blocks", 0) * 512)
|
||||||
if root_stat.st_nlink > 1:
|
if root_stat.st_nlink > 1:
|
||||||
@@ -126,14 +134,25 @@ def tree_usage(path: Path) -> dict[str, Any]:
|
|||||||
hardlinked_apparent_size = root_stat.st_size
|
hardlinked_apparent_size = root_stat.st_size
|
||||||
else:
|
else:
|
||||||
for current_root, dirnames, filenames in path.walk():
|
for current_root, dirnames, filenames in path.walk():
|
||||||
directories += len(dirnames)
|
for _dirname in dirnames:
|
||||||
|
if _scan_limit_reached(entries_scanned, max_entries):
|
||||||
|
scan_limited = True
|
||||||
|
break
|
||||||
|
directories += 1
|
||||||
|
entries_scanned += 1
|
||||||
|
if scan_limited:
|
||||||
|
break
|
||||||
for filename in filenames:
|
for filename in filenames:
|
||||||
|
if _scan_limit_reached(entries_scanned, max_entries):
|
||||||
|
scan_limited = True
|
||||||
|
break
|
||||||
file_path = current_root / filename
|
file_path = current_root / filename
|
||||||
try:
|
try:
|
||||||
file_stat = file_path.lstat()
|
file_stat = file_path.lstat()
|
||||||
except OSError:
|
except OSError:
|
||||||
continue
|
continue
|
||||||
files += 1
|
files += 1
|
||||||
|
entries_scanned += 1
|
||||||
apparent_size += file_stat.st_size
|
apparent_size += file_stat.st_size
|
||||||
inode_key = (file_stat.st_dev, file_stat.st_ino)
|
inode_key = (file_stat.st_dev, file_stat.st_ino)
|
||||||
if inode_key not in seen_allocated_inodes:
|
if inode_key not in seen_allocated_inodes:
|
||||||
@@ -142,6 +161,8 @@ def tree_usage(path: Path) -> dict[str, Any]:
|
|||||||
if file_stat.st_nlink > 1:
|
if file_stat.st_nlink > 1:
|
||||||
hardlinked_files += 1
|
hardlinked_files += 1
|
||||||
hardlinked_apparent_size += file_stat.st_size
|
hardlinked_apparent_size += file_stat.st_size
|
||||||
|
if scan_limited:
|
||||||
|
break
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"path": str(path),
|
"path": str(path),
|
||||||
@@ -149,12 +170,19 @@ def tree_usage(path: Path) -> dict[str, Any]:
|
|||||||
"allocated_size_bytes": int(allocated_size),
|
"allocated_size_bytes": int(allocated_size),
|
||||||
"files": files,
|
"files": files,
|
||||||
"directories": directories,
|
"directories": directories,
|
||||||
|
"entries_scanned": entries_scanned,
|
||||||
|
"scan_limited": scan_limited,
|
||||||
|
"max_entries": max_entries,
|
||||||
"hardlinked_files": hardlinked_files,
|
"hardlinked_files": hardlinked_files,
|
||||||
"hardlinked_apparent_size_bytes": int(hardlinked_apparent_size),
|
"hardlinked_apparent_size_bytes": int(hardlinked_apparent_size),
|
||||||
"hardlink_apparent_ratio": round(hardlinked_apparent_size / apparent_size, 4) if apparent_size else 0.0,
|
"hardlink_apparent_ratio": round(hardlinked_apparent_size / apparent_size, 4) if apparent_size else 0.0,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _scan_limit_reached(entries_scanned: int, max_entries: int | None) -> bool:
|
||||||
|
return max_entries is not None and max_entries >= 0 and entries_scanned >= max_entries
|
||||||
|
|
||||||
|
|
||||||
def _parse_colon_stat(line: str, stats: dict[str, Any]) -> None:
|
def _parse_colon_stat(line: str, stats: dict[str, Any]) -> None:
|
||||||
if ":" not in line:
|
if ":" not in line:
|
||||||
return
|
return
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ from unittest.mock import patch
|
|||||||
|
|
||||||
from django.test import SimpleTestCase
|
from django.test import SimpleTestCase
|
||||||
|
|
||||||
from pobsync.commands.run_scheduled import run_scheduled
|
from pobsync.commands.run_scheduled import SNAPSHOT_STORAGE_SCAN_MAX_ENTRIES, run_scheduled
|
||||||
from pobsync.errors import ConfigError
|
from pobsync.errors import ConfigError
|
||||||
from pobsync.rsync import RsyncResult
|
from pobsync.rsync import RsyncResult
|
||||||
|
|
||||||
@@ -270,9 +270,12 @@ class RunScheduledConfigSourceTests(SimpleTestCase):
|
|||||||
self.assertEqual(result["stats"]["rsync"]["files_transferred"], 2)
|
self.assertEqual(result["stats"]["rsync"]["files_transferred"], 2)
|
||||||
self.assertEqual(result["stats"]["rsync"]["link_dest_estimated_savings_bytes"], 1500)
|
self.assertEqual(result["stats"]["rsync"]["link_dest_estimated_savings_bytes"], 1500)
|
||||||
self.assertIn("snapshot", result["stats"]["storage"])
|
self.assertIn("snapshot", result["stats"]["storage"])
|
||||||
|
self.assertEqual(result["stats"]["storage"]["snapshot"]["max_entries"], SNAPSHOT_STORAGE_SCAN_MAX_ENTRIES)
|
||||||
|
self.assertFalse(result["stats"]["storage"]["snapshot"]["scan_limited"])
|
||||||
self.assertIn("capacity", result["stats"]["storage"])
|
self.assertIn("capacity", result["stats"]["storage"])
|
||||||
self.assertIn("stats:", meta_text)
|
self.assertIn("stats:", meta_text)
|
||||||
self.assertIn("files_total: 10", meta_text)
|
self.assertIn("files_total: 10", meta_text)
|
||||||
|
self.assertIn(f"max_entries: {SNAPSHOT_STORAGE_SCAN_MAX_ENTRIES}", meta_text)
|
||||||
|
|
||||||
def test_real_run_reports_running_state_callback_before_rsync_returns(self) -> None:
|
def test_real_run_reports_running_state_callback_before_rsync_returns(self) -> None:
|
||||||
states = []
|
states = []
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ from tempfile import TemporaryDirectory
|
|||||||
|
|
||||||
from django.test import SimpleTestCase
|
from django.test import SimpleTestCase
|
||||||
|
|
||||||
from pobsync.run_stats import parse_rsync_stats, tree_usage
|
from pobsync.run_stats import collect_storage_stats, parse_rsync_stats, tree_usage
|
||||||
|
|
||||||
|
|
||||||
class RunStatsTests(SimpleTestCase):
|
class RunStatsTests(SimpleTestCase):
|
||||||
@@ -58,3 +58,37 @@ total size is 1.50M speedup is 125.00
|
|||||||
self.assertEqual(stats["hardlinked_files"], 2)
|
self.assertEqual(stats["hardlinked_files"], 2)
|
||||||
self.assertEqual(stats["hardlinked_apparent_size_bytes"], 6)
|
self.assertEqual(stats["hardlinked_apparent_size_bytes"], 6)
|
||||||
self.assertEqual(stats["hardlink_apparent_ratio"], 1.0)
|
self.assertEqual(stats["hardlink_apparent_ratio"], 1.0)
|
||||||
|
self.assertFalse(stats["scan_limited"])
|
||||||
|
|
||||||
|
def test_tree_usage_can_limit_large_scans(self) -> None:
    """tree_usage counts at most ``max_entries`` entries and flags the result."""
    with TemporaryDirectory() as tmp:
        root = Path(tmp)
        # Five one-byte files, so the byte total equals the number of files counted.
        for index in range(5):
            (root / f"file-{index}").write_bytes(b"x")

        stats = tree_usage(root, max_entries=2)

    self.assertEqual(stats["files"], 2)
    self.assertEqual(stats["entries_scanned"], 2)
    self.assertEqual(stats["max_entries"], 2)
    self.assertTrue(stats["scan_limited"])
    self.assertEqual(stats["apparent_size_bytes"], 2)
|
||||||
|
|
||||||
|
def test_collect_storage_stats_marks_limited_snapshot_scan(self) -> None:
    """collect_storage_stats passes ``snapshot_max_entries`` through to the snapshot scan."""
    with TemporaryDirectory() as tmp:
        root = Path(tmp)
        snapshot = root / "snapshot"
        snapshot.mkdir()
        # Four files against a budget of one, so the scan must stop early.
        for index in range(4):
            (snapshot / f"file-{index}").write_bytes(b"x")

        stats = collect_storage_stats(
            backup_root=root,
            snapshot_dir=snapshot,
            snapshot_max_entries=1,
        )

    self.assertEqual(stats["snapshot"]["files"], 1)
    self.assertEqual(stats["snapshot"]["entries_scanned"], 1)
    self.assertEqual(stats["snapshot"]["max_entries"], 1)
    self.assertTrue(stats["snapshot"]["scan_limited"])
|
||||||
|
|||||||
Reference in New Issue
Block a user