(bugfix) Make snapshot pruning robust to archived permissions

Repair user permissions inside snapshot trees before deleting them so
retention prune and incomplete cleanup can remove directories copied with
restrictive rsync archive modes.

Add path validation for scheduled/manual snapshot deletes and cover
non-traversable nested directories in retention tests.
This commit is contained in:
2026-05-28 20:58:37 +02:00
parent f86c67aeee
commit 3893df4640
2 changed files with 141 additions and 7 deletions

View File

@@ -103,6 +103,7 @@ def run_sql_retention_apply(
raise ConfigError(f"Refusing to delete unsupported snapshot kind: {snap_kind!r}")
path = _snapshot_delete_path(path=Path(snap_path), dirname=dirname)
_validate_snapshot_delete_path(host=host, kind=snap_kind, path=path, dirname=dirname)
reason = str(item.get("reason") or "outside retention policy")
if not path.exists():
actions.append(f"skip missing {snap_kind}/{dirname}")
@@ -339,14 +340,55 @@ def _validate_incomplete_delete_path(*, host: str, path: Path, dirname: str) ->
raise ConfigError(f"Refusing to delete incomplete snapshot outside host backup root: {path}")
def _validate_snapshot_delete_path(*, host: str, kind: str, path: Path, dirname: str) -> None:
if kind not in {SnapshotRecord.Kind.SCHEDULED, SnapshotRecord.Kind.MANUAL}:
raise ConfigError(f"Refusing to delete unsupported snapshot kind: {kind!r}")
path_parts = path.parts
if path.name != dirname or kind not in path_parts or host not in path_parts:
raise ConfigError(f"Refusing to delete unexpected snapshot path: {path}")
kind_index = path_parts.index(kind)
if kind_index == 0 or path_parts[kind_index - 1] != host:
raise ConfigError(f"Refusing to delete snapshot outside host backup root: {path}")
def _remove_snapshot_tree(path: Path) -> None:
_make_directories_user_writable(path)
shutil.rmtree(path)
_make_snapshot_tree_user_removable(path)
shutil.rmtree(path, onexc=_retry_remove_with_user_permissions)
def _make_directories_user_writable(path: Path) -> None:
for directory in [path, *[child for child in path.rglob("*") if child.is_dir() and not child.is_symlink()]]:
mode = directory.stat().st_mode
if mode & stat.S_IWUSR:
def _make_snapshot_tree_user_removable(path: Path) -> None:
stack = [path]
while stack:
directory = stack.pop()
if directory.is_symlink():
continue
directory.chmod(mode | stat.S_IWUSR)
_make_path_user_removable(directory)
try:
children = list(directory.iterdir())
except OSError:
continue
for child in children:
if child.is_dir() and not child.is_symlink():
stack.append(child)
def _retry_remove_with_user_permissions(function: Any, path: str, excinfo: BaseException) -> None:
failed_path = Path(path)
_make_path_user_removable(failed_path)
function(path)
def _make_path_user_removable(path: Path) -> None:
try:
mode = path.stat().st_mode
except OSError:
return
wanted = stat.S_IRUSR | stat.S_IWUSR
if path.is_dir() and not path.is_symlink():
wanted |= stat.S_IXUSR
if mode & wanted == wanted:
return
try:
path.chmod(mode | wanted)
except OSError:
return

View File

@@ -154,6 +154,68 @@ class SqlRetentionTests(TestCase):
],
)
def test_apply_deletes_snapshot_with_non_traversable_nested_directory(self) -> None:
with TemporaryDirectory() as tmp:
prefix = Path(tmp) / "home"
host = HostConfig.objects.create(
host="web-01",
address="web-01.example.test",
retention_daily=0,
retention_weekly=0,
retention_monthly=0,
retention_yearly=0,
)
old_dir = Path(tmp) / "backups" / host.host / "scheduled" / "20260518-021500Z__OLD"
restricted_dir = old_dir / "data" / "var" / "lib" / "snapd" / "void"
restricted_dir.mkdir(parents=True)
restricted_dir.joinpath("state").write_text("preserved permissions\n")
restricted_dir.chmod(0)
new_dir = Path(tmp) / "backups" / host.host / "scheduled" / "20260519-021500Z__NEW"
new_dir.mkdir(parents=True)
old = self._snapshot(host, old_dir.name, path=str(old_dir))
self._snapshot(host, new_dir.name, path=str(new_dir))
result = run_sql_retention_apply(
prefix=prefix,
host=host.host,
kind="scheduled",
protect_bases=False,
yes=True,
max_delete=1,
acquire_lock=False,
)
self.assertFalse(old_dir.exists())
self.assertFalse(SnapshotRecord.objects.filter(pk=old.pk).exists())
self.assertEqual(result["deleted"][0]["dirname"], old.dirname)
def test_apply_rejects_scheduled_snapshot_path_outside_host_kind_directory(self) -> None:
host = HostConfig.objects.create(
host="web-01",
address="web-01.example.test",
retention_daily=0,
retention_weekly=0,
retention_monthly=0,
retention_yearly=0,
)
self._snapshot(
host,
"20260518-021500Z__OLD",
path="/backups/web-01/manual/20260518-021500Z__OLD",
)
self._snapshot(host, "20260519-021500Z__NEW")
with self.assertRaisesRegex(ConfigError, "unexpected snapshot path"):
run_sql_retention_apply(
prefix=Path("/tmp/pobsync-test"),
host=host.host,
kind="scheduled",
protect_bases=False,
yes=True,
max_delete=1,
acquire_lock=False,
)
def test_apply_respects_max_delete(self) -> None:
host = HostConfig.objects.create(
host="web-01",
@@ -213,6 +275,36 @@ class SqlRetentionTests(TestCase):
self.assertEqual(purged.action, PurgedSnapshot.Action.INCOMPLETE_CLEANUP)
self.assertEqual(purged.reason, "manual incomplete cleanup")
def test_incomplete_cleanup_deletes_non_traversable_nested_directory(self) -> None:
with TemporaryDirectory() as tmp:
prefix = Path(tmp) / "home"
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
incomplete_dir = Path(tmp) / "backups" / host.host / ".incomplete" / "20260519-031500Z__BROKEN01"
restricted_dir = incomplete_dir / "data" / "var" / "lib" / "snapd" / "void"
restricted_dir.mkdir(parents=True)
restricted_dir.joinpath("state").write_text("interrupted\n")
restricted_dir.chmod(0)
record = SnapshotRecord.objects.create(
host=host,
kind=SnapshotRecord.Kind.INCOMPLETE,
dirname=incomplete_dir.name,
path=str(incomplete_dir),
status="failed",
started_at=datetime(2026, 5, 19, 3, 15, tzinfo=timezone.utc),
)
result = run_incomplete_cleanup(
prefix=prefix,
host=host.host,
yes=True,
max_delete=1,
acquire_lock=False,
)
self.assertFalse(incomplete_dir.exists())
self.assertFalse(SnapshotRecord.objects.filter(pk=record.pk).exists())
self.assertEqual(result["deleted"][0]["dirname"], incomplete_dir.name)
def test_incomplete_cleanup_respects_max_delete(self) -> None:
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
SnapshotRecord.objects.create(