From d0c23deb72213326ae5a7865d3327487d6113ea7 Mon Sep 17 00:00:00 2001
From: Peter van Arkel
Date: Thu, 21 May 2026 03:26:21 +0200
Subject: [PATCH] (release) Add explicit incomplete snapshot cleanup
Add a dedicated cleanup path for incomplete snapshots instead of letting
retention prune them implicitly. The retention plan now exposes a guarded
form that requires host and delete-count confirmation before removing
.incomplete snapshot directories and their SQL records.
Keep scheduled/manual retention behavior unchanged, add path safety checks,
and cover cleanup success, confirmation failures, max-delete limits, and
unexpected paths in tests.
Refs #10
---
src/pobsync_backend/forms.py | 30 +++++++
src/pobsync_backend/retention.py | 79 +++++++++++++++++++
.../pobsync_backend/retention_plan.html | 35 ++++++++
.../tests/test_sql_retention.py | 74 ++++++++++++++++-
src/pobsync_backend/tests/test_views.py | 62 +++++++++++++++
src/pobsync_backend/views.py | 46 ++++++++++-
src/pobsync_server/urls.py | 5 ++
7 files changed, 329 insertions(+), 2 deletions(-)
diff --git a/src/pobsync_backend/forms.py b/src/pobsync_backend/forms.py
index b177a40..174ca00 100644
--- a/src/pobsync_backend/forms.py
+++ b/src/pobsync_backend/forms.py
@@ -274,6 +274,36 @@ class RetentionApplyForm(forms.Form):
return value
+class IncompleteCleanupForm(forms.Form):
+ max_delete = forms.IntegerField(min_value=0, initial=0)
+ confirm_delete_count = forms.IntegerField(min_value=0)
+ confirm_host = forms.CharField()
+
+ def __init__(self, *args, host_name: str, expected_delete_count: int, **kwargs) -> None:
+ self.host_name = host_name
+ self.expected_delete_count = expected_delete_count
+ super().__init__(*args, **kwargs)
+ self.fields["confirm_host"].help_text = f"Type {host_name} to confirm incomplete snapshot cleanup."
+ self.fields["confirm_delete_count"].help_text = (
+ f"Type {expected_delete_count} to confirm the current number of incomplete snapshots."
+ )
+ self.fields["max_delete"].help_text = (
+ f"Must be at least {expected_delete_count} for the incomplete snapshots shown here."
+ )
+
+ def clean_confirm_host(self) -> str:
+ value = self.cleaned_data["confirm_host"].strip()
+ if value != self.host_name:
+ raise forms.ValidationError(f"Type {self.host_name} to confirm.")
+ return value
+
+ def clean_confirm_delete_count(self) -> int:
+ value = self.cleaned_data["confirm_delete_count"]
+ if value != self.expected_delete_count:
+ raise forms.ValidationError(f"Type {self.expected_delete_count} to confirm the incomplete count.")
+ return value
+
+
class ScheduleConfigForm(forms.ModelForm):
cron_expr = forms.CharField(
label="Schedule expression",
diff --git a/src/pobsync_backend/retention.py b/src/pobsync_backend/retention.py
index d054078..d505683 100644
--- a/src/pobsync_backend/retention.py
+++ b/src/pobsync_backend/retention.py
@@ -131,6 +131,76 @@ def run_sql_retention_apply(
return _do_apply()
+def run_incomplete_cleanup(
+ *,
+ prefix: Path,
+ host: str,
+ yes: bool,
+ max_delete: int,
+ acquire_lock: bool = True,
+) -> dict[str, Any]:
+ host = sanitize_host(host)
+ if not yes:
+ raise ConfigError("Refusing to delete incomplete snapshots without --yes")
+ if max_delete < 0:
+ raise ConfigError("--max-delete must be >= 0")
+
+ paths = PobsyncPaths(home=prefix)
+
+ def _do_cleanup() -> dict[str, Any]:
+ host_config = _enabled_host_config(host)
+ incomplete_list = [
+ _snapshot_to_item(snapshot, reasons=["manual incomplete cleanup"])
+ for snapshot in _incomplete_snapshots_for_host(host_config)
+ ]
+ if max_delete == 0 and len(incomplete_list) > 0:
+ raise ConfigError("Incomplete cleanup blocked by --max-delete=0")
+ if len(incomplete_list) > max_delete:
+ raise ConfigError(
+ f"Refusing to delete {len(incomplete_list)} incomplete snapshots (exceeds --max-delete={max_delete})"
+ )
+
+ actions: list[str] = []
+ deleted: list[dict[str, Any]] = []
+
+ for item in incomplete_list:
+ dirname = item["dirname"]
+ snap_path = Path(item["path"])
+ path = _snapshot_delete_path(path=snap_path, dirname=dirname)
+ _validate_incomplete_delete_path(host=host, path=path, dirname=dirname)
+
+ if not path.exists():
+ actions.append(f"skip missing incomplete/{dirname}")
+ elif not path.is_dir():
+ raise ConfigError(f"Refusing to delete non-directory path: {path}")
+ else:
+ _remove_snapshot_tree(path)
+ actions.append(f"deleted incomplete {dirname}")
+
+ SnapshotRecord.objects.filter(
+ host__host=host,
+ kind=SnapshotRecord.Kind.INCOMPLETE,
+ dirname=dirname,
+ ).delete()
+ deleted.append({"dirname": dirname, "kind": SnapshotRecord.Kind.INCOMPLETE, "path": str(path)})
+
+ return {
+ "ok": True,
+ "host": host,
+ "kind": SnapshotRecord.Kind.INCOMPLETE,
+ "max_delete": max_delete,
+ "source": "sql",
+ "planned_delete_count": len(incomplete_list),
+ "deleted": deleted,
+ "actions": actions,
+ }
+
+ if acquire_lock:
+ with acquire_host_lock(paths.locks_dir, host, command="incomplete-cleanup"):
+ return _do_cleanup()
+ return _do_cleanup()
+
+
def _enabled_host_config(host: str) -> HostConfig:
try:
return HostConfig.objects.get(host=host, enabled=True)
@@ -212,6 +282,15 @@ def _snapshot_delete_path(*, path: Path, dirname: str) -> Path:
return path
+def _validate_incomplete_delete_path(*, host: str, path: Path, dirname: str) -> None:
+ path_parts = path.parts
+ if path.name != dirname or ".incomplete" not in path_parts or host not in path_parts:
+ raise ConfigError(f"Refusing to delete unexpected incomplete snapshot path: {path}")
+ incomplete_index = path_parts.index(".incomplete")
+ if incomplete_index == 0 or path_parts[incomplete_index - 1] != host:
+ raise ConfigError(f"Refusing to delete incomplete snapshot outside host backup root: {path}")
+
+
def _remove_snapshot_tree(path: Path) -> None:
_make_directories_user_writable(path)
shutil.rmtree(path)
diff --git a/src/pobsync_backend/templates/pobsync_backend/retention_plan.html b/src/pobsync_backend/templates/pobsync_backend/retention_plan.html
index ccacb38..617b7dc 100644
--- a/src/pobsync_backend/templates/pobsync_backend/retention_plan.html
+++ b/src/pobsync_backend/templates/pobsync_backend/retention_plan.html
@@ -40,6 +40,10 @@
{{ plan.incomplete|length }} incomplete snapshot(s) exist for this host. Retention does not delete incomplete
snapshots automatically because they can indicate an interrupted backup that should be inspected first.
+
+ After inspection, use the dedicated cleanup form below to delete only incomplete snapshot directories and their
+ SQL records. Successful scheduled and manual snapshots are not touched by this cleanup.
+
{% endif %}
@@ -190,6 +194,37 @@
{% endfor %}
+
+ Cleanup Incomplete Snapshots
+
{% endif %}
{% endblock %}
diff --git a/src/pobsync_backend/tests/test_sql_retention.py b/src/pobsync_backend/tests/test_sql_retention.py
index bdb903f..9821338 100644
--- a/src/pobsync_backend/tests/test_sql_retention.py
+++ b/src/pobsync_backend/tests/test_sql_retention.py
@@ -12,7 +12,7 @@ from django.test import TestCase
from pobsync.errors import ConfigError
from pobsync_backend.models import HostConfig, SnapshotRecord
-from pobsync_backend.retention import run_sql_retention_apply, run_sql_retention_plan
+from pobsync_backend.retention import run_incomplete_cleanup, run_sql_retention_apply, run_sql_retention_plan
class SqlRetentionTests(TestCase):
@@ -152,6 +152,78 @@ class SqlRetentionTests(TestCase):
acquire_lock=False,
)
+ def test_incomplete_cleanup_deletes_directory_and_record(self) -> None:
+ with TemporaryDirectory() as tmp:
+ prefix = Path(tmp) / "home"
+ host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
+ incomplete_dir = Path(tmp) / "backups" / host.host / ".incomplete" / "20260519-031500Z__BROKEN01"
+ incomplete_dir.mkdir(parents=True)
+ incomplete_dir.joinpath("partial-file").write_text("interrupted\n")
+ record = SnapshotRecord.objects.create(
+ host=host,
+ kind=SnapshotRecord.Kind.INCOMPLETE,
+ dirname=incomplete_dir.name,
+ path=str(incomplete_dir),
+ status="failed",
+ started_at=datetime(2026, 5, 19, 3, 15, tzinfo=timezone.utc),
+ )
+
+ result = run_incomplete_cleanup(
+ prefix=prefix,
+ host=host.host,
+ yes=True,
+ max_delete=1,
+ acquire_lock=False,
+ )
+
+ self.assertFalse(incomplete_dir.exists())
+ self.assertFalse(SnapshotRecord.objects.filter(pk=record.pk).exists())
+ self.assertEqual(
+ result["deleted"],
+ [{"dirname": incomplete_dir.name, "kind": SnapshotRecord.Kind.INCOMPLETE, "path": str(incomplete_dir)}],
+ )
+ self.assertEqual(result["planned_delete_count"], 1)
+
+ def test_incomplete_cleanup_respects_max_delete(self) -> None:
+ host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
+ SnapshotRecord.objects.create(
+ host=host,
+ kind=SnapshotRecord.Kind.INCOMPLETE,
+ dirname="20260519-031500Z__BROKEN01",
+ path=f"/backups/{host.host}/.incomplete/20260519-031500Z__BROKEN01",
+ status="failed",
+ started_at=datetime(2026, 5, 19, 3, 15, tzinfo=timezone.utc),
+ )
+
+ with self.assertRaisesRegex(ConfigError, "blocked by --max-delete=0"):
+ run_incomplete_cleanup(
+ prefix=Path("/tmp/pobsync-test"),
+ host=host.host,
+ yes=True,
+ max_delete=0,
+ acquire_lock=False,
+ )
+
+ def test_incomplete_cleanup_rejects_unexpected_path(self) -> None:
+ host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
+ SnapshotRecord.objects.create(
+ host=host,
+ kind=SnapshotRecord.Kind.INCOMPLETE,
+ dirname="20260519-031500Z__BROKEN01",
+ path=f"/backups/{host.host}/scheduled/20260519-031500Z__BROKEN01",
+ status="failed",
+ started_at=datetime(2026, 5, 19, 3, 15, tzinfo=timezone.utc),
+ )
+
+ with self.assertRaisesRegex(ConfigError, "unexpected incomplete snapshot path"):
+ run_incomplete_cleanup(
+ prefix=Path("/tmp/pobsync-test"),
+ host=host.host,
+ yes=True,
+ max_delete=1,
+ acquire_lock=False,
+ )
+
def test_management_command_plans_from_sql(self) -> None:
host = HostConfig.objects.create(
host="web-01",
diff --git a/src/pobsync_backend/tests/test_views.py b/src/pobsync_backend/tests/test_views.py
index 3e59f30..5dd0861 100644
--- a/src/pobsync_backend/tests/test_views.py
+++ b/src/pobsync_backend/tests/test_views.py
@@ -1620,6 +1620,68 @@ class ViewTests(TestCase):
self.assertContains(response, "Incomplete Snapshots")
self.assertContains(response, "20260519-031500Z__BROKEN01")
self.assertContains(response, "excluded from retention cleanup")
+ self.assertContains(response, "Delete incomplete snapshots")
+ self.assertContains(response, "Type 1 to confirm the current number of incomplete snapshots.")
+
+ def test_incomplete_cleanup_deletes_incomplete_snapshot_after_confirmation(self) -> None:
+ self.client.force_login(self.staff_user)
+ with TemporaryDirectory() as tmp:
+ home = Path(tmp) / "home"
+ host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
+ incomplete_dir = Path(tmp) / "backups" / host.host / ".incomplete" / "20260519-031500Z__BROKEN01"
+ incomplete_dir.mkdir(parents=True)
+ incomplete_dir.joinpath("partial-file").write_text("interrupted\n")
+ record = SnapshotRecord.objects.create(
+ host=host,
+ kind=SnapshotRecord.Kind.INCOMPLETE,
+ dirname=incomplete_dir.name,
+ path=str(incomplete_dir),
+ status="failed",
+ started_at=datetime(2026, 5, 19, 3, 15, tzinfo=timezone.utc),
+ )
+
+ with override_settings(POBSYNC_HOME=str(home)):
+ response = self.client.post(
+ reverse("cleanup_host_incomplete_snapshots", args=[host.host]),
+ {
+ "max_delete": "1",
+ "confirm_host": host.host,
+ "confirm_delete_count": "1",
+ },
+ follow=True,
+ )
+
+ self.assertFalse(incomplete_dir.exists())
+
+ self.assertRedirects(response, reverse("host_retention_plan", args=[host.host]))
+ self.assertContains(response, "Deleted 1 incomplete snapshot(s) for web-01.")
+ self.assertFalse(SnapshotRecord.objects.filter(pk=record.pk).exists())
+
+ def test_incomplete_cleanup_rejects_bad_confirmation(self) -> None:
+ self.client.force_login(self.staff_user)
+ host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
+ SnapshotRecord.objects.create(
+ host=host,
+ kind=SnapshotRecord.Kind.INCOMPLETE,
+ dirname="20260519-031500Z__BROKEN01",
+ path=f"/backups/{host.host}/.incomplete/20260519-031500Z__BROKEN01",
+ status="failed",
+ started_at=datetime(2026, 5, 19, 3, 15, tzinfo=timezone.utc),
+ )
+
+ response = self.client.post(
+ reverse("cleanup_host_incomplete_snapshots", args=[host.host]),
+ {
+ "max_delete": "1",
+ "confirm_host": "wrong",
+ "confirm_delete_count": "1",
+ },
+ follow=True,
+ )
+
+ self.assertRedirects(response, reverse("host_retention_plan", args=[host.host]))
+ self.assertContains(response, "Incomplete cleanup confirmation is invalid.")
+ self.assertEqual(SnapshotRecord.objects.filter(kind=SnapshotRecord.Kind.INCOMPLETE).count(), 1)
def test_host_detail_surfaces_retention_warnings(self) -> None:
self.client.force_login(self.staff_user)
diff --git a/src/pobsync_backend/views.py b/src/pobsync_backend/views.py
index 7561287..2e54e95 100644
--- a/src/pobsync_backend/views.py
+++ b/src/pobsync_backend/views.py
@@ -25,6 +25,7 @@ from .forms import (
CreateHostConfigForm,
GlobalConfigForm,
HostConfigForm,
+ IncompleteCleanupForm,
ManualBackupForm,
RetentionApplyForm,
SshCredentialGenerateForm,
@@ -34,7 +35,7 @@ from .forms import (
from .host_ops import ensure_host_directories
from .models import BackupRun, GlobalConfig, HostConfig, ScheduleConfig, SnapshotRecord, SshCredential
from .preflight import collect_backup_gate, effective_host_config_preview, run_remote_preflight
-from .retention import run_sql_retention_apply, run_sql_retention_plan
+from .retention import run_incomplete_cleanup, run_sql_retention_apply, run_sql_retention_plan
from .self_check import collect_self_checks, summarize_self_checks
from .scheduler import next_due_after
from .snapshot_discovery import discover_snapshots, inspect_snapshot_discovery
@@ -569,6 +570,7 @@ def host_retention_plan(request, host: str):
schedule = _schedule_for_host(host_config)
scheduled_prune_limit = schedule.prune_max_delete if schedule and schedule.prune else None
delete_count = len(plan["delete"])
+ incomplete_count = len(plan["incomplete"])
context = {
"host": host_config,
"kind": kind,
@@ -587,6 +589,14 @@ def host_retention_plan(request, host: str):
"confirm_delete_count": delete_count,
},
),
+ "incomplete_cleanup_form": IncompleteCleanupForm(
+ host_name=host_config.host,
+ expected_delete_count=incomplete_count,
+ initial={
+ "max_delete": incomplete_count,
+ "confirm_delete_count": incomplete_count,
+ },
+ ),
}
return render(request, "pobsync_backend/retention_plan.html", context)
@@ -643,6 +653,40 @@ def apply_host_retention(request, host: str):
return target
+@staff_member_required
+@require_POST
+def cleanup_host_incomplete_snapshots(request, host: str):
+ host_config = get_object_or_404(HostConfig, host=host)
+ try:
+ plan = run_sql_retention_plan(host=host_config.host, kind="all", protect_bases=True)
+ except PobsyncError as exc:
+ messages.error(request, str(exc))
+ return redirect("host_retention_plan", host=host_config.host)
+
+ incomplete_count = len(plan.get("incomplete") or [])
+ form = IncompleteCleanupForm(
+ request.POST,
+ host_name=host_config.host,
+ expected_delete_count=incomplete_count,
+ )
+ if not form.is_valid():
+ messages.error(request, "Incomplete cleanup confirmation is invalid.")
+ return redirect("host_retention_plan", host=host_config.host)
+
+ try:
+ result = run_incomplete_cleanup(
+ prefix=Path(settings.POBSYNC_HOME),
+ host=host_config.host,
+ yes=True,
+ max_delete=form.cleaned_data["max_delete"],
+ )
+ except PobsyncError as exc:
+ messages.error(request, str(exc))
+ else:
+ messages.success(request, f"Deleted {len(result['deleted'])} incomplete snapshot(s) for {host_config.host}.")
+ return redirect("host_retention_plan", host=host_config.host)
+
+
@staff_member_required
def edit_host_config(request, host: str):
host_config = get_object_or_404(HostConfig, host=host)
diff --git a/src/pobsync_server/urls.py b/src/pobsync_server/urls.py
index 1973928..0fabd24 100644
--- a/src/pobsync_server/urls.py
+++ b/src/pobsync_server/urls.py
@@ -27,6 +27,11 @@ urlpatterns = [
path("hosts//discover-snapshots/", views.discover_host_snapshots, name="discover_host_snapshots"),
path("hosts//retention-apply/", views.apply_host_retention, name="apply_host_retention"),
path("hosts//retention-plan/", views.host_retention_plan, name="host_retention_plan"),
+ path(
+ "hosts//incomplete-cleanup/",
+ views.cleanup_host_incomplete_snapshots,
+ name="cleanup_host_incomplete_snapshots",
+ ),
path("hosts//schedule/", views.edit_host_schedule, name="edit_host_schedule"),
path("runs//", views.run_detail, name="run_detail"),
path("runs//rsync-log/", views.run_rsync_log, name="run_rsync_log"),