diff --git a/src/pobsync_backend/forms.py b/src/pobsync_backend/forms.py index b177a40..174ca00 100644 --- a/src/pobsync_backend/forms.py +++ b/src/pobsync_backend/forms.py @@ -274,6 +274,36 @@ class RetentionApplyForm(forms.Form): return value +class IncompleteCleanupForm(forms.Form): + max_delete = forms.IntegerField(min_value=0, initial=0) + confirm_delete_count = forms.IntegerField(min_value=0) + confirm_host = forms.CharField() + + def __init__(self, *args, host_name: str, expected_delete_count: int, **kwargs) -> None: + self.host_name = host_name + self.expected_delete_count = expected_delete_count + super().__init__(*args, **kwargs) + self.fields["confirm_host"].help_text = f"Type {host_name} to confirm incomplete snapshot cleanup." + self.fields["confirm_delete_count"].help_text = ( + f"Type {expected_delete_count} to confirm the current number of incomplete snapshots." + ) + self.fields["max_delete"].help_text = ( + f"Must be at least {expected_delete_count} for the incomplete snapshots shown here." + ) + + def clean_confirm_host(self) -> str: + value = self.cleaned_data["confirm_host"].strip() + if value != self.host_name: + raise forms.ValidationError(f"Type {self.host_name} to confirm.") + return value + + def clean_confirm_delete_count(self) -> int: + value = self.cleaned_data["confirm_delete_count"] + if value != self.expected_delete_count: + raise forms.ValidationError(f"Type {self.expected_delete_count} to confirm the incomplete count.") + return value + + class ScheduleConfigForm(forms.ModelForm): cron_expr = forms.CharField( label="Schedule expression", diff --git a/src/pobsync_backend/retention.py b/src/pobsync_backend/retention.py index d054078..d505683 100644 --- a/src/pobsync_backend/retention.py +++ b/src/pobsync_backend/retention.py @@ -131,6 +131,76 @@ def run_sql_retention_apply( return _do_apply() +def run_incomplete_cleanup( + *, + prefix: Path, + host: str, + yes: bool, + max_delete: int, + acquire_lock: bool = True, +) -> dict[str, Any]: + host = sanitize_host(host) + if not yes: + raise ConfigError("Refusing to delete incomplete snapshots without --yes") + if max_delete < 0: + raise ConfigError("--max-delete must be >= 0") + + paths = PobsyncPaths(home=prefix) + + def _do_cleanup() -> dict[str, Any]: + host_config = _enabled_host_config(host) + incomplete_list = [ + _snapshot_to_item(snapshot, reasons=["manual incomplete cleanup"]) + for snapshot in _incomplete_snapshots_for_host(host_config) + ] + if max_delete == 0 and len(incomplete_list) > 0: + raise ConfigError("Incomplete cleanup blocked by --max-delete=0") + if len(incomplete_list) > max_delete: + raise ConfigError( + f"Refusing to delete {len(incomplete_list)} incomplete snapshots (exceeds --max-delete={max_delete})" + ) + + actions: list[str] = [] + deleted: list[dict[str, Any]] = [] + + for item in incomplete_list: + dirname = item["dirname"] + snap_path = Path(item["path"]) + path = _snapshot_delete_path(path=snap_path, dirname=dirname) + _validate_incomplete_delete_path(host=host, path=path, dirname=dirname) + + if not path.exists(): + actions.append(f"skip missing incomplete/{dirname}") + elif not path.is_dir(): + raise ConfigError(f"Refusing to delete non-directory path: {path}") + else: + _remove_snapshot_tree(path) + actions.append(f"deleted incomplete {dirname}") + + SnapshotRecord.objects.filter( + host__host=host, + kind=SnapshotRecord.Kind.INCOMPLETE, + dirname=dirname, + ).delete() + deleted.append({"dirname": dirname, "kind": SnapshotRecord.Kind.INCOMPLETE, "path": str(path)}) + + return { + "ok": True, + "host": host, + "kind": SnapshotRecord.Kind.INCOMPLETE, + "max_delete": max_delete, + "source": "sql", + "planned_delete_count": len(incomplete_list), + "deleted": deleted, + "actions": actions, + } + + if acquire_lock: + with acquire_host_lock(paths.locks_dir, host, command="incomplete-cleanup"): + return _do_cleanup() + return _do_cleanup() + + def _enabled_host_config(host: str) -> HostConfig: try: return HostConfig.objects.get(host=host, enabled=True) @@ -212,6 +282,15 @@ def _snapshot_delete_path(*, path: Path, dirname: str) -> Path: return path +def _validate_incomplete_delete_path(*, host: str, path: Path, dirname: str) -> None: + path_parts = path.parts + if path.name != dirname or ".incomplete" not in path_parts or host not in path_parts: + raise ConfigError(f"Refusing to delete unexpected incomplete snapshot path: {path}") + incomplete_index = path_parts.index(".incomplete") + if incomplete_index == 0 or path_parts[incomplete_index - 1] != host: + raise ConfigError(f"Refusing to delete incomplete snapshot outside host backup root: {path}") + + def _remove_snapshot_tree(path: Path) -> None: _make_directories_user_writable(path) shutil.rmtree(path) diff --git a/src/pobsync_backend/templates/pobsync_backend/retention_plan.html b/src/pobsync_backend/templates/pobsync_backend/retention_plan.html index ccacb38..617b7dc 100644 --- a/src/pobsync_backend/templates/pobsync_backend/retention_plan.html +++ b/src/pobsync_backend/templates/pobsync_backend/retention_plan.html @@ -40,6 +40,10 @@ {{ plan.incomplete|length }} incomplete snapshot(s) exist for this host. Retention does not delete incomplete snapshots automatically because they can indicate an interrupted backup that should be inspected first.

+

+ After inspection, use the dedicated cleanup form below to delete only incomplete snapshot directories and their + SQL records. Successful scheduled and manual snapshots are not touched by this cleanup. +

{% endif %} @@ -190,6 +194,37 @@ {% endfor %} + +

Cleanup Incomplete Snapshots

+
+ {% csrf_token %} + {{ incomplete_cleanup_form.non_field_errors }} + +
+ {{ incomplete_cleanup_form.max_delete.errors }} + + {{ incomplete_cleanup_form.max_delete }} +
{{ incomplete_cleanup_form.max_delete.help_text }}
+
+ +
+ {{ incomplete_cleanup_form.confirm_host.errors }} + + {{ incomplete_cleanup_form.confirm_host }} +
{{ incomplete_cleanup_form.confirm_host.help_text }}
+
+ +
+ {{ incomplete_cleanup_form.confirm_delete_count.errors }} + + {{ incomplete_cleanup_form.confirm_delete_count }} +
{{ incomplete_cleanup_form.confirm_delete_count.help_text }}
+
+ +
+ +
+
{% endif %} {% endblock %} diff --git a/src/pobsync_backend/tests/test_sql_retention.py b/src/pobsync_backend/tests/test_sql_retention.py index bdb903f..9821338 100644 --- a/src/pobsync_backend/tests/test_sql_retention.py +++ b/src/pobsync_backend/tests/test_sql_retention.py @@ -12,7 +12,7 @@ from django.test import TestCase from pobsync.errors import ConfigError from pobsync_backend.models import HostConfig, SnapshotRecord -from pobsync_backend.retention import run_sql_retention_apply, run_sql_retention_plan +from pobsync_backend.retention import run_incomplete_cleanup, run_sql_retention_apply, run_sql_retention_plan class SqlRetentionTests(TestCase): @@ -152,6 +152,78 @@ class SqlRetentionTests(TestCase): acquire_lock=False, ) + def test_incomplete_cleanup_deletes_directory_and_record(self) -> None: + with TemporaryDirectory() as tmp: + prefix = Path(tmp) / "home" + host = HostConfig.objects.create(host="web-01", address="web-01.example.test") + incomplete_dir = Path(tmp) / "backups" / host.host / ".incomplete" / "20260519-031500Z__BROKEN01" + incomplete_dir.mkdir(parents=True) + incomplete_dir.joinpath("partial-file").write_text("interrupted\n") + record = SnapshotRecord.objects.create( + host=host, + kind=SnapshotRecord.Kind.INCOMPLETE, + dirname=incomplete_dir.name, + path=str(incomplete_dir), + status="failed", + started_at=datetime(2026, 5, 19, 3, 15, tzinfo=timezone.utc), + ) + + result = run_incomplete_cleanup( + prefix=prefix, + host=host.host, + yes=True, + max_delete=1, + acquire_lock=False, + ) + + self.assertFalse(incomplete_dir.exists()) + self.assertFalse(SnapshotRecord.objects.filter(pk=record.pk).exists()) + self.assertEqual( + result["deleted"], + [{"dirname": incomplete_dir.name, "kind": SnapshotRecord.Kind.INCOMPLETE, "path": str(incomplete_dir)}], + ) + self.assertEqual(result["planned_delete_count"], 1) + + def test_incomplete_cleanup_respects_max_delete(self) -> None: + host = HostConfig.objects.create(host="web-01", address="web-01.example.test") + SnapshotRecord.objects.create( + host=host, + kind=SnapshotRecord.Kind.INCOMPLETE, + dirname="20260519-031500Z__BROKEN01", + path=f"/backups/{host.host}/.incomplete/20260519-031500Z__BROKEN01", + status="failed", + started_at=datetime(2026, 5, 19, 3, 15, tzinfo=timezone.utc), + ) + + with self.assertRaisesRegex(ConfigError, "blocked by --max-delete=0"): + run_incomplete_cleanup( + prefix=Path("/tmp/pobsync-test"), + host=host.host, + yes=True, + max_delete=0, + acquire_lock=False, + ) + + def test_incomplete_cleanup_rejects_unexpected_path(self) -> None: + host = HostConfig.objects.create(host="web-01", address="web-01.example.test") + SnapshotRecord.objects.create( + host=host, + kind=SnapshotRecord.Kind.INCOMPLETE, + dirname="20260519-031500Z__BROKEN01", + path=f"/backups/{host.host}/scheduled/20260519-031500Z__BROKEN01", + status="failed", + started_at=datetime(2026, 5, 19, 3, 15, tzinfo=timezone.utc), + ) + + with self.assertRaisesRegex(ConfigError, "unexpected incomplete snapshot path"): + run_incomplete_cleanup( + prefix=Path("/tmp/pobsync-test"), + host=host.host, + yes=True, + max_delete=1, + acquire_lock=False, + ) + def test_management_command_plans_from_sql(self) -> None: host = HostConfig.objects.create( host="web-01", diff --git a/src/pobsync_backend/tests/test_views.py b/src/pobsync_backend/tests/test_views.py index 3e59f30..5dd0861 100644 --- a/src/pobsync_backend/tests/test_views.py +++ b/src/pobsync_backend/tests/test_views.py @@ -1620,6 +1620,68 @@ class ViewTests(TestCase): self.assertContains(response, "Incomplete Snapshots") self.assertContains(response, "20260519-031500Z__BROKEN01") self.assertContains(response, "excluded from retention cleanup") + self.assertContains(response, "Delete incomplete snapshots") + self.assertContains(response, "Type 1 to confirm the current number of incomplete snapshots.") + + def test_incomplete_cleanup_deletes_incomplete_snapshot_after_confirmation(self) -> None: + self.client.force_login(self.staff_user) + with TemporaryDirectory() as tmp: + home = Path(tmp) / "home" + host = HostConfig.objects.create(host="web-01", address="web-01.example.test") + incomplete_dir = Path(tmp) / "backups" / host.host / ".incomplete" / "20260519-031500Z__BROKEN01" + incomplete_dir.mkdir(parents=True) + incomplete_dir.joinpath("partial-file").write_text("interrupted\n") + record = SnapshotRecord.objects.create( + host=host, + kind=SnapshotRecord.Kind.INCOMPLETE, + dirname=incomplete_dir.name, + path=str(incomplete_dir), + status="failed", + started_at=datetime(2026, 5, 19, 3, 15, tzinfo=timezone.utc), + ) + + with override_settings(POBSYNC_HOME=str(home)): + response = self.client.post( + reverse("cleanup_host_incomplete_snapshots", args=[host.host]), + { + "max_delete": "1", + "confirm_host": host.host, + "confirm_delete_count": "1", + }, + follow=True, + ) + + self.assertFalse(incomplete_dir.exists()) + + self.assertRedirects(response, reverse("host_retention_plan", args=[host.host])) + self.assertContains(response, "Deleted 1 incomplete snapshot(s) for web-01.") + self.assertFalse(SnapshotRecord.objects.filter(pk=record.pk).exists()) + + def test_incomplete_cleanup_rejects_bad_confirmation(self) -> None: + self.client.force_login(self.staff_user) + host = HostConfig.objects.create(host="web-01", address="web-01.example.test") + SnapshotRecord.objects.create( + host=host, + kind=SnapshotRecord.Kind.INCOMPLETE, + dirname="20260519-031500Z__BROKEN01", + path=f"/backups/{host.host}/.incomplete/20260519-031500Z__BROKEN01", + status="failed", + started_at=datetime(2026, 5, 19, 3, 15, tzinfo=timezone.utc), + ) + + response = self.client.post( + reverse("cleanup_host_incomplete_snapshots", args=[host.host]), + { + "max_delete": "1", + "confirm_host": "wrong", + "confirm_delete_count": "1", + }, + follow=True, + ) + + self.assertRedirects(response, reverse("host_retention_plan", args=[host.host])) + self.assertContains(response, "Incomplete cleanup confirmation is invalid.") + self.assertEqual(SnapshotRecord.objects.filter(kind=SnapshotRecord.Kind.INCOMPLETE).count(), 1) def test_host_detail_surfaces_retention_warnings(self) -> None: self.client.force_login(self.staff_user) diff --git a/src/pobsync_backend/views.py b/src/pobsync_backend/views.py index 7561287..2e54e95 100644 --- a/src/pobsync_backend/views.py +++ b/src/pobsync_backend/views.py @@ -25,6 +25,7 @@ from .forms import ( CreateHostConfigForm, GlobalConfigForm, HostConfigForm, + IncompleteCleanupForm, ManualBackupForm, RetentionApplyForm, SshCredentialGenerateForm, @@ -34,7 +35,7 @@ from .forms import ( from .host_ops import ensure_host_directories from .models import BackupRun, GlobalConfig, HostConfig, ScheduleConfig, SnapshotRecord, SshCredential from .preflight import collect_backup_gate, effective_host_config_preview, run_remote_preflight -from .retention import run_sql_retention_apply, run_sql_retention_plan +from .retention import run_incomplete_cleanup, run_sql_retention_apply, run_sql_retention_plan from .self_check import collect_self_checks, summarize_self_checks from .scheduler import next_due_after from .snapshot_discovery import discover_snapshots, inspect_snapshot_discovery @@ -569,6 +570,7 @@ def host_retention_plan(request, host: str): schedule = _schedule_for_host(host_config) scheduled_prune_limit = schedule.prune_max_delete if schedule and schedule.prune else None delete_count = len(plan["delete"]) + incomplete_count = len(plan["incomplete"]) context = { "host": host_config, "kind": kind, @@ -587,6 +589,14 @@ def host_retention_plan(request, host: str): "confirm_delete_count": delete_count, }, ), + "incomplete_cleanup_form": IncompleteCleanupForm( + host_name=host_config.host, + expected_delete_count=incomplete_count, + initial={ + "max_delete": incomplete_count, + "confirm_delete_count": incomplete_count, + }, + ), } return render(request, "pobsync_backend/retention_plan.html", context) @@ -643,6 +653,40 @@ def apply_host_retention(request, host: str): return target +@staff_member_required +@require_POST +def cleanup_host_incomplete_snapshots(request, host: str): + host_config = get_object_or_404(HostConfig, host=host) + try: + plan = run_sql_retention_plan(host=host_config.host, kind="all", protect_bases=True) + except PobsyncError as exc: + messages.error(request, str(exc)) + return redirect("host_retention_plan", host=host_config.host) + + incomplete_count = len(plan.get("incomplete") or []) + form = IncompleteCleanupForm( + request.POST, + host_name=host_config.host, + expected_delete_count=incomplete_count, + ) + if not form.is_valid(): + messages.error(request, "Incomplete cleanup confirmation is invalid.") + return redirect("host_retention_plan", host=host_config.host) + + try: + result = run_incomplete_cleanup( + prefix=Path(settings.POBSYNC_HOME), + host=host_config.host, + yes=True, + max_delete=form.cleaned_data["max_delete"], + ) + except PobsyncError as exc: + messages.error(request, str(exc)) + else: + messages.success(request, f"Deleted {len(result['deleted'])} incomplete snapshot(s) for {host_config.host}.") + return redirect("host_retention_plan", host=host_config.host) + + @staff_member_required def edit_host_config(request, host: str): host_config = get_object_or_404(HostConfig, host=host) diff --git a/src/pobsync_server/urls.py b/src/pobsync_server/urls.py index 1973928..0fabd24 100644 --- a/src/pobsync_server/urls.py +++ b/src/pobsync_server/urls.py @@ -27,6 +27,11 @@ urlpatterns = [ path("hosts//discover-snapshots/", views.discover_host_snapshots, name="discover_host_snapshots"), path("hosts//retention-apply/", views.apply_host_retention, name="apply_host_retention"), path("hosts//retention-plan/", views.host_retention_plan, name="host_retention_plan"), + path( + "hosts//incomplete-cleanup/", + views.cleanup_host_incomplete_snapshots, + name="cleanup_host_incomplete_snapshots", + ), path("hosts//schedule/", views.edit_host_schedule, name="edit_host_schedule"), path("runs//", views.run_detail, name="run_detail"), path("runs//rsync-log/", views.run_rsync_log, name="run_rsync_log"),