(bugfix) Mark retention failures as backup warnings

Add a warning status for BackupRun records so successful snapshots are not
reported as failed when post-run SQL retention fails.

Keep the prune error in the run result, link the successful snapshot, and
let the management command complete with a warning instead of raising a
backup failure.

Include warning runs in backup trend summaries and add a regression test
for successful backups with failed retention cleanup.
This commit is contained in:
2026-05-19 23:20:52 +02:00
parent 1e04da9de8
commit 8bff241b12
6 changed files with 49 additions and 15 deletions

View File

@@ -111,7 +111,7 @@ def execute_backup_run(
)
except Exception as exc:
result["prune"] = {"ok": False, "error": str(exc), "type": type(exc).__name__}
run.status = BackupRun.Status.FAILED
run.status = BackupRun.Status.WARNING
run.result = result
run.snapshot = snapshot_record
run.save(
@@ -125,7 +125,6 @@ def execute_backup_run(
"result",
],
)
raise
run.snapshot = snapshot_record
run.result = result

View File

@@ -60,5 +60,8 @@ class Command(BaseCommand):
if run.status == BackupRun.Status.SUCCESS:
self.stdout.write(self.style.SUCCESS(f"Backup completed for {host.host}."))
return
if run.status == BackupRun.Status.WARNING:
self.stdout.write(self.style.WARNING(f"Backup completed with warnings for {host.host}; run id={run.id}"))
return
raise CommandError(f"Backup failed for {host.host}; run id={run.id}")

View File

@@ -0,0 +1,27 @@
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("pobsync_backend", "0007_filesystem_ssh_credentials"),
]
operations = [
migrations.AlterField(
model_name="backuprun",
name="status",
field=models.CharField(
choices=[
("queued", "Queued"),
("running", "Running"),
("success", "Success"),
("warning", "Warning"),
("failed", "Failed"),
("cancelled", "Cancelled"),
],
default="queued",
max_length=16,
),
),
]

View File

@@ -105,6 +105,7 @@ class BackupRun(models.Model):
QUEUED = "queued", "Queued"
RUNNING = "running", "Running"
SUCCESS = "success", "Success"
WARNING = "warning", "Warning"
FAILED = "failed", "Failed"
CANCELLED = "cancelled", "Cancelled"

View File

@@ -13,7 +13,7 @@ from .models import BackupRun, GlobalConfig, HostConfig, SnapshotRecord
def collect_dashboard_stats(*, hosts: Iterable[HostConfig], global_config: GlobalConfig | None) -> dict[str, Any]:
runs = list(
BackupRun.objects.select_related("host", "snapshot")
.filter(status=BackupRun.Status.SUCCESS)
.filter(status__in=_COMPLETED_BACKUP_STATUSES)
.order_by("-started_at", "-created_at")[:100]
)
real_runs = [_run_summary(run) for run in runs if _is_real_run(run)]
@@ -52,7 +52,7 @@ def collect_dashboard_stats(*, hosts: Iterable[HostConfig], global_config: Globa
def collect_host_stats(*, host: HostConfig, limit: int = 8) -> dict[str, Any]:
runs = list(host.runs.select_related("snapshot").filter(status=BackupRun.Status.SUCCESS).order_by("-started_at", "-created_at")[:50])
runs = list(host.runs.select_related("snapshot").filter(status__in=_COMPLETED_BACKUP_STATUSES).order_by("-started_at", "-created_at")[:50])
real_runs = [_run_summary(run) for run in runs if _is_real_run(run)]
trend_runs = [run for run in real_runs if run["has_stats"]][:limit]
latest_snapshot = host.snapshots.order_by("-started_at", "-discovered_at", "-id").first()
@@ -198,3 +198,6 @@ def _int_at(data: dict[str, Any], *keys: str) -> int | None:
if isinstance(value, float):
return int(value)
return None
_COMPLETED_BACKUP_STATUSES = [BackupRun.Status.SUCCESS, BackupRun.Status.WARNING]

View File

@@ -102,7 +102,7 @@ class RunBackupRecordsSnapshotTests(TestCase):
self.assertEqual(run.status, BackupRun.Status.SUCCESS)
self.assertEqual(run.result["prune"], {"ok": True, "source": "sql", "deleted": []})
def test_prune_failure_is_recorded_on_backup_run(self) -> None:
def test_prune_failure_marks_backup_run_as_warning(self) -> None:
with TemporaryDirectory() as tmp:
backup_root = Path(tmp) / "backups"
GlobalConfig.objects.create(name="default", backup_root=str(backup_root))
@@ -128,19 +128,20 @@ class RunBackupRecordsSnapshotTests(TestCase):
}
retention_apply.side_effect = ConfigError("Deletion blocked by --max-delete=0")
with self.assertRaises(ConfigError):
output = StringIO()
call_command(
"run_pobsync_backup",
host.host,
prefix=str(Path(tmp) / "home"),
prune=True,
prune_max_delete=0,
stdout=StringIO(),
stdout=output,
)
run = BackupRun.objects.get()
self.assertEqual(run.status, BackupRun.Status.FAILED)
self.assertEqual(run.status, BackupRun.Status.WARNING)
self.assertIsNotNone(run.snapshot)
self.assertIn("completed with warnings", output.getvalue())
self.assertEqual(run.result["prune"]["ok"], False)
self.assertEqual(run.result["prune"]["type"], "ConfigError")
self.assertEqual(run.result["prune"]["error"], "Deletion blocked by --max-delete=0")