(bugfix) Mark retention failures as backup warnings
Add a warning status for BackupRun records so successful snapshots are not reported as failed when post-run SQL retention fails. Keep the prune error in the run result, link the successful snapshot, and let the management command complete with a warning instead of raising a backup failure. Include warning runs in backup trend summaries and add a regression test for successful backups with failed retention cleanup.
This commit is contained in:
@@ -111,7 +111,7 @@ def execute_backup_run(
|
||||
)
|
||||
except Exception as exc:
|
||||
result["prune"] = {"ok": False, "error": str(exc), "type": type(exc).__name__}
|
||||
run.status = BackupRun.Status.FAILED
|
||||
run.status = BackupRun.Status.WARNING
|
||||
run.result = result
|
||||
run.snapshot = snapshot_record
|
||||
run.save(
|
||||
@@ -125,7 +125,6 @@ def execute_backup_run(
|
||||
"result",
|
||||
],
|
||||
)
|
||||
raise
|
||||
|
||||
run.snapshot = snapshot_record
|
||||
run.result = result
|
||||
|
||||
@@ -60,5 +60,8 @@ class Command(BaseCommand):
|
||||
if run.status == BackupRun.Status.SUCCESS:
|
||||
self.stdout.write(self.style.SUCCESS(f"Backup completed for {host.host}."))
|
||||
return
|
||||
if run.status == BackupRun.Status.WARNING:
|
||||
self.stdout.write(self.style.WARNING(f"Backup completed with warnings for {host.host}; run id={run.id}"))
|
||||
return
|
||||
|
||||
raise CommandError(f"Backup failed for {host.host}; run id={run.id}")
|
||||
|
||||
@@ -0,0 +1,27 @@
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
("pobsync_backend", "0007_filesystem_ssh_credentials"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name="backuprun",
|
||||
name="status",
|
||||
field=models.CharField(
|
||||
choices=[
|
||||
("queued", "Queued"),
|
||||
("running", "Running"),
|
||||
("success", "Success"),
|
||||
("warning", "Warning"),
|
||||
("failed", "Failed"),
|
||||
("cancelled", "Cancelled"),
|
||||
],
|
||||
default="queued",
|
||||
max_length=16,
|
||||
),
|
||||
),
|
||||
]
|
||||
@@ -105,6 +105,7 @@ class BackupRun(models.Model):
|
||||
QUEUED = "queued", "Queued"
|
||||
RUNNING = "running", "Running"
|
||||
SUCCESS = "success", "Success"
|
||||
WARNING = "warning", "Warning"
|
||||
FAILED = "failed", "Failed"
|
||||
CANCELLED = "cancelled", "Cancelled"
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ from .models import BackupRun, GlobalConfig, HostConfig, SnapshotRecord
|
||||
def collect_dashboard_stats(*, hosts: Iterable[HostConfig], global_config: GlobalConfig | None) -> dict[str, Any]:
|
||||
runs = list(
|
||||
BackupRun.objects.select_related("host", "snapshot")
|
||||
.filter(status=BackupRun.Status.SUCCESS)
|
||||
.filter(status__in=_COMPLETED_BACKUP_STATUSES)
|
||||
.order_by("-started_at", "-created_at")[:100]
|
||||
)
|
||||
real_runs = [_run_summary(run) for run in runs if _is_real_run(run)]
|
||||
@@ -52,7 +52,7 @@ def collect_dashboard_stats(*, hosts: Iterable[HostConfig], global_config: Globa
|
||||
|
||||
|
||||
def collect_host_stats(*, host: HostConfig, limit: int = 8) -> dict[str, Any]:
|
||||
runs = list(host.runs.select_related("snapshot").filter(status=BackupRun.Status.SUCCESS).order_by("-started_at", "-created_at")[:50])
|
||||
runs = list(host.runs.select_related("snapshot").filter(status__in=_COMPLETED_BACKUP_STATUSES).order_by("-started_at", "-created_at")[:50])
|
||||
real_runs = [_run_summary(run) for run in runs if _is_real_run(run)]
|
||||
trend_runs = [run for run in real_runs if run["has_stats"]][:limit]
|
||||
latest_snapshot = host.snapshots.order_by("-started_at", "-discovered_at", "-id").first()
|
||||
@@ -198,3 +198,6 @@ def _int_at(data: dict[str, Any], *keys: str) -> int | None:
|
||||
if isinstance(value, float):
|
||||
return int(value)
|
||||
return None
|
||||
|
||||
|
||||
_COMPLETED_BACKUP_STATUSES = [BackupRun.Status.SUCCESS, BackupRun.Status.WARNING]
|
||||
|
||||
@@ -102,7 +102,7 @@ class RunBackupRecordsSnapshotTests(TestCase):
|
||||
self.assertEqual(run.status, BackupRun.Status.SUCCESS)
|
||||
self.assertEqual(run.result["prune"], {"ok": True, "source": "sql", "deleted": []})
|
||||
|
||||
def test_prune_failure_is_recorded_on_backup_run(self) -> None:
|
||||
def test_prune_failure_marks_backup_run_as_warning(self) -> None:
|
||||
with TemporaryDirectory() as tmp:
|
||||
backup_root = Path(tmp) / "backups"
|
||||
GlobalConfig.objects.create(name="default", backup_root=str(backup_root))
|
||||
@@ -128,19 +128,20 @@ class RunBackupRecordsSnapshotTests(TestCase):
|
||||
}
|
||||
retention_apply.side_effect = ConfigError("Deletion blocked by --max-delete=0")
|
||||
|
||||
with self.assertRaises(ConfigError):
|
||||
output = StringIO()
|
||||
call_command(
|
||||
"run_pobsync_backup",
|
||||
host.host,
|
||||
prefix=str(Path(tmp) / "home"),
|
||||
prune=True,
|
||||
prune_max_delete=0,
|
||||
stdout=StringIO(),
|
||||
stdout=output,
|
||||
)
|
||||
|
||||
run = BackupRun.objects.get()
|
||||
self.assertEqual(run.status, BackupRun.Status.FAILED)
|
||||
self.assertEqual(run.status, BackupRun.Status.WARNING)
|
||||
self.assertIsNotNone(run.snapshot)
|
||||
self.assertIn("completed with warnings", output.getvalue())
|
||||
self.assertEqual(run.result["prune"]["ok"], False)
|
||||
self.assertEqual(run.result["prune"]["type"], "ConfigError")
|
||||
self.assertEqual(run.result["prune"]["error"], "Deletion blocked by --max-delete=0")
|
||||
|
||||
Reference in New Issue
Block a user