(release) Track worker heartbeat for running jobs

Record worker pid, host, claim time, and heartbeat metadata on running
backup jobs so operators can see which worker owns a run.

Refresh the heartbeat while rsync is active and reconcile stale running
runs when the worker heartbeat stops. Add a worker option to tune or
disable stale-run reconciliation.

Refs #11
This commit is contained in:
2026-05-21 03:16:38 +02:00
parent 404b7f7500
commit 4c8ed24561
6 changed files with 184 additions and 10 deletions

View File

@@ -1373,6 +1373,29 @@ class ViewTests(TestCase):
self.assertContains(response, "Cancel run")
self.assertContains(response, reverse("cancel_run", args=[run.id]))
def test_run_detail_renders_worker_execution_metadata(self) -> None:
    """Run detail page shows the worker host, pid, and heartbeat label.

    Stores an ``execution`` mapping in the result of a RUNNING run and
    checks that the rendered detail page contains the worker labels
    and the worker host/pid values taken from that mapping.
    """
    execution = {
        "worker_host": "backup-01",
        "worker_pid": 4242,
        "heartbeat_at": "2026-05-21T10:30:00+00:00",
    }
    self.client.force_login(self.staff_user)
    backup_host = HostConfig.objects.create(
        host="web-01",
        address="web-01.example.test",
    )
    running = BackupRun.objects.create(
        host=backup_host,
        status=BackupRun.Status.RUNNING,
        result={"execution": execution},
    )

    detail_url = reverse("run_detail", args=[running.id])
    page = self.client.get(detail_url)

    self.assertEqual(page.status_code, 200)
    # Same fragments, checked in the same order, as individual assertions.
    expected_fragments = (
        "Worker:",
        "backup-01",
        "pid 4242",
        "Worker heartbeat:",
    )
    for fragment in expected_fragments:
        self.assertContains(page, fragment)
def test_cancel_run_marks_queued_run_cancelled(self) -> None:
self.client.force_login(self.staff_user)
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")