Merge pull request '(feature) Add backup safety and preflight validation' (#13) from issue-3-backup-safety-preflight-validation into master

Reviewed-on: #13
This commit was merged in pull request #13.
This commit is contained in:
2026-05-21 00:58:23 +02:00
8 changed files with 701 additions and 36 deletions

View File

@@ -77,6 +77,14 @@ def _host_yaml_data(host_config: HostConfig) -> dict[str, Any]:
return validate_dict(data, HOST_SCHEMA, path="host") return validate_dict(data, HOST_SCHEMA, path="host")
def global_config_object_data(global_config: GlobalConfig) -> dict[str, Any]:
return _global_yaml_data(global_config)
def host_config_object_data(host_config: HostConfig) -> dict[str, Any]:
return _host_yaml_data(host_config)
def global_config_data(name: str = "default") -> dict[str, Any]: def global_config_data(name: str = "default") -> dict[str, Any]:
try: try:
global_config = GlobalConfig.objects.get(name=name) global_config = GlobalConfig.objects.get(name=name)

View File

@@ -0,0 +1,232 @@
from __future__ import annotations
import shlex
import subprocess
from dataclasses import dataclass
from typing import Any
from pobsync.config.merge import build_effective_config
from pobsync.rsync import build_ssh_command
from .config_repository import global_config_object_data, host_config_object_data
from .config_source import DjangoConfigSource
from .host_ops import collect_host_checks
from .models import GlobalConfig, HostConfig
from .self_check import SelfCheck
DRY_RUN_BLOCKING_CHECKS = {
"Host global config",
"Host address",
"Host SSH key file",
"Host effective source root",
"Host effective SSH user",
"Host effective SSH port",
"Host effective SSH credential",
"Host effective rsync recursion",
}
@dataclass(frozen=True)
class BackupGate:
state: str
message: str
checks: list[SelfCheck]
real_blockers: list[SelfCheck]
dry_run_blockers: list[SelfCheck]
warnings: list[SelfCheck]
@property
def can_queue_real(self) -> bool:
return not self.real_blockers
@property
def can_queue_dry_run(self) -> bool:
return not self.dry_run_blockers
def collect_backup_gate(host: HostConfig, global_config: GlobalConfig | None = None) -> BackupGate:
checks = collect_host_checks(host, global_config)
remote_preflight_check = _remote_preflight_self_check(host)
if remote_preflight_check is not None:
checks.append(remote_preflight_check)
real_blockers = [check for check in checks if check.status == "failed"]
dry_run_blockers = [check for check in real_blockers if check.name in DRY_RUN_BLOCKING_CHECKS]
warnings = [check for check in checks if check.status == "warning"]
if real_blockers:
state = "blocked"
message = "Real backups are blocked until failed host checks are resolved."
elif warnings:
state = "warning"
message = "Backups can run, but review the warnings first."
else:
state = "ready"
message = "This host is ready for backup runs."
return BackupGate(
state=state,
message=message,
checks=checks,
real_blockers=real_blockers,
dry_run_blockers=dry_run_blockers,
warnings=warnings,
)
def run_remote_preflight(host: HostConfig, *, timeout_seconds: int = 20) -> dict[str, Any]:
config = DjangoConfigSource().effective_config_for_host(host.host)
ssh_cfg = config.get("ssh", {}) or {}
rsync_cfg = config.get("rsync", {}) or {}
address = str(config.get("address") or host.address)
user = str(ssh_cfg.get("user") or "root")
source_root = str(config.get("source_root") or (config.get("defaults", {}) or {}).get("source_root") or "/")
rsync_binary = str(rsync_cfg.get("binary") or "rsync")
target = f"{user}@{address}"
ssh_cmd = build_ssh_command(ssh_cfg)
checks = [
_run_remote_check(
name="SSH reachability",
command=[*ssh_cmd, "-oBatchMode=yes", target, "true"],
timeout_seconds=timeout_seconds,
),
_run_remote_check(
name="Remote rsync",
command=[
*ssh_cmd,
"-oBatchMode=yes",
target,
"sh",
"-lc",
f"command -v {shlex.quote(rsync_binary)} >/dev/null",
],
timeout_seconds=timeout_seconds,
),
_run_remote_check(
name="Remote source root",
command=[
*ssh_cmd,
"-oBatchMode=yes",
target,
"sh",
"-lc",
f"test -e {shlex.quote(source_root)} && test -r {shlex.quote(source_root)}",
],
timeout_seconds=timeout_seconds,
),
]
result = {
"ok": all(check["ok"] for check in checks),
"checks": checks,
"target": target,
"source_root": source_root,
"rsync_binary": rsync_binary,
"timeout_seconds": timeout_seconds,
}
host.config = {**(host.config or {}), "last_preflight": result}
host.save(update_fields=["config", "updated_at"])
return result
def effective_host_config_preview(host: HostConfig, global_config: GlobalConfig) -> dict[str, Any]:
config = build_effective_config(global_config_object_data(global_config), host_config_object_data(host))
credential = host.ssh_credential or global_config.default_ssh_credential
ssh = config.get("ssh", {}) or {}
rsync = config.get("rsync", {}) or {}
retention = config.get("retention", {}) or {}
return {
"source_root": config.get("source_root", ""),
"destination_subdir": (config.get("defaults", {}) or {}).get("destination_subdir", ""),
"includes": list(config.get("includes") or []),
"excludes": list(config.get("excludes_effective") or []),
"ssh": {
"user": ssh.get("user", ""),
"port": ssh.get("port", ""),
"options": list(ssh.get("options") or []),
"credential": str(credential) if credential else "",
},
"rsync": {
"binary": rsync.get("binary", ""),
"args": list(rsync.get("args_effective") or []),
"timeout_seconds": rsync.get("timeout_seconds", 0),
"bwlimit_kbps": rsync.get("bwlimit_kbps", 0),
},
"retention": {
"daily": retention.get("daily", 0),
"weekly": retention.get("weekly", 0),
"monthly": retention.get("monthly", 0),
"yearly": retention.get("yearly", 0),
},
}
def _run_remote_check(*, name: str, command: list[str], timeout_seconds: int) -> dict[str, Any]:
try:
result = subprocess.run(
command,
check=False,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
timeout=timeout_seconds,
)
except subprocess.TimeoutExpired as exc:
return {
"name": name,
"ok": False,
"exit_code": 124,
"message": f"{name} timed out after {timeout_seconds}s.",
"detail": _clip_output((exc.stderr or exc.stdout or "").strip()),
}
except OSError as exc:
return {
"name": name,
"ok": False,
"exit_code": None,
"message": f"{name} could not start.",
"detail": str(exc),
}
return {
"name": name,
"ok": result.returncode == 0,
"exit_code": result.returncode,
"message": f"{name} passed." if result.returncode == 0 else f"{name} failed.",
"detail": _clip_output((result.stderr or result.stdout or "").strip()),
}
def _remote_preflight_self_check(host: HostConfig) -> SelfCheck | None:
preflight = (host.config or {}).get("last_preflight")
if not isinstance(preflight, dict):
return SelfCheck(
"Remote preflight",
"warning",
"No remote connection preflight has been run yet.",
"Run connection preflight before the first real backup.",
)
checks = preflight.get("checks")
if not isinstance(checks, list):
return SelfCheck("Remote preflight", "failed", "Stored remote preflight result is invalid.")
failed = [str(check.get("name", "unknown")) for check in checks if isinstance(check, dict) and not check.get("ok")]
if failed:
return SelfCheck(
"Remote preflight",
"failed",
"Remote connection preflight failed.",
", ".join(failed),
)
return SelfCheck(
"Remote preflight",
"ok",
"Remote connection preflight passed.",
f"{preflight.get('target', '')} {preflight.get('source_root', '')}".strip(),
)
def _clip_output(value: str, *, max_chars: int = 800) -> str:
if len(value) <= max_chars:
return value
return f"{value[:max_chars]}..."

View File

@@ -81,6 +81,7 @@
.status.success { color: var(--success); border-color: #a7d8b9; background: #edf8f1; } .status.success { color: var(--success); border-color: #a7d8b9; background: #edf8f1; }
.status.ok { color: var(--success); border-color: #a7d8b9; background: #edf8f1; } .status.ok { color: var(--success); border-color: #a7d8b9; background: #edf8f1; }
.status.failed { color: var(--failed); border-color: #e8b4b4; background: #fff0f0; } .status.failed { color: var(--failed); border-color: #e8b4b4; background: #fff0f0; }
.status.blocked { color: var(--failed); border-color: #e8b4b4; background: #fff0f0; }
.status.running { color: var(--running); border-color: #e7cf8a; background: #fff8df; } .status.running { color: var(--running); border-color: #e7cf8a; background: #fff8df; }
.status.warning { color: var(--running); border-color: #e7cf8a; background: #fff8df; } .status.warning { color: var(--running); border-color: #e7cf8a; background: #fff8df; }
.status.queued { color: var(--link); border-color: #b5cdea; background: #eef6ff; } .status.queued { color: var(--link); border-color: #b5cdea; background: #eef6ff; }

View File

@@ -21,6 +21,10 @@
{% csrf_token %} {% csrf_token %}
<button type="submit" class="secondary">Scan SSH host key</button> <button type="submit" class="secondary">Scan SSH host key</button>
</form> </form>
<form method="post" action="{% url 'run_host_preflight' host.host %}">
{% csrf_token %}
<button type="submit" class="secondary">Run connection preflight</button>
</form>
</section> </section>
<section class="grid" aria-label="Host summary"> <section class="grid" aria-label="Host summary">
@@ -64,6 +68,46 @@
</section> </section>
</div> </div>
{% if effective_config %}
<section class="panel">
<h2>Effective Config</h2>
<div class="two-col">
<div class="stack">
<div><strong>Source root:</strong> {{ effective_config.source_root }}</div>
<div><strong>Destination subdir:</strong> {{ effective_config.destination_subdir|default:"none" }}</div>
<div><strong>SSH:</strong> {{ effective_config.ssh.user }}@{{ host.address }}:{{ effective_config.ssh.port }}</div>
<div><strong>SSH key:</strong> {{ effective_config.ssh.credential|default:"none selected" }}</div>
<div><strong>SSH options:</strong> {{ effective_config.ssh.options|join:" " }}</div>
<div><strong>Rsync binary:</strong> {{ effective_config.rsync.binary }}</div>
<div><strong>Rsync args:</strong> {{ effective_config.rsync.args|join:" " }}</div>
<div><strong>Timeout:</strong> {{ effective_config.rsync.timeout_seconds }}s</div>
<div><strong>Bandwidth limit:</strong> {{ effective_config.rsync.bwlimit_kbps }} KB/s</div>
<div>
<strong>Retention:</strong>
d{{ effective_config.retention.daily }}
w{{ effective_config.retention.weekly }}
m{{ effective_config.retention.monthly }}
y{{ effective_config.retention.yearly }}
</div>
</div>
<div class="stack">
<div><strong>Includes:</strong> {{ effective_config.includes|length }}</div>
{% if effective_config.includes %}
<pre>{{ effective_config.includes|join:"&#10;" }}</pre>
{% else %}
<div class="muted">No include rules configured.</div>
{% endif %}
<div><strong>Excludes:</strong> {{ effective_config.excludes|length }}</div>
{% if effective_config.excludes %}
<pre>{{ effective_config.excludes|join:"&#10;" }}</pre>
{% else %}
<div class="muted">No exclude rules configured.</div>
{% endif %}
</div>
</div>
</section>
{% endif %}
<section class="panel"> <section class="panel">
<h2>Snapshot Discovery</h2> <h2>Snapshot Discovery</h2>
<div class="stack"> <div class="stack">
@@ -159,14 +203,47 @@
</table> </table>
</section> </section>
{% if last_preflight %}
<section class="panel">
<h2>Connection Preflight</h2>
<div class="stack spaced">
<div><strong>Status:</strong> <span class="status {% if last_preflight.ok %}ok{% else %}failed{% endif %}">{% if last_preflight.ok %}ok{% else %}failed{% endif %}</span></div>
<div><strong>Target:</strong> {{ last_preflight.target }}</div>
<div><strong>Source root:</strong> {{ last_preflight.source_root }}</div>
<div><strong>Remote rsync:</strong> {{ last_preflight.rsync_binary }}</div>
</div>
<table>
<thead>
<tr>
<th>Status</th>
<th>Check</th>
<th>Message</th>
<th>Detail</th>
</tr>
</thead>
<tbody>
{% for check in last_preflight.checks %}
<tr>
<td><span class="status {% if check.ok %}ok{% else %}failed{% endif %}">{% if check.ok %}ok{% else %}failed{% endif %}</span></td>
<td>{{ check.name }}</td>
<td>{{ check.message }}</td>
<td class="muted">{{ check.detail }}</td>
</tr>
{% endfor %}
</tbody>
</table>
</section>
{% endif %}
<section class="panel"> <section class="panel">
<h2>Backup Control</h2> <h2>Backup Control</h2>
<div class="operator-state"> <div class="operator-state">
{% if active_run %} {% if active_run %}
<span class="status {{ active_run.status }}">{{ active_run.status }}</span> <span class="status {{ active_run.status }}">{{ active_run.status }}</span>
<a href="{% url 'run_detail' active_run.id %}">Run {{ active_run.id }}</a> <a href="{% url 'run_detail' active_run.id %}">Run {{ active_run.id }}</a>
{% elif can_queue_backup %} {% elif has_global_config and host.enabled %}
<span class="status success">ready</span> <span class="status {{ backup_gate.state }}">{{ backup_gate.state }}</span>
<span class="muted">{{ backup_gate.message }}</span>
{% elif not host.enabled %} {% elif not host.enabled %}
<span class="status failed">disabled</span> <span class="status failed">disabled</span>
{% elif not has_global_config %} {% elif not has_global_config %}
@@ -180,20 +257,24 @@
<input type="hidden" name="dry_run" value="on"> <input type="hidden" name="dry_run" value="on">
<input type="hidden" name="verbose_output" value="on"> <input type="hidden" name="verbose_output" value="on">
<input type="hidden" name="prune_max_delete" value="10"> <input type="hidden" name="prune_max_delete" value="10">
<button type="submit" class="secondary" {% if not can_queue_backup %}disabled{% endif %}>Queue dry-run</button> <button type="submit" class="secondary" {% if not can_queue_dry_run %}disabled{% endif %}>Queue dry-run</button>
</form> </form>
<form class="inline-form" method="post" action="{% url 'queue_manual_backup' host.host %}"> <form class="inline-form" method="post" action="{% url 'queue_manual_backup' host.host %}">
{% csrf_token %} {% csrf_token %}
<input type="hidden" name="prune_max_delete" value="10"> <input type="hidden" name="prune_max_delete" value="10">
<button type="submit" {% if not can_queue_backup %}disabled{% endif %}>Queue backup</button> <button type="submit" {% if not can_queue_real_backup %}disabled{% endif %}>Queue backup</button>
</form> </form>
</section> </section>
{% if not can_queue_backup %} {% if active_run %}
<p class="muted">Wait for the active run to finish, or cancel it from the run detail page.</p>
{% elif not can_queue_dry_run or not can_queue_real_backup %}
{% if not has_global_config %} {% if not has_global_config %}
<p class="muted">Create the default global config before queueing backups.</p> <p class="muted">Create the default global config before queueing backups.</p>
{% elif not host.enabled %} {% elif not host.enabled %}
<p class="muted">Enable this host before queueing backups.</p> <p class="muted">Enable this host before queueing backups.</p>
{% elif backup_gate.real_blockers %}
<p class="muted">Real backups are blocked by failed preflight checks. Dry-runs may still be available when storage-only checks fail.</p>
{% endif %} {% endif %}
{% endif %} {% endif %}
@@ -212,7 +293,7 @@
{% endfor %} {% endfor %}
<div class="actions"> <div class="actions">
<button type="submit" {% if not can_queue_backup %}disabled{% endif %}>Queue with options</button> <button type="submit" {% if not can_queue_dry_run and not can_queue_real_backup %}disabled{% endif %}>Queue with options</button>
</div> </div>
</form> </form>
</section> </section>

View File

@@ -27,12 +27,51 @@
<h2>Failure</h2> <h2>Failure</h2>
<div class="stack"> <div class="stack">
<div><strong>Category:</strong> {{ failure.category|default:"unknown" }}</div> <div><strong>Category:</strong> {{ failure.category|default:"unknown" }}</div>
<div><strong>Summary:</strong> {{ failure.summary|default:"" }}</div> <div><strong>Summary:</strong> {{ failure_summary }}</div>
<div><strong>Hint:</strong> {{ failure.hint|default:"" }}</div> <div><strong>Hint:</strong> {{ failure.hint|default:"" }}</div>
</div> </div>
</section> </section>
{% endif %} {% endif %}
{% if dry_run_summary %}
<section class="panel highlight {{ dry_run_summary.highlight_class }}">
<h2>Dry Run Summary</h2>
<section class="grid" aria-label="Dry run summary">
<div class="metric"><div class="label">Status</div><div class="value">{{ dry_run_summary.status }}</div></div>
<div class="metric"><div class="label">Files Seen</div><div class="value">{{ dry_run_summary.files_seen|default:"unknown" }}</div></div>
<div class="metric"><div class="label">Would Transfer</div><div class="value">{{ dry_run_summary.files_would_transfer|default:"unknown" }}</div></div>
<div class="metric"><div class="label">Transfer Estimate</div><div class="value">{{ dry_run_summary.transfer_estimate_bytes|filesizeformat }}</div></div>
<div class="metric"><div class="label">Total Size</div><div class="value">{{ dry_run_summary.total_file_size_bytes|filesizeformat }}</div></div>
<div class="metric"><div class="label">Link-Dest Saving</div><div class="value">{{ dry_run_summary.link_dest_estimated_savings_bytes|filesizeformat }}</div></div>
</section>
<div class="stack">
{% if dry_run_summary.duration_seconds is not None %}
<div><strong>Duration:</strong> {{ dry_run_summary.duration_seconds }}s</div>
{% endif %}
<div>
<strong>Log:</strong>
{% if dry_run_summary.log_available %}
<a href="{% url 'run_rsync_log' run.id %}">Open full rsync log</a>
{% elif rsync_log_path %}
<span class="muted">{{ rsync_log_path }} (missing)</span>
{% else %}
<span class="muted">not recorded yet</span>
{% endif %}
</div>
{% if dry_run_summary.warnings %}
<div><strong>Warnings:</strong></div>
<ul>
{% for warning in dry_run_summary.warnings %}
<li>{{ warning }}</li>
{% endfor %}
</ul>
{% else %}
<div><strong>Warnings:</strong> none recorded</div>
{% endif %}
</div>
</section>
{% endif %}
<div class="two-col"> <div class="two-col">
<section class="panel"> <section class="panel">
<h2>Timing</h2> <h2>Timing</h2>

View File

@@ -557,18 +557,22 @@ class ViewTests(TestCase):
def test_host_detail_renders_config_schedule_runs_and_snapshots(self) -> None: def test_host_detail_renders_config_schedule_runs_and_snapshots(self) -> None:
self.client.force_login(self.staff_user) self.client.force_login(self.staff_user)
GlobalConfig.objects.create(name="default", backup_root="/backups") with TemporaryDirectory() as tmp:
host = HostConfig.objects.create( backup_root = Path(tmp)
host="web-01", GlobalConfig.objects.create(name="default", backup_root=str(backup_root), rsync_args=["--archive"])
address="web-01.example.test", host = HostConfig.objects.create(
source_root="/srv", host="web-01",
retention_daily=7, address="web-01.example.test",
) source_root="/srv",
ScheduleConfig.objects.create(host=host, cron_expr="15 2 * * *", prune=True, last_status="success") retention_daily=7,
snapshot = self._snapshot(host, "20260519-021500Z__ABCDEFGH") )
BackupRun.objects.create(host=host, status=BackupRun.Status.SUCCESS, snapshot=snapshot) for subdir in ("scheduled", "manual", ".incomplete"):
(backup_root / host.host / subdir).mkdir(parents=True)
ScheduleConfig.objects.create(host=host, cron_expr="15 2 * * *", prune=True, last_status="success")
snapshot = self._snapshot(host, "20260519-021500Z__ABCDEFGH")
BackupRun.objects.create(host=host, status=BackupRun.Status.SUCCESS, snapshot=snapshot)
response = self.client.get(reverse("host_detail", args=[host.host])) response = self.client.get(reverse("host_detail", args=[host.host]))
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
self.assertContains(response, "web-01") self.assertContains(response, "web-01")
@@ -582,17 +586,122 @@ class ViewTests(TestCase):
self.assertContains(response, "Discover snapshots") self.assertContains(response, "Discover snapshots")
self.assertContains(response, "Edit schedule") self.assertContains(response, "Edit schedule")
self.assertContains(response, "Edit config") self.assertContains(response, "Edit config")
self.assertContains(response, "Run connection preflight")
self.assertContains(response, "Backup Control") self.assertContains(response, "Backup Control")
self.assertContains(response, "Queue dry-run") self.assertContains(response, "Queue dry-run")
self.assertContains(response, "Queue backup") self.assertContains(response, "Queue backup")
self.assertContains(response, "Host Check") self.assertContains(response, "Host Check")
self.assertContains(response, reverse("prepare_host_directories", args=[host.host])) self.assertContains(response, reverse("prepare_host_directories", args=[host.host]))
self.assertContains(response, "ready") self.assertContains(response, "warning")
self.assertContains(response, "Snapshot Discovery") self.assertContains(response, "Snapshot Discovery")
self.assertContains(response, reverse("queue_manual_backup", args=[host.host])) self.assertContains(response, reverse("queue_manual_backup", args=[host.host]))
self.assertContains(response, reverse("run_detail", args=[BackupRun.objects.get().id])) self.assertContains(response, reverse("run_detail", args=[BackupRun.objects.get().id]))
self.assertContains(response, reverse("snapshot_detail", args=[snapshot.id])) self.assertContains(response, reverse("snapshot_detail", args=[snapshot.id]))
def test_host_detail_renders_effective_config_preview(self) -> None:
self.client.force_login(self.staff_user)
credential = SshCredential.objects.create(name="default-key", key_path="/var/lib/pobsync/id_ed25519")
GlobalConfig.objects.create(
name="default",
backup_root="/backups",
default_ssh_credential=credential,
ssh_user="root",
ssh_port=2222,
ssh_options=["-oBatchMode=yes"],
rsync_args=["--archive", "--numeric-ids"],
rsync_extra_args=["--delete"],
rsync_timeout_seconds=300,
rsync_bwlimit_kbps=2048,
default_source_root="/",
excludes_default=["/proc/***", "/sys/***"],
retention_daily=14,
retention_weekly=4,
retention_monthly=2,
retention_yearly=1,
)
host = HostConfig.objects.create(
host="web-01",
address="web-01.example.test",
includes=["/srv/www/***"],
excludes_add=["/srv/www/cache/***"],
rsync_extra_args=["--one-file-system"],
)
response = self.client.get(reverse("host_detail", args=[host.host]))
self.assertEqual(response.status_code, 200)
self.assertContains(response, "Effective Config")
self.assertContains(response, "root@web-01.example.test:2222")
self.assertContains(response, "default-key")
self.assertContains(response, "-oBatchMode=yes")
self.assertContains(response, "--archive --numeric-ids --delete --one-file-system")
self.assertContains(response, "/srv/www/***")
self.assertContains(response, "/srv/www/cache/***")
self.assertContains(response, "d14")
self.assertContains(response, "w8")
def test_run_host_preflight_stores_remote_check_result(self) -> None:
self.client.force_login(self.staff_user)
GlobalConfig.objects.create(name="default", backup_root="/backups", rsync_args=["--archive"])
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
with patch(
"pobsync_backend.preflight.subprocess.run",
return_value=subprocess.CompletedProcess(args=["ssh"], returncode=0, stdout="", stderr=""),
) as run:
response = self.client.post(reverse("run_host_preflight", args=[host.host]), follow=True)
self.assertRedirects(response, reverse("host_detail", args=[host.host]))
self.assertContains(response, "Connection preflight passed for web-01.")
self.assertContains(response, "Connection Preflight")
self.assertContains(response, "SSH reachability")
self.assertContains(response, "Remote rsync")
self.assertContains(response, "Remote source root")
self.assertEqual(run.call_count, 3)
host.refresh_from_db()
self.assertTrue(host.config["last_preflight"]["ok"])
self.assertEqual(host.config["last_preflight"]["target"], "root@web-01.example.test")
def test_queue_manual_backup_blocks_real_backup_after_failed_remote_preflight(self) -> None:
self.client.force_login(self.staff_user)
with TemporaryDirectory() as tmp:
backup_root = Path(tmp)
GlobalConfig.objects.create(name="default", backup_root=str(backup_root), rsync_args=["--archive"])
host = HostConfig.objects.create(
host="web-01",
address="web-01.example.test",
config={
"last_preflight": {
"ok": False,
"target": "root@web-01.example.test",
"source_root": "/",
"rsync_binary": "rsync",
"checks": [
{
"name": "Remote rsync",
"ok": False,
"exit_code": 127,
"message": "Remote rsync failed.",
"detail": "rsync missing",
}
],
}
},
)
for subdir in ("scheduled", "manual", ".incomplete"):
(backup_root / host.host / subdir).mkdir(parents=True)
response = self.client.post(
reverse("queue_manual_backup", args=[host.host]),
{"prune_max_delete": "10"},
follow=True,
)
self.assertRedirects(response, reverse("host_detail", args=[host.host]))
self.assertContains(response, "Cannot queue real backup until failed preflight checks are resolved")
self.assertContains(response, "Remote preflight")
self.assertFalse(BackupRun.objects.exists())
def test_host_detail_renders_backup_trends(self) -> None: def test_host_detail_renders_backup_trends(self) -> None:
self.client.force_login(self.staff_user) self.client.force_login(self.staff_user)
GlobalConfig.objects.create(name="default", backup_root="/backups") GlobalConfig.objects.create(name="default", backup_root="/backups")
@@ -779,7 +888,7 @@ class ViewTests(TestCase):
def test_queue_manual_backup_creates_queued_run_and_redirects_to_run_detail(self) -> None: def test_queue_manual_backup_creates_queued_run_and_redirects_to_run_detail(self) -> None:
self.client.force_login(self.staff_user) self.client.force_login(self.staff_user)
GlobalConfig.objects.create(name="default", backup_root="/backups") GlobalConfig.objects.create(name="default", backup_root="/backups", rsync_args=["--archive"])
host = HostConfig.objects.create(host="web-01", address="web-01.example.test") host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
response = self.client.post( response = self.client.post(
@@ -812,14 +921,18 @@ class ViewTests(TestCase):
def test_queue_manual_backup_quick_action_can_queue_real_backup(self) -> None: def test_queue_manual_backup_quick_action_can_queue_real_backup(self) -> None:
self.client.force_login(self.staff_user) self.client.force_login(self.staff_user)
GlobalConfig.objects.create(name="default", backup_root="/backups") with TemporaryDirectory() as tmp:
host = HostConfig.objects.create(host="web-01", address="web-01.example.test") backup_root = Path(tmp)
GlobalConfig.objects.create(name="default", backup_root=str(backup_root), rsync_args=["--archive"])
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
for subdir in ("scheduled", "manual", ".incomplete"):
(backup_root / host.host / subdir).mkdir(parents=True)
response = self.client.post( response = self.client.post(
reverse("queue_manual_backup", args=[host.host]), reverse("queue_manual_backup", args=[host.host]),
{"prune_max_delete": "10"}, {"prune_max_delete": "10"},
follow=True, follow=True,
) )
run = BackupRun.objects.get(host=host) run = BackupRun.objects.get(host=host)
self.assertRedirects(response, reverse("run_detail", args=[run.id])) self.assertRedirects(response, reverse("run_detail", args=[run.id]))
@@ -834,6 +947,41 @@ class ViewTests(TestCase):
}, },
) )
def test_queue_manual_backup_blocks_real_backup_when_host_directories_are_missing(self) -> None:
self.client.force_login(self.staff_user)
with TemporaryDirectory() as tmp:
backup_root = Path(tmp)
GlobalConfig.objects.create(name="default", backup_root=str(backup_root), rsync_args=["--archive"])
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
response = self.client.post(
reverse("queue_manual_backup", args=[host.host]),
{"prune_max_delete": "10"},
follow=True,
)
self.assertRedirects(response, reverse("host_detail", args=[host.host]))
self.assertContains(response, "Cannot queue real backup until failed preflight checks are resolved")
self.assertContains(response, "Host backup root")
self.assertFalse(BackupRun.objects.exists())
def test_queue_manual_backup_allows_dry_run_with_only_storage_preflight_failures(self) -> None:
self.client.force_login(self.staff_user)
with TemporaryDirectory() as tmp:
backup_root = Path(tmp)
GlobalConfig.objects.create(name="default", backup_root=str(backup_root), rsync_args=["--archive"])
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
response = self.client.post(
reverse("queue_manual_backup", args=[host.host]),
{"dry_run": "on", "verbose_output": "on", "prune_max_delete": "10"},
follow=True,
)
run = BackupRun.objects.get(host=host)
self.assertRedirects(response, reverse("run_detail", args=[run.id]))
self.assertEqual(run.result["requested"]["dry_run"], True)
def test_queue_manual_backup_requires_default_global_config(self) -> None: def test_queue_manual_backup_requires_default_global_config(self) -> None:
self.client.force_login(self.staff_user) self.client.force_login(self.staff_user)
host = HostConfig.objects.create(host="web-01", address="web-01.example.test") host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
@@ -846,7 +994,7 @@ class ViewTests(TestCase):
def test_queue_manual_backup_rejects_disabled_host(self) -> None: def test_queue_manual_backup_rejects_disabled_host(self) -> None:
self.client.force_login(self.staff_user) self.client.force_login(self.staff_user)
GlobalConfig.objects.create(name="default", backup_root="/backups") GlobalConfig.objects.create(name="default", backup_root="/backups", rsync_args=["--archive"])
host = HostConfig.objects.create(host="web-01", address="web-01.example.test", enabled=False) host = HostConfig.objects.create(host="web-01", address="web-01.example.test", enabled=False)
response = self.client.post(reverse("queue_manual_backup", args=[host.host]), {"dry_run": "on"}, follow=True) response = self.client.post(reverse("queue_manual_backup", args=[host.host]), {"dry_run": "on"}, follow=True)
@@ -918,12 +1066,69 @@ class ViewTests(TestCase):
self.assertContains(response, "--archive") self.assertContains(response, "--archive")
self.assertContains(response, "Rsync Log") self.assertContains(response, "Rsync Log")
self.assertContains(response, "sending incremental file list") self.assertContains(response, "sending incremental file list")
self.assertContains(response, "Dry Run Summary")
self.assertContains(response, "Files Seen")
self.assertContains(response, "Would Transfer")
self.assertContains(response, "Transfer Estimate")
self.assertContains(response, "Warnings:</strong> none recorded")
self.assertContains(response, "Stats") self.assertContains(response, "Stats")
self.assertContains(response, "Files seen:</strong> 10") self.assertContains(response, "Files seen:</strong> 10")
self.assertContains(response, "Estimated link-dest saving") self.assertContains(response, "Estimated link-dest saving")
self.assertContains(response, "&quot;ok&quot;: true") self.assertContains(response, "&quot;ok&quot;: true")
self.assertContains(response, reverse("snapshot_detail", args=[snapshot.id])) self.assertContains(response, reverse("snapshot_detail", args=[snapshot.id]))
def test_run_detail_surfaces_dry_run_warnings_and_log_link(self) -> None:
self.client.force_login(self.staff_user)
host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
with TemporaryDirectory() as tmp:
log_path = Path(tmp) / "dry-run" / "rsync.log"
log_path.parent.mkdir(parents=True)
log_path.write_text("WARNING: noisy shell output\npermission denied\n", encoding="utf-8")
run = BackupRun.objects.create(
host=host,
status=BackupRun.Status.FAILED,
rsync_exit_code=255,
result={
"ok": False,
"dry_run": True,
"log": str(log_path),
"failure": {
"category": "transport",
"message": "Rsync transport failed.",
"hint": "Check SSH access.",
},
"stats": {
"duration_seconds": 4,
"rsync": {
"files_total": 25,
"files_transferred": 3,
"total_file_size_bytes": 10_000,
"total_transferred_file_size_bytes": 1_500,
},
},
"rsync": {
"exit_code": 255,
"log_tail": ["WARNING: noisy shell output", "permission denied"],
},
},
)
response = self.client.get(reverse("run_detail", args=[run.id]))
self.assertEqual(response.status_code, 200)
self.assertContains(response, "Dry Run Summary")
self.assertContains(response, "failed")
self.assertContains(response, "Files Seen")
self.assertContains(response, "25")
self.assertContains(response, "Would Transfer")
self.assertContains(response, "3")
self.assertContains(response, "1.5")
self.assertContains(response, "Open full rsync log")
self.assertContains(response, reverse("run_rsync_log", args=[run.id]))
self.assertContains(response, "Rsync transport failed.")
self.assertContains(response, "Check SSH access.")
self.assertContains(response, "WARNING: noisy shell output")
def test_run_detail_links_existing_rsync_log(self) -> None: def test_run_detail_links_existing_rsync_log(self) -> None:
self.client.force_login(self.staff_user) self.client.force_login(self.staff_user)
host = HostConfig.objects.create(host="web-01", address="web-01.example.test") host = HostConfig.objects.create(host="web-01", address="web-01.example.test")

View File

@@ -29,8 +29,9 @@ from .forms import (
ScheduleConfigForm, ScheduleConfigForm,
SshCredentialForm, SshCredentialForm,
) )
from .host_ops import collect_host_checks, ensure_host_directories from .host_ops import ensure_host_directories
from .models import BackupRun, GlobalConfig, HostConfig, ScheduleConfig, SnapshotRecord, SshCredential from .models import BackupRun, GlobalConfig, HostConfig, ScheduleConfig, SnapshotRecord, SshCredential
from .preflight import collect_backup_gate, effective_host_config_preview, run_remote_preflight
from .retention import run_sql_retention_apply, run_sql_retention_plan from .retention import run_sql_retention_apply, run_sql_retention_plan
from .self_check import collect_self_checks, summarize_self_checks from .self_check import collect_self_checks, summarize_self_checks
from .scheduler import next_due_after from .scheduler import next_due_after
@@ -260,14 +261,15 @@ def create_host_config(request):
@staff_member_required @staff_member_required
def host_detail(request, host: str): def host_detail(request, host: str):
host_config = get_object_or_404(HostConfig, host=host) host_config = get_object_or_404(HostConfig, host=host)
global_config = GlobalConfig.objects.filter(name="default").first()
schedule = _schedule_for_host(host_config) schedule = _schedule_for_host(host_config)
queued_runs = host_config.runs.filter(status=BackupRun.Status.QUEUED) queued_runs = host_config.runs.filter(status=BackupRun.Status.QUEUED)
running_runs = host_config.runs.filter(status=BackupRun.Status.RUNNING) running_runs = host_config.runs.filter(status=BackupRun.Status.RUNNING)
active_run = host_config.runs.filter( active_run = host_config.runs.filter(
status__in=[BackupRun.Status.QUEUED, BackupRun.Status.RUNNING] status__in=[BackupRun.Status.QUEUED, BackupRun.Status.RUNNING]
).order_by("created_at", "id").first() ).order_by("created_at", "id").first()
has_global_config = GlobalConfig.objects.filter(name="default").exists() has_global_config = global_config is not None
host_checks = collect_host_checks(host_config) backup_gate = collect_backup_gate(host_config, global_config)
stats_summary = collect_host_stats(host=host_config, limit=10) stats_summary = collect_host_stats(host=host_config, limit=10)
context = { context = {
"host": host_config, "host": host_config,
@@ -275,11 +277,15 @@ def host_detail(request, host: str):
"next_run_at": _next_run_for_schedule(schedule, host_config), "next_run_at": _next_run_for_schedule(schedule, host_config),
"scheduler_timezone": timezone.get_current_timezone_name(), "scheduler_timezone": timezone.get_current_timezone_name(),
"discovery": inspect_snapshot_discovery(host=host_config), "discovery": inspect_snapshot_discovery(host=host_config),
"host_checks": host_checks, "host_checks": backup_gate.checks,
"host_check_summary": summarize_self_checks(host_checks), "host_check_summary": summarize_self_checks(backup_gate.checks),
"backup_gate": backup_gate,
"last_preflight": (host_config.config or {}).get("last_preflight") if isinstance(host_config.config, dict) else {},
"effective_config": effective_host_config_preview(host_config, global_config) if global_config else {},
"stats_summary": stats_summary, "stats_summary": stats_summary,
"manual_backup_form": ManualBackupForm(initial=_default_manual_backup_initial(host_config)), "manual_backup_form": ManualBackupForm(initial=_default_manual_backup_initial(host_config)),
"can_queue_backup": host_config.enabled and has_global_config, "can_queue_dry_run": host_config.enabled and has_global_config and backup_gate.can_queue_dry_run and active_run is None,
"can_queue_real_backup": host_config.enabled and has_global_config and backup_gate.can_queue_real and active_run is None,
"has_global_config": has_global_config, "has_global_config": has_global_config,
"active_run": active_run, "active_run": active_run,
"latest_runs": host_config.runs.select_related("snapshot").order_by("-created_at")[:10], "latest_runs": host_config.runs.select_related("snapshot").order_by("-created_at")[:10],
@@ -331,6 +337,34 @@ def scan_host_known_key(request, host: str):
return redirect("host_detail", host=host_config.host) return redirect("host_detail", host=host_config.host)
@staff_member_required
@require_POST
def run_host_preflight(request, host: str):
host_config = get_object_or_404(HostConfig, host=host)
if not host_config.enabled:
messages.error(request, f"Cannot run preflight for disabled host {host_config.host}.")
return redirect("host_detail", host=host_config.host)
if not GlobalConfig.objects.filter(name="default").exists():
messages.error(request, "Create the default global config before running preflight.")
return redirect("host_detail", host=host_config.host)
try:
result = run_remote_preflight(host_config)
except Exception as exc:
messages.error(request, f"Connection preflight failed for {host_config.host}: {exc}")
else:
if result.get("ok"):
messages.success(request, f"Connection preflight passed for {host_config.host}.")
else:
failed = [
str(check.get("name"))
for check in result.get("checks", [])
if isinstance(check, dict) and not check.get("ok")
]
messages.error(request, f"Connection preflight failed for {host_config.host}: {', '.join(failed)}.")
return redirect("host_detail", host=host_config.host)
@staff_member_required @staff_member_required
@require_POST @require_POST
def queue_manual_backup(request, host: str): def queue_manual_backup(request, host: str):
@@ -338,7 +372,8 @@ def queue_manual_backup(request, host: str):
if not host_config.enabled: if not host_config.enabled:
messages.error(request, f"Cannot queue backup for disabled host {host_config.host}.") messages.error(request, f"Cannot queue backup for disabled host {host_config.host}.")
return redirect("host_detail", host=host_config.host) return redirect("host_detail", host=host_config.host)
if not GlobalConfig.objects.filter(name="default").exists(): global_config = GlobalConfig.objects.filter(name="default").first()
if global_config is None:
messages.error(request, "Create the default global config before queueing backups.") messages.error(request, "Create the default global config before queueing backups.")
return redirect("host_detail", host=host_config.host) return redirect("host_detail", host=host_config.host)
@@ -347,6 +382,17 @@ def queue_manual_backup(request, host: str):
messages.error(request, "Manual backup options are invalid.") messages.error(request, "Manual backup options are invalid.")
return redirect("host_detail", host=host_config.host) return redirect("host_detail", host=host_config.host)
backup_gate = collect_backup_gate(host_config, global_config)
if form.cleaned_data["dry_run"]:
if not backup_gate.can_queue_dry_run:
blockers = ", ".join(check.name for check in backup_gate.dry_run_blockers)
messages.error(request, f"Cannot queue dry-run until failed preflight checks are resolved: {blockers}.")
return redirect("host_detail", host=host_config.host)
elif not backup_gate.can_queue_real:
blockers = ", ".join(check.name for check in backup_gate.real_blockers)
messages.error(request, f"Cannot queue real backup until failed preflight checks are resolved: {blockers}.")
return redirect("host_detail", host=host_config.host)
run = queue_backup_run( run = queue_backup_run(
host=host_config, host=host_config,
dry_run=form.cleaned_data["dry_run"], dry_run=form.cleaned_data["dry_run"],
@@ -369,19 +415,29 @@ def run_detail(request, run_id: int):
prune_result = result.get("prune") if isinstance(result.get("prune"), dict) else {} prune_result = result.get("prune") if isinstance(result.get("prune"), dict) else {}
rsync_log_path = _run_rsync_log_path(run) rsync_log_path = _run_rsync_log_path(run)
rsync_log_tail = _run_rsync_log_tail(rsync_result, rsync_log_path) rsync_log_tail = _run_rsync_log_tail(rsync_result, rsync_log_path)
requested = result.get("requested") if isinstance(result.get("requested"), dict) else {}
context = { context = {
"run": run, "run": run,
"can_cancel": run.status in {BackupRun.Status.QUEUED, BackupRun.Status.RUNNING}, "can_cancel": run.status in {BackupRun.Status.QUEUED, BackupRun.Status.RUNNING},
"requested": result.get("requested") if isinstance(result.get("requested"), dict) else {}, "requested": requested,
"stats": run_stats if isinstance(run_stats, dict) else {}, "stats": run_stats if isinstance(run_stats, dict) else {},
"rsync": rsync_result, "rsync": rsync_result,
"rsync_command": _run_rsync_command(rsync_result), "rsync_command": _run_rsync_command(rsync_result),
"failure": failure, "failure": failure,
"failure_summary": failure.get("message") or failure.get("summary") or "",
"prune_result": prune_result, "prune_result": prune_result,
"has_prune_result": bool(prune_result), "has_prune_result": bool(prune_result),
"rsync_log_path": str(rsync_log_path) if rsync_log_path is not None else "", "rsync_log_path": str(rsync_log_path) if rsync_log_path is not None else "",
"rsync_log_exists": bool(rsync_log_path and rsync_log_path.exists()), "rsync_log_exists": bool(rsync_log_path and rsync_log_path.exists()),
"rsync_log_tail": rsync_log_tail, "rsync_log_tail": rsync_log_tail,
"dry_run_summary": _dry_run_summary(
result=result,
requested=requested,
stats=run_stats if isinstance(run_stats, dict) else {},
failure=failure,
rsync_log_tail=rsync_log_tail,
rsync_log_exists=bool(rsync_log_path and rsync_log_path.exists()),
),
"result_json": _pretty_json(run.result), "result_json": _pretty_json(run.result),
} }
return render(request, "pobsync_backend/run_detail.html", context) return render(request, "pobsync_backend/run_detail.html", context)
@@ -693,6 +749,48 @@ def _run_rsync_log_tail(rsync_result: dict, log_path: Path | None, *, max_lines:
return [] return []
def _dry_run_summary(
*,
result: dict,
requested: dict,
stats: dict,
failure: dict,
rsync_log_tail: list[str],
rsync_log_exists: bool,
) -> dict[str, object]:
if not (result.get("dry_run") or requested.get("dry_run")):
return {}
rsync_stats = stats.get("rsync") if isinstance(stats.get("rsync"), dict) else {}
warnings = []
if failure:
message = failure.get("message") or failure.get("summary")
hint = failure.get("hint")
if message:
warnings.append(str(message))
if hint:
warnings.append(str(hint))
for line in rsync_log_tail:
lowered = line.lower()
if "warning" in lowered or "permission denied" in lowered or "failed" in lowered:
warnings.append(line)
return {
"ok": result.get("ok"),
"status": "passed" if result.get("ok") else ("failed" if result.get("ok") is False else "running"),
"highlight_class": "success" if result.get("ok") else ("failed" if result.get("ok") is False else "warning"),
"files_seen": rsync_stats.get("files_total"),
"files_would_transfer": rsync_stats.get("files_transferred"),
"total_file_size_bytes": rsync_stats.get("total_file_size_bytes"),
"transfer_estimate_bytes": rsync_stats.get("total_transferred_file_size_bytes")
or rsync_stats.get("literal_data_bytes"),
"link_dest_estimated_savings_bytes": rsync_stats.get("link_dest_estimated_savings_bytes"),
"duration_seconds": stats.get("duration_seconds"),
"log_available": rsync_log_exists,
"warnings": list(dict.fromkeys(warnings)),
}
def _log_context(request) -> dict[str, object]: def _log_context(request) -> dict[str, object]:
units = ("pobsync-web.service", "pobsync-worker.service", "pobsync-scheduler.service") units = ("pobsync-web.service", "pobsync-worker.service", "pobsync-scheduler.service")
priorities = { priorities = {

View File

@@ -21,6 +21,7 @@ urlpatterns = [
path("hosts/<str:host>/config/", views.edit_host_config, name="edit_host_config"), path("hosts/<str:host>/config/", views.edit_host_config, name="edit_host_config"),
path("hosts/<str:host>/prepare-directories/", views.prepare_host_directories, name="prepare_host_directories"), path("hosts/<str:host>/prepare-directories/", views.prepare_host_directories, name="prepare_host_directories"),
path("hosts/<str:host>/scan-known-key/", views.scan_host_known_key, name="scan_host_known_key"), path("hosts/<str:host>/scan-known-key/", views.scan_host_known_key, name="scan_host_known_key"),
path("hosts/<str:host>/preflight/", views.run_host_preflight, name="run_host_preflight"),
path("hosts/<str:host>/queue-backup/", views.queue_manual_backup, name="queue_manual_backup"), path("hosts/<str:host>/queue-backup/", views.queue_manual_backup, name="queue_manual_backup"),
path("hosts/<str:host>/discover-snapshots/", views.discover_host_snapshots, name="discover_host_snapshots"), path("hosts/<str:host>/discover-snapshots/", views.discover_host_snapshots, name="discover_host_snapshots"),
path("hosts/<str:host>/retention-apply/", views.apply_host_retention, name="apply_host_retention"), path("hosts/<str:host>/retention-apply/", views.apply_host_retention, name="apply_host_retention"),