(feature) Improve run debugging and log filtering #12
@@ -62,6 +62,10 @@
|
|||||||
.metric .label { color: var(--muted); font-size: 12px; text-transform: uppercase; }
|
.metric .label { color: var(--muted); font-size: 12px; text-transform: uppercase; }
|
||||||
.metric .value { font-size: 26px; font-weight: 650; margin-top: 4px; }
|
.metric .value { font-size: 26px; font-weight: 650; margin-top: 4px; }
|
||||||
.panel { padding: 16px; margin-bottom: 18px; overflow: auto; }
|
.panel { padding: 16px; margin-bottom: 18px; overflow: auto; }
|
||||||
|
.panel.highlight { border-left: 4px solid var(--border); }
|
||||||
|
.panel.highlight.failed { border-left-color: var(--failed); background: #fff7f7; }
|
||||||
|
.panel.highlight.warning { border-left-color: var(--running); background: #fffaf0; }
|
||||||
|
.panel.highlight.success { border-left-color: var(--success); background: #f5fbf7; }
|
||||||
table { width: 100%; border-collapse: collapse; min-width: 640px; }
|
table { width: 100%; border-collapse: collapse; min-width: 640px; }
|
||||||
th, td { border-bottom: 1px solid var(--border); padding: 9px 8px; text-align: left; vertical-align: top; }
|
th, td { border-bottom: 1px solid var(--border); padding: 9px 8px; text-align: left; vertical-align: top; }
|
||||||
th { color: var(--muted); font-size: 12px; font-weight: 650; text-transform: uppercase; }
|
th { color: var(--muted); font-size: 12px; font-weight: 650; text-transform: uppercase; }
|
||||||
|
|||||||
@@ -17,11 +17,22 @@
|
|||||||
|
|
||||||
<section class="grid" aria-label="Run summary">
|
<section class="grid" aria-label="Run summary">
|
||||||
<div class="metric"><div class="label">Host</div><div class="value">{{ run.host.host }}</div></div>
|
<div class="metric"><div class="label">Host</div><div class="value">{{ run.host.host }}</div></div>
|
||||||
<div class="metric"><div class="label">Status</div><div class="value">{{ run.status }}</div></div>
|
<div class="metric"><div class="label">Status</div><div class="value"><span class="status {{ run.status }}">{{ run.status }}</span></div></div>
|
||||||
<div class="metric"><div class="label">Type</div><div class="value">{{ run.run_type }}</div></div>
|
<div class="metric"><div class="label">Type</div><div class="value">{{ run.run_type }}</div></div>
|
||||||
<div class="metric"><div class="label">Rsync</div><div class="value">{{ run.rsync_exit_code|default:"" }}</div></div>
|
<div class="metric"><div class="label">Rsync</div><div class="value">{{ run.rsync_exit_code|default:"" }}</div></div>
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
|
{% if failure %}
|
||||||
|
<section class="panel highlight failed">
|
||||||
|
<h2>Failure</h2>
|
||||||
|
<div class="stack">
|
||||||
|
<div><strong>Category:</strong> {{ failure.category|default:"unknown" }}</div>
|
||||||
|
<div><strong>Summary:</strong> {{ failure.summary|default:"" }}</div>
|
||||||
|
<div><strong>Hint:</strong> {{ failure.hint|default:"" }}</div>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
<div class="two-col">
|
<div class="two-col">
|
||||||
<section class="panel">
|
<section class="panel">
|
||||||
<h2>Timing</h2>
|
<h2>Timing</h2>
|
||||||
@@ -64,6 +75,36 @@
|
|||||||
</section>
|
</section>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
|
<section class="panel">
|
||||||
|
<h2>Rsync Command</h2>
|
||||||
|
{% if rsync_command %}
|
||||||
|
<pre>{% for part in rsync_command %}{{ part }}{% if not forloop.last %}
|
||||||
|
{% endif %}{% endfor %}</pre>
|
||||||
|
{% else %}
|
||||||
|
<p class="muted">No rsync command recorded yet.</p>
|
||||||
|
{% endif %}
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section class="panel">
|
||||||
|
<h2>Rsync Log</h2>
|
||||||
|
<div class="stack spaced">
|
||||||
|
{% if rsync_log_exists %}
|
||||||
|
<div><a href="{% url 'run_rsync_log' run.id %}">Open full rsync log</a></div>
|
||||||
|
<div class="muted">{{ rsync_log_path }}</div>
|
||||||
|
{% elif rsync_log_path %}
|
||||||
|
<div class="muted">{{ rsync_log_path }} (missing)</div>
|
||||||
|
{% else %}
|
||||||
|
<div class="muted">No rsync log path recorded yet.</div>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
{% if rsync_log_tail %}
|
||||||
|
<pre>{% for line in rsync_log_tail %}{{ line }}{% if not forloop.last %}
|
||||||
|
{% endif %}{% endfor %}</pre>
|
||||||
|
{% else %}
|
||||||
|
<p class="muted">No recent rsync log output recorded yet.</p>
|
||||||
|
{% endif %}
|
||||||
|
</section>
|
||||||
|
|
||||||
{% if stats %}
|
{% if stats %}
|
||||||
<section class="panel">
|
<section class="panel">
|
||||||
<h2>Stats</h2>
|
<h2>Stats</h2>
|
||||||
@@ -86,8 +127,21 @@
|
|||||||
</section>
|
</section>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
|
{% if has_prune_result %}
|
||||||
|
<section class="panel highlight {% if prune_result.ok %}success{% else %}warning{% endif %}">
|
||||||
|
<h2>Retention</h2>
|
||||||
|
<div class="stack">
|
||||||
|
<div><strong>Status:</strong> {% if prune_result.ok %}ok{% else %}warning{% endif %}</div>
|
||||||
|
{% if prune_result.source %}<div><strong>Source:</strong> {{ prune_result.source }}</div>{% endif %}
|
||||||
|
{% if prune_result.deleted %}<div><strong>Deleted:</strong> {{ prune_result.deleted|length }}</div>{% endif %}
|
||||||
|
{% if prune_result.error %}<div><strong>Error:</strong> {{ prune_result.error }}</div>{% endif %}
|
||||||
|
{% if prune_result.type %}<div><strong>Type:</strong> {{ prune_result.type }}</div>{% endif %}
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
<section class="panel">
|
<section class="panel">
|
||||||
<h2>Result</h2>
|
<h2>Raw Result</h2>
|
||||||
<pre>{{ result_json }}</pre>
|
<pre>{{ result_json }}</pre>
|
||||||
</section>
|
</section>
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|||||||
@@ -857,6 +857,11 @@ class ViewTests(TestCase):
|
|||||||
result={
|
result={
|
||||||
"ok": True,
|
"ok": True,
|
||||||
"snapshot": snapshot.path,
|
"snapshot": snapshot.path,
|
||||||
|
"rsync": {
|
||||||
|
"command": ["rsync", "--archive", "root@web-01:/", snapshot.path],
|
||||||
|
"exit_code": 0,
|
||||||
|
"log_tail": ["sending incremental file list", "sent 500 bytes"],
|
||||||
|
},
|
||||||
"requested": {
|
"requested": {
|
||||||
"dry_run": True,
|
"dry_run": True,
|
||||||
"verbose_output": True,
|
"verbose_output": True,
|
||||||
@@ -889,6 +894,10 @@ class ViewTests(TestCase):
|
|||||||
self.assertContains(response, "Requested Options")
|
self.assertContains(response, "Requested Options")
|
||||||
self.assertContains(response, "Dry run:</strong> yes")
|
self.assertContains(response, "Dry run:</strong> yes")
|
||||||
self.assertContains(response, "Verbose rsync output:</strong> yes")
|
self.assertContains(response, "Verbose rsync output:</strong> yes")
|
||||||
|
self.assertContains(response, "Rsync Command")
|
||||||
|
self.assertContains(response, "--archive")
|
||||||
|
self.assertContains(response, "Rsync Log")
|
||||||
|
self.assertContains(response, "sending incremental file list")
|
||||||
self.assertContains(response, "Stats")
|
self.assertContains(response, "Stats")
|
||||||
self.assertContains(response, "Files seen:</strong> 10")
|
self.assertContains(response, "Files seen:</strong> 10")
|
||||||
self.assertContains(response, "Estimated link-dest saving")
|
self.assertContains(response, "Estimated link-dest saving")
|
||||||
@@ -901,7 +910,7 @@ class ViewTests(TestCase):
|
|||||||
with TemporaryDirectory() as tmp:
|
with TemporaryDirectory() as tmp:
|
||||||
log_path = Path(tmp) / "snapshot" / "meta" / "rsync.log"
|
log_path = Path(tmp) / "snapshot" / "meta" / "rsync.log"
|
||||||
log_path.parent.mkdir(parents=True)
|
log_path.parent.mkdir(parents=True)
|
||||||
log_path.write_text("rsync log line\n", encoding="utf-8")
|
log_path.write_text("old line\nrsync log line\n", encoding="utf-8")
|
||||||
run = BackupRun.objects.create(
|
run = BackupRun.objects.create(
|
||||||
host=host,
|
host=host,
|
||||||
status=BackupRun.Status.SUCCESS,
|
status=BackupRun.Status.SUCCESS,
|
||||||
@@ -915,8 +924,40 @@ class ViewTests(TestCase):
|
|||||||
|
|
||||||
self.assertContains(response, reverse("run_rsync_log", args=[run.id]))
|
self.assertContains(response, reverse("run_rsync_log", args=[run.id]))
|
||||||
self.assertContains(response, str(log_path))
|
self.assertContains(response, str(log_path))
|
||||||
|
self.assertContains(response, "rsync log line")
|
||||||
self.assertEqual(log_response.status_code, 200)
|
self.assertEqual(log_response.status_code, 200)
|
||||||
self.assertEqual(log_body, b"rsync log line\n")
|
self.assertEqual(log_body, b"old line\nrsync log line\n")
|
||||||
|
|
||||||
|
def test_run_detail_surfaces_failure_and_retention_warning(self) -> None:
    """Run detail shows the failure panel and the retention warning panel.

    The run is stored with a ``failure`` section and a ``prune`` section whose
    ``ok`` flag is false; the rendered page must contain the headings and the
    texts taken from both sections.
    """
    failure_info = {
        "category": "transport",
        "summary": "SSH connection dropped.",
        "hint": "Check network connectivity.",
    }
    prune_info = {
        "ok": False,
        "type": "ConfigError",
        "error": "Deletion blocked by --max-delete=0",
    }

    self.client.force_login(self.staff_user)
    host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
    run = BackupRun.objects.create(
        host=host,
        status=BackupRun.Status.WARNING,
        rsync_exit_code=0,
        result={"ok": True, "failure": failure_info, "prune": prune_info},
    )

    detail_url = reverse("run_detail", args=[run.id])
    response = self.client.get(detail_url)

    self.assertEqual(response.status_code, 200)
    # Same checks, in the same order, as individual assertContains calls.
    expected_fragments = (
        "Failure",
        "transport",
        "Check network connectivity.",
        "Retention",
        "Deletion blocked by --max-delete=0",
    )
    for fragment in expected_fragments:
        self.assertContains(response, fragment)
|
||||||
|
|
||||||
def test_run_detail_infers_rsync_log_from_snapshot_path(self) -> None:
|
def test_run_detail_infers_rsync_log_from_snapshot_path(self) -> None:
|
||||||
self.client.force_login(self.staff_user)
|
self.client.force_login(self.staff_user)
|
||||||
|
|||||||
@@ -362,15 +362,26 @@ def queue_manual_backup(request, host: str):
|
|||||||
@staff_member_required
def run_detail(request, run_id: int):
    """Render the detail page for one backup run.

    The run is fetched with ``get_object_or_404`` and the template context is
    assembled from ``run.result``. The result itself, and each named section
    read from it, is used only when it is a dict; otherwise an empty dict is
    substituted so the template always receives mappings.
    """
    run = get_object_or_404(BackupRun.objects.select_related("host", "snapshot"), id=run_id)
    payload = run.result if isinstance(run.result, dict) else {}

    def section(key: str) -> dict:
        # Non-dict (or missing) sections collapse to an empty dict.
        value = payload.get(key)
        return value if isinstance(value, dict) else {}

    rsync_section = section("rsync")
    prune_section = section("prune")
    log_path = _run_rsync_log_path(run)
    log_tail = _run_rsync_log_tail(rsync_section, log_path)
    cancellable_states = {BackupRun.Status.QUEUED, BackupRun.Status.RUNNING}

    context = {
        "run": run,
        "can_cancel": run.status in cancellable_states,
        "requested": section("requested"),
        "stats": section("stats"),
        "rsync": rsync_section,
        "rsync_command": _run_rsync_command(rsync_section),
        "failure": section("failure"),
        "prune_result": prune_section,
        "has_prune_result": bool(prune_section),
        "rsync_log_path": "" if log_path is None else str(log_path),
        "rsync_log_exists": bool(log_path and log_path.exists()),
        "rsync_log_tail": log_tail,
        # The raw result (not the normalized payload) is what gets pretty-printed.
        "result_json": _pretty_json(run.result),
    }
    return render(request, "pobsync_backend/run_detail.html", context)
|
||||||
@@ -662,6 +673,26 @@ def _run_rsync_log_path(run: BackupRun) -> Path | None:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _run_rsync_command(rsync_result: dict) -> list[str]:
    """Return the recorded rsync command as a list of strings.

    Reads the ``"command"`` entry of ``rsync_result``. When it is a list, every
    element is converted with ``str``; any other value (including a missing
    entry) produces an empty list.
    """
    parts = rsync_result.get("command")
    return list(map(str, parts)) if isinstance(parts, list) else []
|
||||||
|
|
||||||
|
|
||||||
|
def _run_rsync_log_tail(rsync_result: dict, log_path: Path | None, *, max_lines: int = 30) -> list[str]:
|
||||||
|
log_tail = rsync_result.get("log_tail")
|
||||||
|
if isinstance(log_tail, list):
|
||||||
|
return [str(line) for line in log_tail[-max_lines:]]
|
||||||
|
if log_path is None or not log_path.is_file():
|
||||||
|
return []
|
||||||
|
try:
|
||||||
|
with log_path.open("r", encoding="utf-8", errors="replace") as handle:
|
||||||
|
return handle.read().splitlines()[-max_lines:]
|
||||||
|
except OSError:
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
def _log_context(request) -> dict[str, object]:
|
def _log_context(request) -> dict[str, object]:
|
||||||
units = ("pobsync-web.service", "pobsync-worker.service", "pobsync-scheduler.service")
|
units = ("pobsync-web.service", "pobsync-worker.service", "pobsync-scheduler.service")
|
||||||
priorities = {
|
priorities = {
|
||||||
|
|||||||
Reference in New Issue
Block a user