(feature) Add host doctor checks and Django log viewer

Add host-level checks for address, enabled state, SSH credential
selection, and backup directory readiness, and show them on the host
detail page.

Create host backup directories during host creation and prefill new
hosts from the default global config.

Add a staff-only logs view backed by journalctl with filtering by
pobsync unit, priority, and message text.

Improve runtime checks for gunicorn in virtualenv installs and ensure
the native installer grants the service user access to the backup root.
This commit is contained in:
2026-05-19 19:11:57 +02:00
parent bb7907846e
commit 90f28410ce
10 changed files with 319 additions and 2 deletions

View File

@@ -0,0 +1,69 @@
from __future__ import annotations
import os
from pathlib import Path
from pobsync.snapshot_meta import resolve_host_root
from .models import GlobalConfig, HostConfig
from .self_check import SelfCheck
HOST_BACKUP_SUBDIRS = ("scheduled", "manual", ".incomplete")
def ensure_host_directories(host: HostConfig, global_config: GlobalConfig | None = None) -> Path:
    """Create the backup directory layout for ``host`` and return its root.

    When ``global_config`` is not supplied (or is falsy) the ``GlobalConfig``
    named ``"default"`` is loaded; ``GlobalConfig.DoesNotExist`` propagates to
    the caller if it is missing. Every entry of ``HOST_BACKUP_SUBDIRS`` is
    created below the resolved host root, including missing parents, and
    already existing directories are left alone.
    """
    if not global_config:
        global_config = GlobalConfig.objects.get(name="default")

    host_root = resolve_host_root(global_config.backup_root, host.host)
    for directory_name in HOST_BACKUP_SUBDIRS:
        target = host_root / directory_name
        target.mkdir(parents=True, exist_ok=True)
    return host_root
def collect_host_checks(host: HostConfig, global_config: GlobalConfig | None = None) -> list[SelfCheck]:
    """Return the readiness checks for a single host.

    The result covers, in order: enabled state, address, SSH credential
    selection (host credential first, then the global default), the host
    backup root, and each directory in ``HOST_BACKUP_SUBDIRS``.

    When no ``global_config`` is passed and the ``"default"`` one does not
    exist, a single failed "Host global config" check is returned instead.
    """
    try:
        global_config = global_config or GlobalConfig.objects.get(name="default")
    except GlobalConfig.DoesNotExist:
        return [SelfCheck("Host global config", "failed", "Default global config does not exist.")]

    # A disabled host is only a warning, not a failure.
    if host.enabled:
        enabled_check = SelfCheck("Host enabled", "ok", "Host is enabled.")
    else:
        enabled_check = SelfCheck("Host enabled", "warning", "Host is disabled.")

    address = host.address.strip()
    if address:
        address_check = SelfCheck("Host address", "ok", address)
    else:
        address_check = SelfCheck("Host address", "failed", "Host address is empty.")

    # The host's own credential wins; the global default is the fallback.
    credential = host.ssh_credential or global_config.default_ssh_credential
    if credential:
        credential_check = SelfCheck("Host SSH credential", "ok", str(credential))
    else:
        credential_check = SelfCheck(
            "Host SSH credential",
            "warning",
            "No host or global SSH credential selected.",
        )

    host_root = resolve_host_root(global_config.backup_root, host.host)
    root_check = _host_path_check("Host backup root", host_root, must_exist=True, must_be_writable=True)
    directory_checks = [
        _host_path_check(
            f"Host directory: {subdir}",
            host_root / subdir,
            must_exist=True,
            must_be_writable=True,
        )
        for subdir in HOST_BACKUP_SUBDIRS
    ]

    return [enabled_check, address_check, credential_check, root_check, *directory_checks]
def _host_path_check(name: str, path: Path, *, must_exist: bool, must_be_writable: bool) -> SelfCheck:
    """Check that ``path`` is usable as a host backup directory.

    Args:
        name: Label used for the resulting check.
        path: Directory to inspect.
        must_exist: Fail when ``path`` itself does not exist. When false and
            ``path`` is missing, its parent is inspected instead.
        must_be_writable: Fail when the inspected directory is not writable
            by the current process.

    Returns:
        A failed ``SelfCheck`` describing the first problem found, otherwise
        an "ok" check whose message is ``path``.
    """
    # Evaluate existence once so both decisions below see the same answer.
    path_exists = path.exists()
    if must_exist and not path_exists:
        return SelfCheck(name, "failed", f"{path} does not exist.")

    target = path if path_exists else path.parent
    if not target.exists():
        return SelfCheck(name, "failed", f"{target} does not exist.")
    # A regular file with the expected name would otherwise be reported as
    # ready even though no snapshot directory can be created inside it.
    if not target.is_dir():
        return SelfCheck(name, "failed", f"{target} is not a directory.")
    if must_be_writable and not os.access(target, os.W_OK):
        return SelfCheck(name, "failed", f"{target} is not writable by this process.")
    return SelfCheck(name, "ok", str(path))

View File

@@ -3,6 +3,7 @@ from __future__ import annotations
import os
import shutil
import subprocess
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Literal
@@ -125,7 +126,7 @@ def _path_check(
def _binary_checks() -> list[SelfCheck]:
checks = []
for binary in ("rsync", "ssh", "ssh-keygen", "gunicorn"):
for binary in ("rsync", "ssh", "ssh-keygen"):
path = shutil.which(binary)
checks.append(
SelfCheck(
@@ -134,6 +135,14 @@ def _binary_checks() -> list[SelfCheck]:
path or f"{binary} was not found in PATH.",
)
)
gunicorn_path = shutil.which("gunicorn") or Path(sys.executable).parent / "gunicorn"
checks.append(
SelfCheck(
"Binary: gunicorn",
"ok" if Path(gunicorn_path).exists() else "failed",
str(gunicorn_path) if Path(gunicorn_path).exists() else "gunicorn was not found in PATH or next to Python.",
)
)
return checks

View File

@@ -174,6 +174,7 @@
<a href="{% url 'admin:index' %}">Admin</a>
<a href="{% url 'ssh_credentials' %}">SSH Keys</a>
<a href="{% url 'self_check' %}">Self Check</a>
<a href="{% url 'logs' %}">Logs</a>
<a href="/api/status/">Status API</a>
<span class="spacer"></span>
<span class="muted">{{ request.user.username }}</span>

View File

@@ -70,6 +70,36 @@
</div>
</section>
<section class="panel">
<h2>Host Check</h2>
<section class="grid" aria-label="Host check summary">
<div class="metric"><div class="label">OK</div><div class="value">{{ host_check_summary.ok }}</div></div>
<div class="metric"><div class="label">Warnings</div><div class="value">{{ host_check_summary.warning }}</div></div>
<div class="metric"><div class="label">Failed</div><div class="value">{{ host_check_summary.failed }}</div></div>
<div class="metric"><div class="label">Skipped</div><div class="value">{{ host_check_summary.skipped }}</div></div>
</section>
<table>
<thead>
<tr>
<th>Status</th>
<th>Check</th>
<th>Message</th>
<th>Detail</th>
</tr>
</thead>
<tbody>
{% for check in host_checks %}
<tr>
<td><span class="status {{ check.status }}">{{ check.status }}</span></td>
<td>{{ check.name }}</td>
<td>{{ check.message }}</td>
<td class="muted">{{ check.detail }}</td>
</tr>
{% endfor %}
</tbody>
</table>
</section>
<section class="panel">
<h2>Backup Control</h2>
<div class="operator-state">

View File

@@ -0,0 +1,52 @@
{% extends "pobsync_backend/base.html" %}
{% block title %}Logs | pobsync{% endblock %}
{% block content %}
<h1>Logs</h1>
<section class="actions" aria-label="Log actions">
<a class="button-link secondary" href="{% url 'dashboard' %}">Back to dashboard</a>
</section>
<section class="panel">
<h2>Filter</h2>
<form method="get" class="form-grid">
<div class="field">
<label for="unit">Unit</label>
<select id="unit" name="unit">
<option value="">All pobsync units</option>
{% for unit in units %}
<option value="{{ unit }}" {% if selected_unit == unit %}selected{% endif %}>{{ unit }}</option>
{% endfor %}
</select>
</div>
<div class="field">
<label for="priority">Priority</label>
<select id="priority" name="priority">
{% for value, label in priorities.items %}
<option value="{{ value }}" {% if selected_priority == value %}selected{% endif %}>{{ label }}</option>
{% endfor %}
</select>
</div>
<div class="field">
<label for="q">Message contains</label>
<input id="q" name="q" value="{{ query }}">
</div>
<div class="actions">
<button type="submit">Filter logs</button>
</div>
</form>
</section>
<section class="panel">
<h2>Messages</h2>
{% if error %}
<p class="status failed">{{ error }}</p>
{% else %}
<pre>{% for line in lines %}{{ line }}
{% empty %}No log messages matched the current filter.
{% endfor %}</pre>
{% endif %}
</section>
{% endblock %}

View File

@@ -1,5 +1,6 @@
from __future__ import annotations
import subprocess
from datetime import datetime, timezone
from pathlib import Path
from tempfile import TemporaryDirectory
@@ -95,6 +96,27 @@ class ViewTests(TestCase):
self.assertContains(response, "Database connection")
self.assertContains(response, "POBSYNC_HOME")
def test_logs_view_renders_filtered_journal_messages(self) -> None:
    """The logs view passes the unit to journalctl and filters lines by text."""
    self.client.force_login(self.staff_user)

    journal_output = (
        "2026-05-19 pobsync-worker.service failed backup\n"
        "2026-05-19 pobsync-web.service started\n"
    )
    fake_result = subprocess.CompletedProcess(
        args=["journalctl"],
        returncode=0,
        stdout=journal_output,
        stderr="",
    )
    filters = {"unit": "pobsync-worker.service", "priority": "0..3", "q": "failed"}

    # Pretend journalctl is installed and returns the canned output above.
    which_patch = patch("pobsync_backend.views.shutil.which", return_value="/usr/bin/journalctl")
    run_patch = patch("pobsync_backend.views.subprocess.run", return_value=fake_result)
    with which_patch, run_patch as run:
        response = self.client.get(reverse("logs"), filters)

    self.assertEqual(response.status_code, 200)
    self.assertContains(response, "Logs")
    # Only the line matching the "q" filter survives.
    self.assertContains(response, "failed backup")
    self.assertNotContains(response, "started")

    journalctl_command = run.call_args.args[0]
    self.assertIn("-u", journalctl_command)
    self.assertIn("pobsync-worker.service", journalctl_command)
def test_ssh_credentials_view_creates_key(self) -> None:
self.client.force_login(self.staff_user)
@@ -352,6 +374,57 @@ class ViewTests(TestCase):
self.assertEqual(host.rsync_extra_args, ["--numeric-ids"])
self.assertEqual(host.retention_weekly, 4)
def test_create_host_config_uses_global_defaults_and_prepares_directories(self) -> None:
    """Creating a host prefills global defaults and creates its backup directories."""
    self.client.force_login(self.staff_user)
    credential = SshCredential.objects.create(name="global-key", private_key="PRIVATE KEY")
    form_data = {
        "host": "web-01",
        "address": "web-01.example.test",
        "enabled": "on",
        "ssh_credential": str(credential.id),
        "ssh_user": "backup",
        "ssh_port": "2222",
        "source_root": "/srv",
        "includes": "",
        "excludes_add": "",
        "excludes_replace": "",
        "rsync_extra_args": "",
        "retention_daily": "3",
        "retention_weekly": "2",
        "retention_monthly": "1",
        "retention_yearly": "0",
    }

    # Everything that touches the backup root stays inside the temporary
    # directory's lifetime.
    with TemporaryDirectory() as tmp:
        backup_root = Path(tmp) / "backups"
        GlobalConfig.objects.create(
            name="default",
            backup_root=str(backup_root),
            default_ssh_credential=credential,
            ssh_user="backup",
            ssh_port=2222,
            default_source_root="/srv",
            retention_daily=3,
            retention_weekly=2,
            retention_monthly=1,
            retention_yearly=0,
        )
        create_url = reverse("create_host_config")

        # The empty form is prefilled from the default global config.
        get_response = self.client.get(create_url)
        for rendered_default in ('value="backup"', 'value="2222"', 'value="/srv"'):
            self.assertContains(get_response, rendered_default)

        response = self.client.post(create_url, form_data, follow=True)

        self.assertRedirects(response, reverse("host_detail", args=["web-01"]))
        self.assertContains(response, "prepared")
        host_root = backup_root / "web-01"
        for subdir in ("scheduled", "manual", ".incomplete"):
            self.assertTrue((host_root / subdir).is_dir())
def test_host_detail_renders_config_schedule_runs_and_snapshots(self) -> None:
self.client.force_login(self.staff_user)
GlobalConfig.objects.create(name="default", backup_root="/backups")
@@ -378,6 +451,7 @@ class ViewTests(TestCase):
self.assertContains(response, "Backup Control")
self.assertContains(response, "Queue dry-run")
self.assertContains(response, "Queue backup")
self.assertContains(response, "Host Check")
self.assertContains(response, "ready")
self.assertContains(response, "Snapshot Discovery")
self.assertContains(response, reverse("queue_manual_backup", args=[host.host]))

View File

@@ -1,6 +1,8 @@
from __future__ import annotations
import json
import shutil
import subprocess
from pathlib import Path
from django.contrib import messages
@@ -22,6 +24,7 @@ from .forms import (
ScheduleConfigForm,
SshCredentialForm,
)
from .host_ops import collect_host_checks, ensure_host_directories
from .models import BackupRun, GlobalConfig, HostConfig, ScheduleConfig, SnapshotRecord, SshCredential
from .retention import run_sql_retention_apply, run_sql_retention_plan
from .self_check import collect_self_checks, summarize_self_checks
@@ -72,6 +75,12 @@ def self_check(request):
)
@staff_member_required
def logs(request):
    """Render the journal log viewer using the context from ``_log_context``."""
    return render(request, "pobsync_backend/logs.html", _log_context(request))
@staff_member_required
def ssh_credentials(request):
context = {
@@ -152,6 +161,13 @@ def create_host_config(request):
form = CreateHostConfigForm(request.POST)
if form.is_valid():
host_config = form.save()
try:
host_root = ensure_host_directories(host_config)
except Exception as exc:
messages.warning(request, f"Host config created, but backup directories could not be prepared: {exc}")
else:
messages.success(request, f"Host config created for {host_config.host}; prepared {host_root}.")
return redirect("host_detail", host=host_config.host)
messages.success(request, f"Host config created for {host_config.host}.")
return redirect("host_detail", host=host_config.host)
else:
@@ -176,10 +192,13 @@ def host_detail(request, host: str):
status__in=[BackupRun.Status.QUEUED, BackupRun.Status.RUNNING]
).order_by("created_at", "id").first()
has_global_config = GlobalConfig.objects.filter(name="default").exists()
host_checks = collect_host_checks(host_config)
context = {
"host": host_config,
"schedule": _schedule_for_host(host_config),
"discovery": inspect_snapshot_discovery(host=host_config),
"host_checks": host_checks,
"host_check_summary": summarize_self_checks(host_checks),
"manual_backup_form": ManualBackupForm(initial=_default_manual_backup_initial(host_config)),
"can_queue_backup": host_config.enabled and has_global_config,
"has_global_config": has_global_config,
@@ -414,6 +433,19 @@ def _default_global_initial() -> dict[str, object]:
def _default_host_initial() -> dict[str, object]:
global_config = GlobalConfig.objects.filter(name="default").first()
if global_config is not None:
return {
"enabled": True,
"ssh_credential": global_config.default_ssh_credential,
"ssh_user": global_config.ssh_user,
"ssh_port": global_config.ssh_port,
"source_root": global_config.default_source_root,
"retention_daily": global_config.retention_daily,
"retention_weekly": global_config.retention_weekly,
"retention_monthly": global_config.retention_monthly,
"retention_yearly": global_config.retention_yearly,
}
return {
"enabled": True,
"retention_daily": 14,
@@ -435,3 +467,50 @@ def _default_manual_backup_initial(host_config: HostConfig) -> dict[str, object]
def _pretty_json(value: object) -> str:
return json.dumps(value or {}, indent=2, sort_keys=True)
def _log_context(request) -> dict[str, object]:
units = ("pobsync-web.service", "pobsync-worker.service", "pobsync-scheduler.service")
priorities = {
"": "All",
"0..3": "Errors",
"4": "Warnings",
"5": "Notices",
"6": "Info",
"7": "Debug",
}
selected_unit = request.GET.get("unit", "")
priority = request.GET.get("priority", "0..4")
query = request.GET.get("q", "").strip()
lines = []
error = ""
if shutil.which("journalctl") is None:
error = "journalctl is not available in this runtime."
else:
command = ["journalctl", "--no-pager", "-n", "300", "-o", "short-iso"]
if selected_unit in units:
command.extend(["-u", selected_unit])
else:
for unit in units:
command.extend(["-u", unit])
if priority:
command.extend(["-p", priority])
result = subprocess.run(command, check=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, timeout=10)
if result.returncode != 0:
error = result.stderr.strip() or "Could not read journal logs."
else:
lines = result.stdout.splitlines()
if query:
lowered_query = query.lower()
lines = [line for line in lines if lowered_query in line.lower()]
return {
"units": units,
"priorities": priorities,
"selected_unit": selected_unit,
"selected_priority": priority,
"query": query,
"lines": lines,
"error": error,
}