(feature) Add host doctor checks and Django log viewer
Add host-level checks for address, enabled state, SSH credential selection, and backup directory readiness, and show them on the host detail page. Create host backup directories during host creation and prefill new hosts from the default global config. Add a staff-only logs view backed by journalctl with filtering by pobsync unit, priority, and message text. Improve runtime checks for gunicorn in virtualenv installs and ensure the native installer grants the service user access to the backup root.
This commit is contained in:
69
src/pobsync_backend/host_ops.py
Normal file
69
src/pobsync_backend/host_ops.py
Normal file
@@ -0,0 +1,69 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from pobsync.snapshot_meta import resolve_host_root
|
||||
|
||||
from .models import GlobalConfig, HostConfig
|
||||
from .self_check import SelfCheck
|
||||
|
||||
|
||||
HOST_BACKUP_SUBDIRS = ("scheduled", "manual", ".incomplete")
|
||||
|
||||
|
||||
def ensure_host_directories(host: HostConfig, global_config: GlobalConfig | None = None) -> Path:
    """Create the backup directory layout for ``host`` and return its root.

    Args:
        host: Host configuration; its ``host`` attribute is passed to
            ``resolve_host_root`` together with the backup root.
        global_config: Global configuration supplying ``backup_root``. When
            omitted, the config named ``"default"`` is loaded.

    Returns:
        The host root, as produced by ``resolve_host_root``.

    Raises:
        GlobalConfig.DoesNotExist: No config was passed and no ``"default"``
            config exists.
        OSError: A directory could not be created.
    """
    if not global_config:
        global_config = GlobalConfig.objects.get(name="default")

    root = resolve_host_root(global_config.backup_root, host.host)
    for name in HOST_BACKUP_SUBDIRS:
        # parents=True also creates the host root (and backup root) on demand;
        # exist_ok=True makes repeated calls harmless.
        target = root / name
        target.mkdir(parents=True, exist_ok=True)
    return root
|
||||
|
||||
|
||||
def collect_host_checks(host: HostConfig, global_config: GlobalConfig | None = None) -> list[SelfCheck]:
    """Run the per-host checks and return one ``SelfCheck`` per check.

    The checks cover, in order: the host's enabled flag, its address, the SSH
    credential (host-level first, then the global default), the host backup
    root, and every directory listed in ``HOST_BACKUP_SUBDIRS``.

    Args:
        host: Host configuration to inspect.
        global_config: Global configuration to use. When omitted, the config
            named ``"default"`` is loaded.

    Returns:
        The list of check results. If no global config is available, a single
        failed "Host global config" check is returned and nothing else is run.
    """
    if not global_config:
        try:
            global_config = GlobalConfig.objects.get(name="default")
        except GlobalConfig.DoesNotExist:
            return [SelfCheck("Host global config", "failed", "Default global config does not exist.")]

    # A disabled host is reported as a warning, not a failure.
    if host.enabled:
        enabled_check = SelfCheck("Host enabled", "ok", "Host is enabled.")
    else:
        enabled_check = SelfCheck("Host enabled", "warning", "Host is disabled.")

    address = host.address.strip()
    if address:
        address_check = SelfCheck("Host address", "ok", address)
    else:
        address_check = SelfCheck("Host address", "failed", "Host address is empty.")

    # The host-specific credential takes precedence over the global default.
    credential = host.ssh_credential or global_config.default_ssh_credential
    if credential:
        credential_check = SelfCheck("Host SSH credential", "ok", str(credential))
    else:
        credential_check = SelfCheck(
            "Host SSH credential",
            "warning",
            "No host or global SSH credential selected.",
        )

    results: list[SelfCheck] = [enabled_check, address_check, credential_check]

    host_root = resolve_host_root(global_config.backup_root, host.host)
    path_targets = [("Host backup root", host_root)]
    path_targets.extend((f"Host directory: {subdir}", host_root / subdir) for subdir in HOST_BACKUP_SUBDIRS)
    for label, location in path_targets:
        results.append(_host_path_check(label, location, must_exist=True, must_be_writable=True))
    return results
|
||||
|
||||
|
||||
def _host_path_check(name: str, path: Path, *, must_exist: bool, must_be_writable: bool) -> SelfCheck:
    """Check that ``path`` (or its parent) exists and is usable by this process.

    Args:
        name: Label used as the name of the resulting ``SelfCheck``.
        path: Filesystem location to inspect.
        must_exist: When true, a missing ``path`` is a failure. When false, a
            missing ``path`` is tolerated and its parent is inspected instead.
        must_be_writable: When true, the inspected target must be writable.

    Returns:
        A ``SelfCheck`` with status ``"ok"`` (message is ``str(path)``) or
        ``"failed"`` (message names the offending location).
    """
    path_exists = path.exists()
    if must_exist and not path_exists:
        return SelfCheck(name, "failed", f"{path} does not exist.")

    # If the path is allowed to be absent, the parent is what must be usable.
    target = path if path_exists else path.parent
    if not target.exists():
        return SelfCheck(name, "failed", f"{target} does not exist.")

    if must_be_writable:
        # Creating entries inside a directory needs search (execute) permission
        # in addition to write permission; W_OK alone would report a directory
        # as writable even though nothing can be created in it.
        required = os.W_OK | os.X_OK if target.is_dir() else os.W_OK
        if not os.access(target, required):
            return SelfCheck(name, "failed", f"{target} is not writable by this process.")

    return SelfCheck(name, "ok", str(path))
|
||||
@@ -3,6 +3,7 @@ from __future__ import annotations
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Literal
|
||||
@@ -125,7 +126,7 @@ def _path_check(
|
||||
|
||||
def _binary_checks() -> list[SelfCheck]:
|
||||
checks = []
|
||||
for binary in ("rsync", "ssh", "ssh-keygen", "gunicorn"):
|
||||
for binary in ("rsync", "ssh", "ssh-keygen"):
|
||||
path = shutil.which(binary)
|
||||
checks.append(
|
||||
SelfCheck(
|
||||
@@ -134,6 +135,14 @@ def _binary_checks() -> list[SelfCheck]:
|
||||
path or f"{binary} was not found in PATH.",
|
||||
)
|
||||
)
|
||||
gunicorn_path = shutil.which("gunicorn") or Path(sys.executable).parent / "gunicorn"
|
||||
checks.append(
|
||||
SelfCheck(
|
||||
"Binary: gunicorn",
|
||||
"ok" if Path(gunicorn_path).exists() else "failed",
|
||||
str(gunicorn_path) if Path(gunicorn_path).exists() else "gunicorn was not found in PATH or next to Python.",
|
||||
)
|
||||
)
|
||||
return checks
|
||||
|
||||
|
||||
|
||||
@@ -174,6 +174,7 @@
|
||||
<a href="{% url 'admin:index' %}">Admin</a>
|
||||
<a href="{% url 'ssh_credentials' %}">SSH Keys</a>
|
||||
<a href="{% url 'self_check' %}">Self Check</a>
|
||||
<a href="{% url 'logs' %}">Logs</a>
|
||||
<a href="/api/status/">Status API</a>
|
||||
<span class="spacer"></span>
|
||||
<span class="muted">{{ request.user.username }}</span>
|
||||
|
||||
@@ -70,6 +70,36 @@
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="panel">
|
||||
<h2>Host Check</h2>
|
||||
<section class="grid" aria-label="Host check summary">
|
||||
<div class="metric"><div class="label">OK</div><div class="value">{{ host_check_summary.ok }}</div></div>
|
||||
<div class="metric"><div class="label">Warnings</div><div class="value">{{ host_check_summary.warning }}</div></div>
|
||||
<div class="metric"><div class="label">Failed</div><div class="value">{{ host_check_summary.failed }}</div></div>
|
||||
<div class="metric"><div class="label">Skipped</div><div class="value">{{ host_check_summary.skipped }}</div></div>
|
||||
</section>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Status</th>
|
||||
<th>Check</th>
|
||||
<th>Message</th>
|
||||
<th>Detail</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for check in host_checks %}
|
||||
<tr>
|
||||
<td><span class="status {{ check.status }}">{{ check.status }}</span></td>
|
||||
<td>{{ check.name }}</td>
|
||||
<td>{{ check.message }}</td>
|
||||
<td class="muted">{{ check.detail }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</section>
|
||||
|
||||
<section class="panel">
|
||||
<h2>Backup Control</h2>
|
||||
<div class="operator-state">
|
||||
|
||||
52
src/pobsync_backend/templates/pobsync_backend/logs.html
Normal file
52
src/pobsync_backend/templates/pobsync_backend/logs.html
Normal file
@@ -0,0 +1,52 @@
|
||||
{% extends "pobsync_backend/base.html" %}
|
||||
|
||||
{% block title %}Logs | pobsync{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<h1>Logs</h1>
|
||||
|
||||
<section class="actions" aria-label="Log actions">
|
||||
<a class="button-link secondary" href="{% url 'dashboard' %}">Back to dashboard</a>
|
||||
</section>
|
||||
|
||||
<section class="panel">
|
||||
<h2>Filter</h2>
|
||||
<form method="get" class="form-grid">
|
||||
<div class="field">
|
||||
<label for="unit">Unit</label>
|
||||
<select id="unit" name="unit">
|
||||
<option value="">All pobsync units</option>
|
||||
{% for unit in units %}
|
||||
<option value="{{ unit }}" {% if selected_unit == unit %}selected{% endif %}>{{ unit }}</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
</div>
|
||||
<div class="field">
|
||||
<label for="priority">Priority</label>
|
||||
<select id="priority" name="priority">
|
||||
{% for value, label in priorities.items %}
|
||||
<option value="{{ value }}" {% if selected_priority == value %}selected{% endif %}>{{ label }}</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
</div>
|
||||
<div class="field">
|
||||
<label for="q">Message contains</label>
|
||||
<input id="q" name="q" value="{{ query }}">
|
||||
</div>
|
||||
<div class="actions">
|
||||
<button type="submit">Filter logs</button>
|
||||
</div>
|
||||
</form>
|
||||
</section>
|
||||
|
||||
<section class="panel">
|
||||
<h2>Messages</h2>
|
||||
{% if error %}
|
||||
<p class="status failed">{{ error }}</p>
|
||||
{% else %}
|
||||
<pre>{% for line in lines %}{{ line }}
|
||||
{% empty %}No log messages matched the current filter.
|
||||
{% endfor %}</pre>
|
||||
{% endif %}
|
||||
</section>
|
||||
{% endblock %}
|
||||
@@ -1,5 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from tempfile import TemporaryDirectory
|
||||
@@ -95,6 +96,27 @@ class ViewTests(TestCase):
|
||||
self.assertContains(response, "Database connection")
|
||||
self.assertContains(response, "POBSYNC_HOME")
|
||||
|
||||
def test_logs_view_renders_filtered_journal_messages(self) -> None:
    """The logs page queries journalctl for the chosen unit and applies the text filter."""
    journal_output = (
        "2026-05-19 pobsync-worker.service failed backup\n"
        "2026-05-19 pobsync-web.service started\n"
    )
    fake_result = subprocess.CompletedProcess(
        args=["journalctl"],
        returncode=0,
        stdout=journal_output,
        stderr="",
    )
    params = {"unit": "pobsync-worker.service", "priority": "0..3", "q": "failed"}
    self.client.force_login(self.staff_user)

    # Pretend journalctl is installed and return canned output instead of running it.
    which_patch = patch("pobsync_backend.views.shutil.which", return_value="/usr/bin/journalctl")
    run_patch = patch("pobsync_backend.views.subprocess.run", return_value=fake_result)
    with which_patch, run_patch as run:
        response = self.client.get(reverse("logs"), params)

    self.assertEqual(response.status_code, 200)
    self.assertContains(response, "Logs")
    # Only the line matching the "q" filter may be rendered.
    self.assertContains(response, "failed backup")
    self.assertNotContains(response, "started")

    command = run.call_args.args[0]
    self.assertIn("-u", command)
    self.assertIn("pobsync-worker.service", command)
|
||||
|
||||
def test_ssh_credentials_view_creates_key(self) -> None:
|
||||
self.client.force_login(self.staff_user)
|
||||
|
||||
@@ -352,6 +374,57 @@ class ViewTests(TestCase):
|
||||
self.assertEqual(host.rsync_extra_args, ["--numeric-ids"])
|
||||
self.assertEqual(host.retention_weekly, 4)
|
||||
|
||||
def test_create_host_config_uses_global_defaults_and_prepares_directories(self) -> None:
    """The create form is prefilled from the global config and a POST creates the backup directories."""
    self.client.force_login(self.staff_user)
    credential = SshCredential.objects.create(name="global-key", private_key="PRIVATE KEY")

    with TemporaryDirectory() as tmp:
        backup_root = Path(tmp) / "backups"
        GlobalConfig.objects.create(
            name="default",
            backup_root=str(backup_root),
            default_ssh_credential=credential,
            ssh_user="backup",
            ssh_port=2222,
            default_source_root="/srv",
            retention_daily=3,
            retention_weekly=2,
            retention_monthly=1,
            retention_yearly=0,
        )
        create_url = reverse("create_host_config")

        # GET: the empty form must carry the global defaults.
        get_response = self.client.get(create_url)
        for expected in ('value="backup"', 'value="2222"', 'value="/srv"'):
            self.assertContains(get_response, expected)

        # POST: creating the host must also prepare its directories.
        form_data = {
            "host": "web-01",
            "address": "web-01.example.test",
            "enabled": "on",
            "ssh_credential": str(credential.id),
            "ssh_user": "backup",
            "ssh_port": "2222",
            "source_root": "/srv",
            "includes": "",
            "excludes_add": "",
            "excludes_replace": "",
            "rsync_extra_args": "",
            "retention_daily": "3",
            "retention_weekly": "2",
            "retention_monthly": "1",
            "retention_yearly": "0",
        }
        response = self.client.post(create_url, form_data, follow=True)

        self.assertRedirects(response, reverse("host_detail", args=["web-01"]))
        self.assertContains(response, "prepared")
        host_dir = backup_root / "web-01"
        for subdir in ("scheduled", "manual", ".incomplete"):
            self.assertTrue((host_dir / subdir).is_dir())
|
||||
|
||||
def test_host_detail_renders_config_schedule_runs_and_snapshots(self) -> None:
|
||||
self.client.force_login(self.staff_user)
|
||||
GlobalConfig.objects.create(name="default", backup_root="/backups")
|
||||
@@ -378,6 +451,7 @@ class ViewTests(TestCase):
|
||||
self.assertContains(response, "Backup Control")
|
||||
self.assertContains(response, "Queue dry-run")
|
||||
self.assertContains(response, "Queue backup")
|
||||
self.assertContains(response, "Host Check")
|
||||
self.assertContains(response, "ready")
|
||||
self.assertContains(response, "Snapshot Discovery")
|
||||
self.assertContains(response, reverse("queue_manual_backup", args=[host.host]))
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from django.contrib import messages
|
||||
@@ -22,6 +24,7 @@ from .forms import (
|
||||
ScheduleConfigForm,
|
||||
SshCredentialForm,
|
||||
)
|
||||
from .host_ops import collect_host_checks, ensure_host_directories
|
||||
from .models import BackupRun, GlobalConfig, HostConfig, ScheduleConfig, SnapshotRecord, SshCredential
|
||||
from .retention import run_sql_retention_apply, run_sql_retention_plan
|
||||
from .self_check import collect_self_checks, summarize_self_checks
|
||||
@@ -72,6 +75,12 @@ def self_check(request):
|
||||
)
|
||||
|
||||
|
||||
@staff_member_required
def logs(request):
    """Render the journal log viewer; access is restricted by ``staff_member_required``."""
    return render(request, "pobsync_backend/logs.html", _log_context(request))
|
||||
|
||||
|
||||
@staff_member_required
|
||||
def ssh_credentials(request):
|
||||
context = {
|
||||
@@ -152,6 +161,13 @@ def create_host_config(request):
|
||||
form = CreateHostConfigForm(request.POST)
|
||||
if form.is_valid():
|
||||
host_config = form.save()
|
||||
try:
|
||||
host_root = ensure_host_directories(host_config)
|
||||
except Exception as exc:
|
||||
messages.warning(request, f"Host config created, but backup directories could not be prepared: {exc}")
|
||||
else:
|
||||
messages.success(request, f"Host config created for {host_config.host}; prepared {host_root}.")
|
||||
return redirect("host_detail", host=host_config.host)
|
||||
messages.success(request, f"Host config created for {host_config.host}.")
|
||||
return redirect("host_detail", host=host_config.host)
|
||||
else:
|
||||
@@ -176,10 +192,13 @@ def host_detail(request, host: str):
|
||||
status__in=[BackupRun.Status.QUEUED, BackupRun.Status.RUNNING]
|
||||
).order_by("created_at", "id").first()
|
||||
has_global_config = GlobalConfig.objects.filter(name="default").exists()
|
||||
host_checks = collect_host_checks(host_config)
|
||||
context = {
|
||||
"host": host_config,
|
||||
"schedule": _schedule_for_host(host_config),
|
||||
"discovery": inspect_snapshot_discovery(host=host_config),
|
||||
"host_checks": host_checks,
|
||||
"host_check_summary": summarize_self_checks(host_checks),
|
||||
"manual_backup_form": ManualBackupForm(initial=_default_manual_backup_initial(host_config)),
|
||||
"can_queue_backup": host_config.enabled and has_global_config,
|
||||
"has_global_config": has_global_config,
|
||||
@@ -414,6 +433,19 @@ def _default_global_initial() -> dict[str, object]:
|
||||
|
||||
|
||||
def _default_host_initial() -> dict[str, object]:
|
||||
global_config = GlobalConfig.objects.filter(name="default").first()
|
||||
if global_config is not None:
|
||||
return {
|
||||
"enabled": True,
|
||||
"ssh_credential": global_config.default_ssh_credential,
|
||||
"ssh_user": global_config.ssh_user,
|
||||
"ssh_port": global_config.ssh_port,
|
||||
"source_root": global_config.default_source_root,
|
||||
"retention_daily": global_config.retention_daily,
|
||||
"retention_weekly": global_config.retention_weekly,
|
||||
"retention_monthly": global_config.retention_monthly,
|
||||
"retention_yearly": global_config.retention_yearly,
|
||||
}
|
||||
return {
|
||||
"enabled": True,
|
||||
"retention_daily": 14,
|
||||
@@ -435,3 +467,50 @@ def _default_manual_backup_initial(host_config: HostConfig) -> dict[str, object]
|
||||
|
||||
def _pretty_json(value: object) -> str:
|
||||
return json.dumps(value or {}, indent=2, sort_keys=True)
|
||||
|
||||
|
||||
def _log_context(request) -> dict[str, object]:
|
||||
units = ("pobsync-web.service", "pobsync-worker.service", "pobsync-scheduler.service")
|
||||
priorities = {
|
||||
"": "All",
|
||||
"0..3": "Errors",
|
||||
"4": "Warnings",
|
||||
"5": "Notices",
|
||||
"6": "Info",
|
||||
"7": "Debug",
|
||||
}
|
||||
selected_unit = request.GET.get("unit", "")
|
||||
priority = request.GET.get("priority", "0..4")
|
||||
query = request.GET.get("q", "").strip()
|
||||
lines = []
|
||||
error = ""
|
||||
|
||||
if shutil.which("journalctl") is None:
|
||||
error = "journalctl is not available in this runtime."
|
||||
else:
|
||||
command = ["journalctl", "--no-pager", "-n", "300", "-o", "short-iso"]
|
||||
if selected_unit in units:
|
||||
command.extend(["-u", selected_unit])
|
||||
else:
|
||||
for unit in units:
|
||||
command.extend(["-u", unit])
|
||||
if priority:
|
||||
command.extend(["-p", priority])
|
||||
result = subprocess.run(command, check=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, timeout=10)
|
||||
if result.returncode != 0:
|
||||
error = result.stderr.strip() or "Could not read journal logs."
|
||||
else:
|
||||
lines = result.stdout.splitlines()
|
||||
if query:
|
||||
lowered_query = query.lower()
|
||||
lines = [line for line in lines if lowered_query in line.lower()]
|
||||
|
||||
return {
|
||||
"units": units,
|
||||
"priorities": priorities,
|
||||
"selected_unit": selected_unit,
|
||||
"selected_priority": priority,
|
||||
"query": query,
|
||||
"lines": lines,
|
||||
"error": error,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user