From 89397b0cda154a3be13c50f4236a5590a1e8dd33 Mon Sep 17 00:00:00 2001 From: Peter van Arkel Date: Mon, 8 Jun 2026 23:31:32 +0200 Subject: [PATCH] (feature) Add manual storage metrics refresh Add a storage metrics refresh service and management command that update cached snapshot storage metadata outside web requests. The command supports host/kind filters, bounded scans, and dry-run output for operational refreshes. Refs #98 --- docs/development.md | 5 + .../refresh_pobsync_storage_metrics.py | 50 +++++++ src/pobsync_backend/storage_metrics.py | 133 ++++++++++++++++++ .../tests/test_storage_metrics.py | 127 +++++++++++++++++ 4 files changed, 315 insertions(+) create mode 100644 src/pobsync_backend/management/commands/refresh_pobsync_storage_metrics.py create mode 100644 src/pobsync_backend/storage_metrics.py create mode 100644 src/pobsync_backend/tests/test_storage_metrics.py diff --git a/docs/development.md b/docs/development.md index 0232d26..df8c9a5 100644 --- a/docs/development.md +++ b/docs/development.md @@ -73,8 +73,13 @@ One-off maintenance commands are still available when the UI is not the right to pobsync backup --dry-run pobsync discover-snapshots --host pobsync retention +pobsync django refresh_pobsync_storage_metrics --host ``` +`refresh_pobsync_storage_metrics` refreshes cached snapshot storage metadata outside web requests. Use `--kind` to limit +the scan to `scheduled`, `manual`, or `incomplete`, `--max-entries` to bound large scans, and `--dry-run` to inspect +candidate counts without writing metadata. 
class Command(BaseCommand):
    """Management command that refreshes cached snapshot storage metrics.

    The refresh can be narrowed to a single host and/or snapshot kind. The
    counters returned by ``refresh_snapshot_storage_metrics`` are written to
    stdout as a JSON object with sorted keys.
    """

    help = "Refresh cached snapshot storage metrics outside web requests."

    def add_arguments(self, parser) -> None:
        """Register the command-line options in their documented order."""
        option_specs = (
            ("--host", {"default": None}),
            ("--kind", {"default": "all", "help": "scheduled|manual|incomplete|all"}),
            ("--max-entries", {"type": int, "default": DEFAULT_STORAGE_SCAN_MAX_ENTRIES}),
            ("--dry-run", {"action": "store_true", "help": "Measure candidates without writing metadata"}),
        )
        for flag, settings in option_specs:
            parser.add_argument(flag, **settings)

    def handle(self, *args: Any, **options: Any) -> None:
        """Run the refresh and print the resulting counters as JSON.

        Raises:
            CommandError: when ``--host`` names a host without a ``HostConfig`` row.
        """
        # Resolve the optional host filter first so an unknown host fails
        # before any snapshot is touched.
        host_name = options["host"]
        host_config = None
        if host_name:
            try:
                host_config = HostConfig.objects.get(host=host_name)
            except HostConfig.DoesNotExist as exc:
                raise CommandError(f"Missing host {host_name!r}") from exc

        # "all" is the command-line spelling for "no kind filter".
        kind_filter = normalize_kind(options["kind"])
        if kind_filter == "all":
            kind_filter = None

        entry_limit = int(options["max_entries"])
        is_dry_run = bool(options["dry_run"])
        outcome = refresh_snapshot_storage_metrics(
            host=host_config,
            kind=kind_filter,
            max_entries=entry_limit,
            dry_run=is_dry_run,
        )

        report = {
            "scanned": outcome.scanned,
            "updated": outcome.updated,
            "skipped": outcome.skipped,
            "missing": outcome.missing,
            "errors": outcome.errors,
            "dry_run": is_dry_run,
        }
        self.stdout.write(json.dumps(report, indent=2, sort_keys=True))
def _snapshot_data_path(snapshot: SnapshotRecord) -> Path | None:
    """Return the directory to measure for *snapshot*, or ``None`` if it has no path.

    A recorded path whose last component is ``data`` is returned unchanged.
    Otherwise the ``data`` child is preferred when it exists on disk, and the
    recorded path itself is the fallback.
    """
    raw_path = snapshot.path
    if not raw_path:
        return None

    root = Path(raw_path)
    if root.name != "data":
        nested = root / "data"
        if nested.exists():
            return nested
    return root
class StorageMetricsTests(TestCase):
    """Tests for the storage metrics refresh helpers and the management command."""

    def test_refresh_snapshot_storage_metric_updates_snapshot_metadata(self) -> None:
        # A refresh of an on-disk snapshot must persist the measured usage
        # under metadata["stats"]["storage"]["snapshot"].
        web = HostConfig.objects.create(host="web-01", address="web-01.example.test")
        with TemporaryDirectory() as workdir:
            record = self._snapshot_with_file(web, Path(workdir), "scheduled", "payload.txt", b"payload")

            outcome = refresh_snapshot_storage_metric(record, max_entries=100)

            record.refresh_from_db()
            measured = record.metadata["stats"]["storage"]["snapshot"]
            self.assertEqual(outcome["status"], "updated")
            self.assertEqual(measured["files"], 1)
            self.assertEqual(measured["apparent_size_bytes"], 7)
            self.assertEqual(measured["max_entries"], 100)
            self.assertFalse(measured["scan_limited"])
            self.assertEqual(measured["measurement_source"], "manual_refresh")
            self.assertIn("measured_at", measured)

    def test_refresh_snapshot_storage_metric_dry_run_does_not_write_metadata(self) -> None:
        # With dry_run=True the snapshot is reported as skipped and the stored
        # metadata stays empty.
        web = HostConfig.objects.create(host="web-01", address="web-01.example.test")
        with TemporaryDirectory() as workdir:
            record = self._snapshot_with_file(web, Path(workdir), "scheduled", "payload.txt", b"payload")

            outcome = refresh_snapshot_storage_metric(record, max_entries=100, dry_run=True)

            record.refresh_from_db()
            self.assertEqual(outcome["status"], "skipped")
            self.assertEqual(record.metadata, {})

    def test_refresh_snapshot_storage_metrics_filters_by_host_and_kind(self) -> None:
        # Only the snapshot matching both the host and the kind filter may be
        # scanned and updated.
        web_host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
        db_host = HostConfig.objects.create(host="db-01", address="db-01.example.test")
        with TemporaryDirectory() as workdir:
            base = Path(workdir)
            matching = self._snapshot_with_file(web_host, base, "scheduled", "target.txt", b"target")
            wrong_kind = self._snapshot_with_file(web_host, base, "manual", "manual.txt", b"manual")
            wrong_host = self._snapshot_with_file(db_host, base, "scheduled", "db.txt", b"db")

            totals = refresh_snapshot_storage_metrics(
                host=web_host,
                kind=SnapshotRecord.Kind.SCHEDULED,
                max_entries=100,
            )

            self.assertEqual(totals.scanned, 1)
            self.assertEqual(totals.updated, 1)
            for record in (matching, wrong_kind, wrong_host):
                record.refresh_from_db()
            self.assertIn("stats", matching.metadata)
            self.assertEqual(wrong_kind.metadata, {})
            self.assertEqual(wrong_host.metadata, {})

    def test_refresh_snapshot_storage_metrics_records_missing_paths(self) -> None:
        # A snapshot whose path does not exist is counted as missing and gets
        # a measurement error written into its metadata.
        web = HostConfig.objects.create(host="web-01", address="web-01.example.test")
        record = SnapshotRecord.objects.create(
            host=web,
            kind=SnapshotRecord.Kind.SCHEDULED,
            dirname="20260608-100000Z__MISSING",
            path="/missing/pobsync/snapshot",
            status="success",
            metadata={},
        )

        totals = refresh_snapshot_storage_metrics(host=web, max_entries=100)

        record.refresh_from_db()
        self.assertEqual(totals.scanned, 1)
        self.assertEqual(totals.missing, 1)
        failure = record.metadata["stats"]["storage"]["snapshot_measurement_error"]
        self.assertEqual(failure["reason"], "missing_path")
        self.assertEqual(failure["measurement_source"], "manual_refresh")

    def test_refresh_command_outputs_counts(self) -> None:
        # The management command prints the counters as JSON on stdout.
        web = HostConfig.objects.create(host="web-01", address="web-01.example.test")
        with TemporaryDirectory() as workdir:
            self._snapshot_with_file(web, Path(workdir), "scheduled", "payload.txt", b"payload")
            buffer = StringIO()
            cli_args = ["--host", web.host, "--kind", "scheduled", "--max-entries", "100"]

            call_command("refresh_pobsync_storage_metrics", *cli_args, stdout=buffer)

            report = json.loads(buffer.getvalue())
            self.assertEqual(report["scanned"], 1)
            self.assertEqual(report["updated"], 1)
            self.assertFalse(report["dry_run"])

    def _snapshot_with_file(
        self,
        host: HostConfig,
        root: Path,
        kind: str,
        filename: str,
        content: bytes,
    ) -> SnapshotRecord:
        """Create ``<root>/<host>/<parent>/<dirname>/data/<filename>`` and its record.

        The parent directory is ``.incomplete`` for incomplete snapshots and
        the kind name otherwise. The returned record points at the snapshot
        directory, not at its ``data`` child.
        """
        host_tag = host.host.replace("-", "").upper()
        kind_tag = kind[:3].upper()
        dirname = f"20260608-100000Z__{host_tag}{kind_tag}"
        if kind == SnapshotRecord.Kind.INCOMPLETE:
            parent = ".incomplete"
        else:
            parent = kind

        snapshot_dir = root / host.host / parent / dirname
        payload = snapshot_dir / "data" / filename
        payload.parent.mkdir(parents=True)
        payload.write_bytes(content)

        return SnapshotRecord.objects.create(
            host=host,
            kind=kind,
            dirname=dirname,
            path=str(snapshot_dir),
            status="success",
            metadata={},
        )