(feature) Add manual storage metrics refresh #102

Merged
parkel merged 1 commit from issue-98-storage-metrics-refresh into master 2026-06-08 23:39:23 +02:00
4 changed files with 315 additions and 0 deletions

View File

@@ -73,8 +73,13 @@ One-off maintenance commands are still available when the UI is not the right to
pobsync backup <host> --dry-run
pobsync discover-snapshots --host <host>
pobsync retention <host>
pobsync django refresh_pobsync_storage_metrics --host <host>
```
`refresh_pobsync_storage_metrics` refreshes cached snapshot storage metadata outside web requests. Omit `--host` to scan
every host. Use `--kind` to limit the scan to `scheduled`, `manual`, or `incomplete` (the default, `all`, scans every
kind), `--max-entries` to bound large scans, and `--dry-run` to inspect candidate counts without writing metadata.
For scripted configuration changes, call the Django management command explicitly so it is clear that this is an
automation/debugging path rather than the normal UI workflow:

View File

@@ -0,0 +1,50 @@
from __future__ import annotations
import json
from typing import Any
from django.core.management.base import BaseCommand, CommandError
from pobsync.snapshot_meta import normalize_kind
from pobsync_backend.models import HostConfig
from pobsync_backend.storage_metrics import DEFAULT_STORAGE_SCAN_MAX_ENTRIES, refresh_snapshot_storage_metrics
class Command(BaseCommand):
    """Management command that refreshes cached snapshot storage metrics.

    The refresh counters and the dry-run flag are written to stdout as a
    JSON object with sorted keys.
    """

    help = "Refresh cached snapshot storage metrics outside web requests."

    def add_arguments(self, parser) -> None:
        """Register the host/kind filters, the scan bound and the dry-run switch."""
        parser.add_argument("--host", default=None)
        parser.add_argument("--kind", default="all", help="scheduled|manual|incomplete|all")
        parser.add_argument("--max-entries", type=int, default=DEFAULT_STORAGE_SCAN_MAX_ENTRIES)
        parser.add_argument("--dry-run", action="store_true", help="Measure candidates without writing metadata")

    @staticmethod
    def _resolve_host(name: Any) -> HostConfig | None:
        """Return the HostConfig named *name*, or None when no host was given.

        Raises:
            CommandError: if a host name was given but no such host exists.
        """
        if not name:
            return None
        try:
            return HostConfig.objects.get(host=name)
        except HostConfig.DoesNotExist as exc:
            raise CommandError(f"Missing host {name!r}") from exc

    def handle(self, *args: Any, **options: Any) -> None:
        """Run the refresh with the parsed options and print the counters as JSON."""
        dry_run = bool(options["dry_run"])
        # The host is resolved first so an unknown host is reported before
        # the kind is normalized.
        host = self._resolve_host(options["host"])
        kind = normalize_kind(options["kind"])
        if kind == "all":
            # "all" means "do not filter by kind".
            kind = None
        outcome = refresh_snapshot_storage_metrics(
            host=host,
            kind=kind,
            max_entries=int(options["max_entries"]),
            dry_run=dry_run,
        )
        report = {
            "dry_run": dry_run,
            "errors": outcome.errors,
            "missing": outcome.missing,
            "scanned": outcome.scanned,
            "skipped": outcome.skipped,
            "updated": outcome.updated,
        }
        self.stdout.write(json.dumps(report, indent=2, sort_keys=True))

View File

@@ -0,0 +1,133 @@
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Iterable
from django.utils import timezone
from pobsync.run_stats import tree_usage
from .models import HostConfig, SnapshotRecord
DEFAULT_STORAGE_SCAN_MAX_ENTRIES = 200_000
@dataclass(frozen=True)
class StorageMetricsRefreshResult:
    """Immutable counters summarizing one storage-metrics refresh run."""

    # Number of snapshot records visited.
    scanned: int
    # Records whose metadata received a fresh measurement.
    updated: int
    # Records measured but not written (dry run).
    skipped: int
    # Records whose path was empty or did not exist.
    missing: int
    # Records whose measurement failed or returned nothing.
    errors: int
def refresh_snapshot_storage_metrics(
    *,
    host: HostConfig | None = None,
    kind: str | None = None,
    max_entries: int = DEFAULT_STORAGE_SCAN_MAX_ENTRIES,
    dry_run: bool = False,
) -> StorageMetricsRefreshResult:
    """Refresh storage metrics for every matching snapshot and tally the outcomes.

    Args:
        host: Restrict the run to this host; None selects all hosts.
        kind: Restrict the run to this snapshot kind; a falsy value selects all kinds.
        max_entries: Entry bound forwarded to each per-snapshot measurement.
        dry_run: Forwarded to each per-snapshot refresh; when true nothing is saved.

    Returns:
        Counters for scanned snapshots and for each per-snapshot status.
    """
    tally = dict.fromkeys(("scanned", "updated", "skipped", "missing", "errors"), 0)
    for record in _snapshot_queryset(host=host, kind=kind):
        outcome = refresh_snapshot_storage_metric(record, max_entries=max_entries, dry_run=dry_run)
        tally["scanned"] += 1
        # Each status string doubles as the name of the counter it increments.
        tally[outcome["status"]] += 1
    return StorageMetricsRefreshResult(**tally)
def refresh_snapshot_storage_metric(
    snapshot: SnapshotRecord,
    *,
    max_entries: int = DEFAULT_STORAGE_SCAN_MAX_ENTRIES,
    dry_run: bool = False,
) -> dict[str, Any]:
    """Measure one snapshot's data tree and cache the result in its metadata.

    Args:
        snapshot: Record whose ``path`` is measured and whose ``metadata`` is updated.
        max_entries: Entry bound passed to ``tree_usage``.
        dry_run: When true, measure but never save anything on the record.

    Returns:
        A dict with ``"status"`` (one of ``"updated"``, ``"skipped"``,
        ``"missing"`` or ``"errors"``), the ``"snapshot"`` itself and the
        measured ``"path"`` as a string. Successful measurements also carry
        ``"usage"``; failures caused by an ``OSError`` carry ``"error"``.
    """
    try:
        data_path = _snapshot_data_path(snapshot)
        path_missing = data_path is None or not data_path.exists()
    except OSError as exc:
        # Probing the path can itself fail (for example PermissionError on a
        # parent directory). Report it as a per-snapshot error so one
        # unreadable snapshot does not abort a whole batch refresh.
        _record_storage_measurement_error(snapshot, reason=type(exc).__name__, message=str(exc), dry_run=dry_run)
        return {"status": "errors", "snapshot": snapshot, "path": str(snapshot.path), "error": str(exc)}
    if path_missing:
        _record_storage_measurement_error(snapshot, reason="missing_path", dry_run=dry_run)
        return {"status": "missing", "snapshot": snapshot, "path": str(data_path) if data_path else ""}
    try:
        usage = tree_usage(data_path, max_entries=max_entries)
    except OSError as exc:
        _record_storage_measurement_error(snapshot, reason=type(exc).__name__, message=str(exc), dry_run=dry_run)
        return {"status": "errors", "snapshot": snapshot, "path": str(data_path), "error": str(exc)}
    if not usage:
        # An empty measurement is treated as a failure, not as "zero bytes".
        _record_storage_measurement_error(snapshot, reason="unreadable", dry_run=dry_run)
        return {"status": "errors", "snapshot": snapshot, "path": str(data_path)}
    if dry_run:
        # Nothing is written, so the merged metadata is not built at all.
        return {"status": "skipped", "snapshot": snapshot, "path": str(data_path), "usage": usage}
    snapshot.metadata = _metadata_with_storage(snapshot.metadata, usage=usage, source="manual_refresh")
    snapshot.save(update_fields=["metadata"])
    return {"status": "updated", "snapshot": snapshot, "path": str(data_path), "usage": usage}
def _snapshot_queryset(*, host: HostConfig | None, kind: str | None) -> Iterable[SnapshotRecord]:
    """Return the snapshot records to refresh, optionally narrowed by host and kind.

    Records are ordered by host name, then kind, then directory name.
    """
    queryset = SnapshotRecord.objects.select_related("host")
    if host is not None:
        queryset = queryset.filter(host=host)
    if kind:
        # A falsy kind (None or "") means "every kind".
        queryset = queryset.filter(kind=kind)
    return queryset.order_by("host__host", "kind", "dirname")
def _snapshot_data_path(snapshot: SnapshotRecord) -> Path | None:
    """Pick the directory to measure for *snapshot*.

    Returns None when the record has no path. A path already named ``data``
    is used as-is. Otherwise the ``data`` subdirectory is preferred when it
    exists, and the snapshot path itself is the fallback.
    """
    raw_path = snapshot.path
    if not raw_path:
        return None
    base = Path(raw_path)
    if base.name == "data":
        return base
    nested = base / "data"
    return nested if nested.exists() else base
def _metadata_with_storage(metadata: object, *, usage: dict[str, Any], source: str) -> dict[str, Any]:
    """Return a copy of *metadata* with a fresh ``stats.storage.snapshot`` entry.

    The input is not mutated: the ``stats`` and ``storage`` levels are copied
    before being changed, and any level that is not a dict is replaced by an
    empty one. Other keys under ``stats.storage`` (including an earlier
    ``snapshot_measurement_error``) are kept unchanged.
    """

    def _copy_if_dict(value: object) -> dict[str, Any]:
        # Shallow copy of a dict; anything else becomes an empty dict.
        return dict(value) if isinstance(value, dict) else {}

    merged = _copy_if_dict(metadata)
    stats = _copy_if_dict(merged.get("stats"))
    storage = _copy_if_dict(stats.get("storage"))

    # The measurement values come first, then the bookkeeping fields.
    entry = dict(usage)
    entry["measured_at"] = timezone.now().isoformat()
    entry["measurement_source"] = source

    storage["snapshot"] = entry
    stats["storage"] = storage
    merged["stats"] = stats
    return merged
def _record_storage_measurement_error(
    snapshot: SnapshotRecord,
    *,
    reason: str,
    message: str = "",
    dry_run: bool,
) -> None:
    """Store a failed measurement under ``stats.storage.snapshot_measurement_error``.

    Does nothing in dry-run mode. Otherwise the record's metadata is rebuilt
    from copies of its ``stats`` and ``storage`` levels (non-dict levels are
    replaced by empty dicts) and only the ``metadata`` field is saved.
    """
    if dry_run:
        return

    def _copy_if_dict(value: object) -> dict[str, Any]:
        # Shallow copy of a dict; anything else becomes an empty dict.
        return dict(value) if isinstance(value, dict) else {}

    updated = _copy_if_dict(snapshot.metadata)
    stats = _copy_if_dict(updated.get("stats"))
    storage = _copy_if_dict(stats.get("storage"))

    storage["snapshot_measurement_error"] = dict(
        reason=reason,
        message=message,
        measured_at=timezone.now().isoformat(),
        measurement_source="manual_refresh",
    )
    stats["storage"] = storage
    updated["stats"] = stats

    snapshot.metadata = updated
    snapshot.save(update_fields=["metadata"])

View File

@@ -0,0 +1,127 @@
from __future__ import annotations
import json
from io import StringIO
from pathlib import Path
from tempfile import TemporaryDirectory
from django.core.management import call_command
from django.test import TestCase
from pobsync_backend.models import HostConfig, SnapshotRecord
from pobsync_backend.storage_metrics import refresh_snapshot_storage_metric, refresh_snapshot_storage_metrics
class StorageMetricsTests(TestCase):
    """Tests for the storage metrics refresh helpers and the management command."""

    def test_refresh_snapshot_storage_metric_updates_snapshot_metadata(self) -> None:
        """A successful refresh stores the measurement under stats.storage.snapshot."""
        host = HostConfig.objects.create(host="web-01", address="web-01.example.test")

        with TemporaryDirectory() as tmp:
            snapshot = self._snapshot_with_file(host, Path(tmp), "scheduled", "payload.txt", b"payload")
            outcome = refresh_snapshot_storage_metric(snapshot, max_entries=100)

        snapshot.refresh_from_db()
        measured = snapshot.metadata["stats"]["storage"]["snapshot"]
        self.assertEqual(outcome["status"], "updated")
        self.assertEqual(measured["files"], 1)
        self.assertEqual(measured["apparent_size_bytes"], 7)
        self.assertEqual(measured["max_entries"], 100)
        self.assertFalse(measured["scan_limited"])
        self.assertEqual(measured["measurement_source"], "manual_refresh")
        self.assertIn("measured_at", measured)

    def test_refresh_snapshot_storage_metric_dry_run_does_not_write_metadata(self) -> None:
        """A dry run reports "skipped" and leaves the stored metadata empty."""
        host = HostConfig.objects.create(host="web-01", address="web-01.example.test")

        with TemporaryDirectory() as tmp:
            snapshot = self._snapshot_with_file(host, Path(tmp), "scheduled", "payload.txt", b"payload")
            outcome = refresh_snapshot_storage_metric(snapshot, max_entries=100, dry_run=True)

        snapshot.refresh_from_db()
        self.assertEqual(outcome["status"], "skipped")
        self.assertEqual(snapshot.metadata, {})

    def test_refresh_snapshot_storage_metrics_filters_by_host_and_kind(self) -> None:
        """Only the snapshot matching both the host and the kind is refreshed."""
        web = HostConfig.objects.create(host="web-01", address="web-01.example.test")
        db = HostConfig.objects.create(host="db-01", address="db-01.example.test")

        with TemporaryDirectory() as tmp:
            root = Path(tmp)
            target = self._snapshot_with_file(web, root, "scheduled", "target.txt", b"target")
            other_kind = self._snapshot_with_file(web, root, "manual", "manual.txt", b"manual")
            other_host = self._snapshot_with_file(db, root, "scheduled", "db.txt", b"db")
            summary = refresh_snapshot_storage_metrics(
                host=web,
                kind=SnapshotRecord.Kind.SCHEDULED,
                max_entries=100,
            )

        self.assertEqual(summary.scanned, 1)
        self.assertEqual(summary.updated, 1)
        for record in (target, other_kind, other_host):
            record.refresh_from_db()
        self.assertIn("stats", target.metadata)
        self.assertEqual(other_kind.metadata, {})
        self.assertEqual(other_host.metadata, {})

    def test_refresh_snapshot_storage_metrics_records_missing_paths(self) -> None:
        """A snapshot whose path does not exist is counted as missing and annotated."""
        host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
        snapshot = SnapshotRecord.objects.create(
            host=host,
            kind=SnapshotRecord.Kind.SCHEDULED,
            dirname="20260608-100000Z__MISSING",
            path="/missing/pobsync/snapshot",
            status="success",
            metadata={},
        )

        summary = refresh_snapshot_storage_metrics(host=host, max_entries=100)

        snapshot.refresh_from_db()
        self.assertEqual(summary.scanned, 1)
        self.assertEqual(summary.missing, 1)
        failure = snapshot.metadata["stats"]["storage"]["snapshot_measurement_error"]
        self.assertEqual(failure["reason"], "missing_path")
        self.assertEqual(failure["measurement_source"], "manual_refresh")

    def test_refresh_command_outputs_counts(self) -> None:
        """The management command prints the refresh counters as JSON."""
        host = HostConfig.objects.create(host="web-01", address="web-01.example.test")
        captured = StringIO()

        with TemporaryDirectory() as tmp:
            self._snapshot_with_file(host, Path(tmp), "scheduled", "payload.txt", b"payload")
            # The command must run while the temporary snapshot tree still exists.
            call_command(
                "refresh_pobsync_storage_metrics",
                "--host",
                host.host,
                "--kind",
                "scheduled",
                "--max-entries",
                "100",
                stdout=captured,
            )

        report = json.loads(captured.getvalue())
        self.assertEqual(report["scanned"], 1)
        self.assertEqual(report["updated"], 1)
        self.assertFalse(report["dry_run"])

    def _snapshot_with_file(
        self,
        host: HostConfig,
        root: Path,
        kind: str,
        filename: str,
        content: bytes,
    ) -> SnapshotRecord:
        """Create ``<root>/<host>/<kind dir>/<dirname>/data/<filename>`` and its record.

        Incomplete snapshots live under ``.incomplete``; other kinds use the
        kind name as the parent directory.
        """
        host_tag = host.host.replace("-", "").upper()
        kind_tag = kind[:3].upper()
        dirname = f"20260608-100000Z__{host_tag}{kind_tag}"
        if kind == SnapshotRecord.Kind.INCOMPLETE:
            parent = ".incomplete"
        else:
            parent = kind
        snapshot_dir = root / host.host / parent / dirname
        payload_dir = snapshot_dir / "data"
        payload_dir.mkdir(parents=True)
        (payload_dir / filename).write_bytes(content)
        return SnapshotRecord.objects.create(
            host=host,
            kind=kind,
            dirname=dirname,
            path=str(snapshot_dir),
            status="success",
            metadata={},
        )