diff --git a/README.md b/README.md index 527c817..70a68eb 100644 --- a/README.md +++ b/README.md @@ -154,6 +154,45 @@ The UI includes: - `/self-check/` for runtime checks - `/logs/` for filtered pobsync service logs +## Restoring Data + +pobsync 1.0 treats restores as an explicit manual operation. The control panel shows restore guidance on each snapshot +detail page, but it does not run restore commands for you yet. That is deliberate: restores should be inspected and +tested before data is copied back into a live system. + +Each snapshot directory contains: + +``` +<snapshot>/data/ # backed-up filesystem contents +<snapshot>/meta/ # metadata and rsync logs +``` + +Use the `data/` directory as the rsync source. Start with a dry run and restore to a staging path first: + +``` +rsync -aHAX --numeric-ids --info=progress2 --dry-run /backups/example.org/scheduled/<snapshot>/data/ /restore/example.org/ +rsync -aHAX --numeric-ids --info=progress2 /backups/example.org/scheduled/<snapshot>/data/ /restore/example.org/ +``` + +After validating the staged files, copy the specific files or directories back to the target machine. For a full-host +restore, use another dry run before writing to the remote root: + +``` +rsync -aHAX --numeric-ids --info=progress2 --dry-run /backups/example.org/scheduled/<snapshot>/data/ root@example.org:/ +``` + +For most incidents, prefer a targeted restore instead of copying the whole snapshot. Keep paths relative to the +snapshot's `data/` directory: + +``` +rsync -aHAX --numeric-ids --info=progress2 --dry-run /backups/example.org/scheduled/<snapshot>/data/etc/nginx/ /restore/example.org/etc/nginx/ +rsync -aHAX --numeric-ids --info=progress2 --dry-run /backups/example.org/scheduled/<snapshot>/data/home/example/site/public_html/index.php /restore/example.org/home/example/site/public_html/index.php +``` + +Snapshots may use hardlinks for files that are unchanged between backups. That saves disk space and is safe for normal +restore copies, but do not edit files inside snapshot directories. 
Treat snapshots as read-only and copy data out with +rsync. + ## SSH Keys SSH keys can be managed from `/ssh-credentials/`. The recommended flow is to generate keys from Django or during the diff --git a/src/pobsync_backend/templates/pobsync_backend/snapshot_detail.html b/src/pobsync_backend/templates/pobsync_backend/snapshot_detail.html index f858624..f09d0f8 100644 --- a/src/pobsync_backend/templates/pobsync_backend/snapshot_detail.html +++ b/src/pobsync_backend/templates/pobsync_backend/snapshot_detail.html @@ -60,6 +60,48 @@ {% endif %} +
+

Restore Guidance

+
+
Snapshot data source: {{ restore.source_path }}
+
Example staging destination: {{ restore.destination_path }}
+
+ Restore from the snapshot's data/ directory. Start with a dry run, restore to a staging path first, + and only then copy data back to a live host or service path. +
+
+
+
Inspect the snapshot:
+
{{ restore.inspect_command }}
+
+
+
Dry-run restore to staging:
+
{{ restore.dry_run_command }}
+
+
+
Restore to staging:
+
{{ restore.local_command }}
+
+
+
Dry-run a directory restore:
+
{{ restore.partial_dry_run_command }}
+
Replace {{ restore.example_relative_path }} with the path you want to restore.
+
+
+
Dry-run a single file restore:
+
{{ restore.file_dry_run_command }}
+
Replace {{ restore.example_file_relative_path }} with the file you want to restore.
+
+
+
Dry-run restore back to the source host:
+
{{ restore.remote_dry_run_command }}
+
+

+ Snapshots can contain hardlinks to files shared with earlier snapshots. Treat snapshot directories as read-only: + copy data out with rsync instead of editing files in place. +

+
+

Backup Runs

diff --git a/src/pobsync_backend/tests/test_views.py b/src/pobsync_backend/tests/test_views.py index 5fcaa29..031d0ed 100644 --- a/src/pobsync_backend/tests/test_views.py +++ b/src/pobsync_backend/tests/test_views.py @@ -1421,6 +1421,19 @@ class ViewTests(TestCase): self.assertContains(response, "Stats") self.assertContains(response, "Files seen: 100") self.assertContains(response, "Hardlinked files: 9") + self.assertContains(response, "Restore Guidance") + self.assertContains(response, f"{base.path}/data") + self.assertContains(response, f"/restore/{host.host}") + self.assertContains(response, "rsync -aHAX --numeric-ids --info=progress2 --dry-run") + self.assertContains(response, f"{base.path}/data/") + self.assertContains(response, "root@web-01.example.test:/") + self.assertContains(response, "Dry-run a directory restore") + self.assertContains(response, f"{base.path}/data/etc/nginx/") + self.assertContains(response, f"/restore/{host.host}/etc/nginx/") + self.assertContains(response, "Dry-run a single file restore") + self.assertContains(response, f"{base.path}/data/home/example/site/public_html/index.php") + self.assertContains(response, f"/restore/{host.host}/home/example/site/public_html/index.php") + self.assertContains(response, "Treat snapshot directories as read-only") self.assertContains(response, child.dirname) self.assertContains(response, f"Run {run.id}") self.assertContains(response, reverse("run_detail", args=[run.id])) diff --git a/src/pobsync_backend/views.py b/src/pobsync_backend/views.py index 03a739a..bc78428 100644 --- a/src/pobsync_backend/views.py +++ b/src/pobsync_backend/views.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +import shlex import shutil import subprocess from pathlib import Path @@ -494,12 +495,14 @@ def snapshot_detail(request, snapshot_id: int): SnapshotRecord.objects.select_related("host", "base").prefetch_related("derived_snapshots", "backup_runs"), id=snapshot_id, ) + restore = 
_snapshot_restore_guidance(snapshot) context = { "snapshot": snapshot, "stats": snapshot.metadata.get("stats") if isinstance(snapshot.metadata, dict) else {}, "metadata_json": _pretty_json(snapshot.metadata), "backup_runs": snapshot.backup_runs.select_related("host").order_by("-created_at"), "derived_snapshots": snapshot.derived_snapshots.select_related("host").order_by("-started_at", "dirname"), + "restore": restore, } return render(request, "pobsync_backend/snapshot_detail.html", context) @@ -790,6 +793,38 @@ def _pretty_json(value: object) -> str: return json.dumps(value or {}, indent=2, sort_keys=True) +def _snapshot_restore_guidance(snapshot: SnapshotRecord) -> dict[str, str]: + source_path = Path(snapshot.path) / "data" + destination_path = Path("/restore") / snapshot.host.host + example_relative_path = Path("etc") / "nginx" + example_file_relative_path = Path("home") / "example" / "site" / "public_html" / "index.php" + quoted_source = _quote_path_with_trailing_slash(source_path) + quoted_destination = _quote_path_with_trailing_slash(destination_path) + quoted_partial_source = _quote_path_with_trailing_slash(source_path / example_relative_path) + quoted_partial_destination = _quote_path_with_trailing_slash(destination_path / example_relative_path) + quoted_file_source = shlex.quote(str(source_path / example_file_relative_path)) + quoted_file_destination = shlex.quote(str(destination_path / example_file_relative_path)) + quoted_remote_destination = shlex.quote(f"root@{snapshot.host.address or snapshot.host.host}:/") + common_args = "rsync -aHAX --numeric-ids --info=progress2" + + return { + "source_path": str(source_path), + "destination_path": str(destination_path), + "example_relative_path": str(example_relative_path), + "example_file_relative_path": str(example_file_relative_path), + "inspect_command": f"ls -la {quoted_source}", + "dry_run_command": f"{common_args} --dry-run {quoted_source} {quoted_destination}", + "local_command": f"{common_args} 
{quoted_source} {quoted_destination}", + "partial_dry_run_command": f"{common_args} --dry-run {quoted_partial_source} {quoted_partial_destination}", + "file_dry_run_command": f"{common_args} --dry-run {quoted_file_source} {quoted_file_destination}", + "remote_dry_run_command": f"{common_args} --dry-run {quoted_source} {quoted_remote_destination}", + } + + +def _quote_path_with_trailing_slash(path: Path) -> str: + return shlex.quote(str(path).rstrip("/") + "/") + + def _run_rsync_log_path(run: BackupRun) -> Path | None: if isinstance(run.result, dict): log = run.result.get("log")