Record rsync process pid and execution phase while normal backup runs are active so the worker can reconcile stale running rows when rsync has already disappeared. Keep finalizing runs out of the missing-process path to avoid marking slow post-rsync stats collection as a failed transfer. Closes #54
372 lines
16 KiB
Python
372 lines
16 KiB
Python
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
from tempfile import TemporaryDirectory
|
|
from unittest.mock import patch
|
|
|
|
from django.test import SimpleTestCase
|
|
|
|
from pobsync.commands.run_scheduled import run_scheduled
|
|
from pobsync.errors import ConfigError
|
|
from pobsync.rsync import RsyncResult
|
|
|
|
|
|
class FakeConfigSource:
    """In-memory config source that serves one canned host configuration."""

    def __init__(self, backup_root: str = "/tmp/pobsync-test-backups") -> None:
        # The backup root is the only value callers can vary.
        self.backup_root = backup_root

    def effective_config_for_host(self, host: str) -> dict:
        """Return a newly built configuration dict for *host*.

        Every call creates fresh nested containers, so a caller may mutate
        the returned dict without affecting later calls.
        """
        ssh_settings = dict(user="root", port=22, options=[])
        rsync_settings = dict(
            binary="rsync",
            args_effective=["--archive"],
            timeout_seconds=0,
            bwlimit_kbps=0,
        )
        retention_counts = dict(daily=7, weekly=4, monthly=3, yearly=1)

        # Key order mirrors the original literal.
        return dict(
            backup_root=self.backup_root,
            host=host,
            address="example.test",
            ssh=ssh_settings,
            rsync=rsync_settings,
            source_root="/",
            includes=[],
            excludes_effective=[],
            retention=retention_counts,
        )
|
|
|
|
|
|
class RunScheduledConfigSourceTests(SimpleTestCase):
|
|
def test_requires_explicit_config_source(self) -> None:
    # No config_source is passed, so the call is expected to be rejected.
    call_kwargs = {"prefix": Path("/missing-prefix"), "host": "web-01", "dry_run": True}
    with self.assertRaisesMessage(ConfigError, "A Django config source is required."):
        run_scheduled(**call_kwargs)
|
|
|
|
def test_dry_run_uses_injected_config_source(self) -> None:
    # run_rsync is replaced by a mock returning a successful result, so the
    # only configuration available to the run is the injected fake source.
    canned = RsyncResult(exit_code=0, command=["rsync", "--archive"])
    with patch("pobsync.commands.run_scheduled.run_rsync", return_value=canned) as rsync_mock:
        outcome = run_scheduled(
            prefix=Path("/missing-prefix"),
            host="web-01",
            dry_run=True,
            config_source=FakeConfigSource(),
        )

    self.assertTrue(outcome["ok"])
    self.assertEqual(outcome["host"], "web-01")
    rsync_mock.assert_called_once()
|
|
|
|
def test_failed_dry_run_includes_log_tail(self) -> None:
    # Stand-in for run_rsync: writes two lines to the log and reports
    # exit code 12.
    def fake_run_rsync(command, log_path, timeout_seconds, cancel_check=None):
        log_dir = log_path.parent
        log_dir.mkdir(parents=True, exist_ok=True)
        log_path.write_text("Permission denied (publickey).\nrsync error\n", encoding="utf-8")
        return RsyncResult(exit_code=12, command=command)

    rsync_patch = patch("pobsync.commands.run_scheduled.run_rsync", side_effect=fake_run_rsync)
    with rsync_patch:
        outcome = run_scheduled(
            prefix=Path("/missing-prefix"),
            host="web-01",
            dry_run=True,
            config_source=FakeConfigSource(),
        )

    self.assertFalse(outcome["ok"])
    rsync_info = outcome["rsync"]
    self.assertEqual(rsync_info["exit_code"], 12)
    self.assertEqual(rsync_info["log_tail"], ["Permission denied (publickey).", "rsync error"])
    self.assertEqual(outcome["failure"]["category"], "permissions")
|
|
|
|
def test_failed_dry_run_classifies_broken_pipe(self) -> None:
    # Stand-in for run_rsync: logs a broken-pipe write error and reports
    # exit code 255.
    def fake_run_rsync(command, log_path, timeout_seconds, cancel_check=None):
        log_text = (
            "rsync error: unexplained error (code 255) at rsync.c(716) [generator=3.4.1]\n"
            "rsync: [generator] write error: Broken pipe (32)\n"
        )
        log_path.parent.mkdir(parents=True, exist_ok=True)
        log_path.write_text(log_text, encoding="utf-8")
        return RsyncResult(exit_code=255, command=command)

    rsync_patch = patch("pobsync.commands.run_scheduled.run_rsync", side_effect=fake_run_rsync)
    with rsync_patch:
        outcome = run_scheduled(
            prefix=Path("/missing-prefix"),
            host="web-01",
            dry_run=True,
            config_source=FakeConfigSource(),
        )

    self.assertFalse(outcome["ok"])
    self.assertEqual(outcome["rsync"]["exit_code"], 255)
    failure = outcome["failure"]
    self.assertEqual(failure["category"], "transport")
    # The hint is matched case-insensitively.
    lowered_hint = failure["hint"].lower()
    self.assertIn("broken pipe", lowered_hint)
|
|
|
|
def test_dry_run_clears_previous_log_before_running(self) -> None:
    # Stand-in for run_rsync: checks that no log exists at the path it is
    # given when rsync is about to start, then writes this run's output.
    def fake_run_rsync(command, log_path, timeout_seconds, cancel_check=None):
        self.assertFalse(log_path.exists())
        log_path.parent.mkdir(parents=True, exist_ok=True)
        log_path.write_text("current run only\n", encoding="utf-8")
        return RsyncResult(exit_code=0, command=command)

    # Seed a stale log at the fixed ad-hoc dry-run location
    # (presumably the same path run_scheduled hands to run_rsync).
    old_log = Path("/tmp/pobsync-dryrun/web-01/adhoc/rsync.log")
    # The path lives in the shared /tmp tree, so remove the file when the
    # test ends instead of leaking it into later tests or runs.
    self.addCleanup(old_log.unlink, missing_ok=True)
    old_log.parent.mkdir(parents=True, exist_ok=True)
    old_log.write_text("old failure\n", encoding="utf-8")

    with patch("pobsync.commands.run_scheduled.run_rsync", side_effect=fake_run_rsync):
        result = run_scheduled(
            prefix=Path("/missing-prefix"),
            host="web-01",
            dry_run=True,
            config_source=FakeConfigSource(),
        )

    self.assertTrue(result["ok"])
    # Only the current run's output may appear in the reported tail.
    self.assertEqual(result["rsync"]["log_tail"], ["current run only"])
|
|
|
|
def test_dry_run_uses_run_specific_log_path_and_default_timeout(self) -> None:
    # Stand-in for run_rsync: verifies the arguments it receives, then
    # writes an rsync-style statistics summary to the log.
    def fake_run_rsync(command, log_path, timeout_seconds, cancel_check=None):
        # run_id=42 is expected to select a run-specific log directory.
        self.assertEqual(log_path, Path("/tmp/pobsync-dryrun/web-01/run-42/rsync.log"))
        # The fake config sets timeout_seconds to 0; 900 is what the run
        # is expected to pass instead.
        self.assertEqual(timeout_seconds, 900)
        self.assertIn("--itemize-changes", command)
        self.assertIn("--info=flist2,progress2,stats2", command)
        self.assertIn("--stats", command)
        log_path.parent.mkdir(parents=True, exist_ok=True)
        log_path.write_text(
            "Number of files: 42\n"
            "Number of regular files transferred: 3\n"
            "Total file size: 1,000 bytes\n"
            "Literal data: 100 bytes\n"
            "Matched data: 900 bytes\n",
            encoding="utf-8",
        )
        return RsyncResult(exit_code=0, command=command)

    with patch("pobsync.commands.run_scheduled.run_rsync", side_effect=fake_run_rsync):
        result = run_scheduled(
            prefix=Path("/missing-prefix"),
            host="web-01",
            dry_run=True,
            config_source=FakeConfigSource(),
            run_id=42,
        )

    self.assertTrue(result["ok"])
    self.assertEqual(result["log"], "/tmp/pobsync-dryrun/web-01/run-42/rsync.log")
    self.assertEqual(result["timeout_seconds"], 900)
    self.assertEqual(result["stats"]["rsync"]["files_total"], 42)
    # The ratio is a computed float, so compare with a tolerance rather
    # than relying on exact binary floating-point equality.
    self.assertAlmostEqual(result["stats"]["rsync"]["link_dest_estimated_savings_ratio"], 0.9)
|
|
|
|
def test_dry_run_does_not_duplicate_custom_output_args(self) -> None:
    source = FakeConfigSource()
    custom_args = ("--archive", "--itemize-changes", "--info=name1,stats2")

    # Instance-level override: start from the stock config and swap in
    # rsync args that already carry output options.
    def effective_config_for_host(host: str) -> dict:
        stock = FakeConfigSource.effective_config_for_host(source, host)
        stock["rsync"]["args_effective"] = list(custom_args)
        return stock

    source.effective_config_for_host = effective_config_for_host

    canned = RsyncResult(exit_code=0, command=["rsync"])
    with patch("pobsync.commands.run_scheduled.run_rsync", return_value=canned) as rsync_mock:
        run_scheduled(
            prefix=Path("/missing-prefix"),
            host="web-01",
            dry_run=True,
            config_source=source,
        )

    # The command is the first positional argument given to run_rsync.
    rsync_argv = rsync_mock.call_args.args[0]
    self.assertEqual(rsync_argv.count("--itemize-changes"), 1)
    self.assertNotIn("--info=flist2,progress2,stats2", rsync_argv)
    self.assertIn("--info=name1,stats2", rsync_argv)
|
|
|
|
def test_real_run_can_request_verbose_output_args(self) -> None:
    canned = RsyncResult(exit_code=0, command=["rsync", "--archive"])
    with TemporaryDirectory() as tmp:
        workdir = Path(tmp)
        prefix = workdir / "home"
        with patch("pobsync.commands.run_scheduled.run_rsync", return_value=canned) as rsync_mock:
            outcome = run_scheduled(
                prefix=prefix,
                host="web-01",
                dry_run=False,
                verbose_output=True,
                config_source=FakeConfigSource(backup_root=str(workdir / "backups")),
            )

        # The command is the first positional argument given to run_rsync.
        rsync_argv = rsync_mock.call_args.args[0]
        self.assertTrue(outcome["ok"])
        # With verbose_output=True all three output flags must be present.
        for flag in ("--stats", "--itemize-changes", "--info=flist2,progress2,stats2"):
            self.assertIn(flag, rsync_argv)
        self.assertTrue(outcome["verbose_output"])
|
|
|
|
def test_real_run_keeps_default_output_quiet(self) -> None:
    canned = RsyncResult(exit_code=0, command=["rsync", "--archive"])
    with TemporaryDirectory() as tmp:
        workdir = Path(tmp)
        prefix = workdir / "home"
        with patch("pobsync.commands.run_scheduled.run_rsync", return_value=canned) as rsync_mock:
            outcome = run_scheduled(
                prefix=prefix,
                host="web-01",
                dry_run=False,
                config_source=FakeConfigSource(backup_root=str(workdir / "backups")),
            )

        # The command is the first positional argument given to run_rsync.
        rsync_argv = rsync_mock.call_args.args[0]
        self.assertTrue(outcome["ok"])
        self.assertIn("--stats", rsync_argv)
        # Without verbose_output the per-item output flags must be absent.
        for flag in ("--itemize-changes", "--info=flist2,progress2,stats2"):
            self.assertNotIn(flag, rsync_argv)
        self.assertFalse(outcome["verbose_output"])
|
|
|
|
def test_successful_real_run_records_stats_in_result_and_metadata(self) -> None:
    # Stand-in for run_rsync: writes an rsync-style statistics summary to
    # the log and drops one payload file into the sibling "data" directory.
    # process_started is accepted and ignored so the fake stays
    # call-compatible if the real-run path passes a process-start callback.
    def fake_run_rsync(command, log_path, timeout_seconds, cancel_check=None, process_started=None):
        log_path.parent.mkdir(parents=True, exist_ok=True)
        log_path.write_text(
            "Number of files: 10\n"
            "Number of regular files transferred: 2\n"
            "Total file size: 2,000 bytes\n"
            "Total transferred file size: 500 bytes\n"
            "Literal data: 500 bytes\n"
            "Matched data: 1,500 bytes\n",
            encoding="utf-8",
        )
        # "data" sits next to the directory that holds the log file.
        data_dir = log_path.parent.parent / "data"
        data_dir.mkdir(parents=True, exist_ok=True)
        (data_dir / "payload.txt").write_text("payload", encoding="utf-8")
        return RsyncResult(exit_code=0, command=command)

    with TemporaryDirectory() as tmp:
        backup_root = Path(tmp) / "backups"
        with patch("pobsync.commands.run_scheduled.run_rsync", side_effect=fake_run_rsync):
            result = run_scheduled(
                prefix=Path(tmp) / "home",
                host="web-01",
                dry_run=False,
                config_source=FakeConfigSource(backup_root=str(backup_root)),
            )

        # Read the metadata while the temporary directory still exists.
        meta_path = Path(result["snapshot"]) / "meta" / "meta.yaml"
        meta_text = meta_path.read_text(encoding="utf-8")

        self.assertTrue(result["ok"])
        self.assertEqual(result["log"], str(Path(result["snapshot"]) / "meta" / "rsync.log"))
        self.assertEqual(result["stats"]["rsync"]["files_total"], 10)
        self.assertEqual(result["stats"]["rsync"]["files_transferred"], 2)
        self.assertEqual(result["stats"]["rsync"]["link_dest_estimated_savings_bytes"], 1500)
        self.assertIn("snapshot", result["stats"]["storage"])
        self.assertIn("capacity", result["stats"]["storage"])
        # The same statistics must also be persisted in the snapshot metadata.
        self.assertIn("stats:", meta_text)
        self.assertIn("files_total: 10", meta_text)
|
|
|
|
def test_real_run_reports_running_state_callback_before_rsync_returns(self) -> None:
    reported = []

    # Stand-in for run_rsync that inspects the states reported so far,
    # both before and after it announces the process start.
    def fake_run_rsync(command, log_path, timeout_seconds, cancel_check=None, process_started=None):
        # Exactly one state has been reported by the time rsync is invoked.
        self.assertEqual(len(reported), 1)
        initial = reported[0]
        self.assertEqual(initial["status"], "running")
        self.assertEqual(initial["phase"], "preparing")
        self.assertEqual(initial["log"], str(log_path))
        self.assertEqual(initial["rsync"]["command"], command)
        self.assertIsNotNone(process_started)

        # Announcing the process (pid, pgid) must add a second state.
        process_started(1234, 1234)
        self.assertEqual(len(reported), 2)
        in_flight = reported[1]
        self.assertEqual(in_flight["phase"], "rsync")
        self.assertEqual(in_flight["rsync"]["pid"], 1234)
        self.assertEqual(in_flight["rsync"]["pgid"], 1234)

        log_path.write_text("Number of files: 1\n", encoding="utf-8")
        return RsyncResult(exit_code=0, command=command)

    with TemporaryDirectory() as tmp:
        workdir = Path(tmp)
        rsync_patch = patch("pobsync.commands.run_scheduled.run_rsync", side_effect=fake_run_rsync)
        with rsync_patch:
            run_scheduled(
                prefix=workdir / "home",
                host="web-01",
                dry_run=False,
                config_source=FakeConfigSource(backup_root=str(workdir / "backups")),
                state_callback=reported.append,
            )

    # A third state is reported after rsync has returned.
    self.assertEqual(len(reported), 3)
    self.assertIn("/.incomplete/", reported[0]["snapshot"])
    closing = reported[2]
    self.assertEqual(closing["phase"], "finalizing")
    self.assertEqual(closing["rsync"]["exit_code"], 0)
|
|
|
|
def test_real_run_keeps_snapshot_with_warning_for_vanished_files(self) -> None:
    # Stand-in for run_rsync: logs a "vanished files" warning, writes one
    # payload file into the sibling "data" directory and reports exit code 24.
    # process_started is accepted and ignored so the fake stays
    # call-compatible if the real-run path passes a process-start callback.
    def fake_run_rsync(command, log_path, timeout_seconds, cancel_check=None, process_started=None):
        # No mkdir here: the log directory is expected to exist already.
        log_path.write_text(
            "file has vanished: \"/var/lib/app/session\"\n"
            "rsync warning: some files vanished before they could be transferred (code 24) at main.c(1338) [sender=3.4.1]\n",
            encoding="utf-8",
        )
        # "data" sits next to the directory that holds the log file.
        data_dir = log_path.parent.parent / "data"
        data_dir.mkdir(parents=True, exist_ok=True)
        (data_dir / "payload.txt").write_text("payload", encoding="utf-8")
        return RsyncResult(exit_code=24, command=command)

    with TemporaryDirectory() as tmp:
        backup_root = Path(tmp) / "backups"
        with patch("pobsync.commands.run_scheduled.run_rsync", side_effect=fake_run_rsync):
            result = run_scheduled(
                prefix=Path(tmp) / "home",
                host="web-01",
                dry_run=False,
                config_source=FakeConfigSource(backup_root=str(backup_root)),
            )

        # Check the payload while the temporary directory still exists.
        snapshot = Path(result["snapshot"])
        self.assertTrue((snapshot / "data" / "payload.txt").exists())

        # Exit code 24 is reported as a successful run with a warning.
        self.assertTrue(result["ok"])
        self.assertEqual(result["status"], "warning")
        self.assertEqual(result["rsync"]["exit_code"], 24)
        self.assertEqual(result["warning"]["category"], "vanished")
        self.assertEqual(snapshot.parent.name, "scheduled")
        self.assertIn("file has vanished", "\n".join(result["rsync"]["log_tail"]))
|
|
|
|
def test_dry_run_reports_cancelled_rsync(self) -> None:
    # Cancellation probe handed to run_scheduled; it always requests a stop.
    def always_cancel():
        return True

    # Stand-in for run_rsync: confirms the probe it receives reports
    # cancellation, then returns a cancelled result with exit code 130.
    def fake_run_rsync(command, log_path, timeout_seconds, cancel_check=None):
        self.assertTrue(cancel_check())
        log_dir = log_path.parent
        log_dir.mkdir(parents=True, exist_ok=True)
        log_path.write_text("[pobsync] rsync cancelled\n", encoding="utf-8")
        return RsyncResult(exit_code=130, command=command, cancelled=True)

    rsync_patch = patch("pobsync.commands.run_scheduled.run_rsync", side_effect=fake_run_rsync)
    with rsync_patch:
        outcome = run_scheduled(
            prefix=Path("/missing-prefix"),
            host="web-01",
            dry_run=True,
            config_source=FakeConfigSource(),
            cancel_check=always_cancel,
        )

    self.assertFalse(outcome["ok"])
    self.assertTrue(outcome["cancelled"])
    self.assertEqual(outcome["rsync"]["exit_code"], 130)
|
|
|
|
def test_successful_real_run_applies_prune_when_requested(self) -> None:
|
|
with TemporaryDirectory() as tmp:
|
|
prefix = Path(tmp) / "home"
|
|
with (
|
|
patch("pobsync.commands.run_scheduled.run_rsync") as run_rsync,
|
|
patch("pobsync.commands.retention_apply.run_retention_plan") as plan,
|
|
):
|
|
run_rsync.return_value = RsyncResult(exit_code=0, command=["rsync", "--archive"])
|
|
plan.return_value = {
|
|
"ok": True,
|
|
"delete": [],
|
|
"keep": [],
|
|
"reasons": {},
|
|
"protect_bases": False,
|
|
}
|
|
|
|
result = run_scheduled(
|
|
prefix=prefix,
|
|
host="web-01",
|
|
dry_run=False,
|
|
prune=True,
|
|
prune_max_delete=10,
|
|
config_source=FakeConfigSource(backup_root=str(Path(tmp) / "backups")),
|
|
)
|
|
|
|
self.assertTrue(result["ok"])
|
|
self.assertIsNotNone(result["prune"])
|
|
self.assertEqual(result["prune"]["deleted"], [])
|