From 26313834e4b592b4d3d3270df10d3a81c1e8569e Mon Sep 17 00:00:00 2001
From: Johann Bahl
Date: Thu, 2 Nov 2023 13:45:49 +0100
Subject: [PATCH] integrate backy-extract

---
 ...1110_210927_jb_integrate_backy_extract.rst |  3 +
 src/backy/backup.py                           | 95 ++++++++++++++++++-
 src/backy/ext_deps.py                         |  1 +
 src/backy/main.py                             | 12 ++-
 src/backy/tests/test_backup.py                | 19 ++++
 5 files changed, 123 insertions(+), 7 deletions(-)
 create mode 100644 changelog.d/20231110_210927_jb_integrate_backy_extract.rst

diff --git a/changelog.d/20231110_210927_jb_integrate_backy_extract.rst b/changelog.d/20231110_210927_jb_integrate_backy_extract.rst
new file mode 100644
index 00000000..9147c85b
--- /dev/null
+++ b/changelog.d/20231110_210927_jb_integrate_backy_extract.rst
@@ -0,0 +1,3 @@
+.. A new scriv changelog fragment.
+
+- Integrate `backy-extract`. `backy restore` automatically switches to `backy-extract` if available.
diff --git a/src/backy/backup.py b/src/backy/backup.py
index a4ad4467..4874c150 100644
--- a/src/backy/backup.py
+++ b/src/backy/backup.py
@@ -3,17 +3,21 @@
 import glob
 import os
 import os.path as p
+import subprocess
 import time
+from enum import Enum
 from typing import IO, Optional, Type
 
 import yaml
 from structlog.stdlib import BoundLogger
 
+import backy.backends.chunked
 from backy.utils import min_date
 
 from .backends import BackendException, BackyBackend
 from .backends.chunked import ChunkedFileBackend
 from .backends.cowfile import COWFileBackend
+from .ext_deps import BACKY_EXTRACT
 from .quarantine import QuarantineStore
 from .revision import Revision, Trust, filter_schedule_tags
 from .schedule import Schedule
@@ -34,6 +38,17 @@
 # locking main function.
 
 
+class RestoreBackend(Enum):
+    """Implementation used by `Backup.restore` to write out a revision."""
+
+    AUTO = "auto"
+    PYTHON = "python"
+    RUST = "rust"
+
+    def __str__(self):
+        return self.value
+
+
 def locked(target=None, mode=None):
     if mode == "shared":
         mode = fcntl.LOCK_SH
@@ -318,15 +333,85 @@ def purge(self):
 
     # This needs no locking as it's only a wrapper for restore_file and
     # restore_stdout and locking isn't re-entrant.
-    def restore(self, revision, target):
+    def restore(
+        self,
+        revision: str,
+        target: str,
+        restore_backend: RestoreBackend = RestoreBackend.AUTO,
+    ):
+        """Restore `revision` to `target`.
+
+        With `RestoreBackend.AUTO`, `backy-extract` is used when
+        `backy_extract_supported` accepts the revision, otherwise the
+        Python implementation (where a target of "-" means stdout) is used.
+        """
         r = self.find(revision)
         backend = self.backend_factory(r, self.log)
         s = backend.open("rb")
-        with s as source:
-            if target != "-":
-                self.restore_file(source, target)
+        if restore_backend == RestoreBackend.AUTO:
+            if self.backy_extract_supported(s):
+                restore_backend = RestoreBackend.RUST
             else:
-                self.restore_stdout(source)
+                restore_backend = RestoreBackend.PYTHON
+            self.log.info("restore-backend", backend=restore_backend.value)
+        if restore_backend == RestoreBackend.PYTHON:
+            with s as source:
+                if target != "-":
+                    self.restore_file(source, target)
+                else:
+                    self.restore_stdout(source)
+        elif restore_backend == RestoreBackend.RUST:
+            # backy-extract is given the revision path, not this handle;
+            # close it here instead of leaking it.
+            s.close()
+            self.restore_backy_extract(r, target)
+
+    def backy_extract_supported(self, file: IO) -> bool:
+        """Return True if `file` can be restored with `backy-extract`.
+
+        Requires a chunked backend file whose size is a multiple of
+        CHUNK_SIZE and a `backy-extract` executable that reports its version.
+        """
+        log = self.log.bind(subsystem="backy-extract")
+        if not isinstance(file, backy.backends.chunked.File):
+            log.debug("unsupported-backend")
+            return False
+        if file.size % CHUNK_SIZE != 0:
+            log.debug("not-chunk-aligned")
+            return False
+        try:
+            version = subprocess.check_output(
+                [BACKY_EXTRACT, "--version"], encoding="utf-8", errors="replace"
+            )
+        except (OSError, subprocess.SubprocessError):
+            # Binary missing, not executable, or exited non-zero.
+            log.debug("unavailable")
+            return False
+        if not version.startswith("backy-extract"):
+            log.debug("unknown-version")
+            return False
+        return True
+
+    # backy-extract acquires lock
+    def restore_backy_extract(self, rev: Revision, target: str):
+        """Run `backy-extract` to restore `rev` to `target`.
+
+        Raises RuntimeError if the process exits with a non-zero code.
+        """
+        log = self.log.bind(subsystem="backy-extract")
+        cmd = [BACKY_EXTRACT, p.join(self.path, rev.uuid), target]
+        log.debug("started", cmd=cmd)
+        proc = subprocess.Popen(cmd)
+        return_code = proc.wait()
+        log.info(
+            "finished",
+            return_code=return_code,
+            subprocess_pid=proc.pid,
+        )
+        if return_code:
+            raise RuntimeError(
+                f"backy-extract failed with return code {return_code}. Maybe try `--backend python`?"
+            )
 
     @locked(target=".purge", mode="shared")
     def restore_file(self, source, target):
diff --git a/src/backy/ext_deps.py b/src/backy/ext_deps.py
index 0d8ff0f9..cc2ce7d9 100644
--- a/src/backy/ext_deps.py
+++ b/src/backy/ext_deps.py
@@ -12,4 +12,5 @@
 )
 CP = os.environ.get("BACKY_CP", "cp")
 RBD = os.environ.get("BACKY_RBD", "rbd")
+BACKY_EXTRACT = os.environ.get("BACKY_EXTRACT", "backy-extract")
 BASH = os.environ.get("BACKY_BASH", "bash")
diff --git a/src/backy/main.py b/src/backy/main.py
index 4e0ced87..1a5323c6 100644
--- a/src/backy/main.py
+++ b/src/backy/main.py
@@ -22,6 +22,7 @@
 
 from . import logging
 from .client import APIClient, CLIClient
+from .backup import RestoreBackend
 
 
 def valid_date(s):
@@ -101,9 +102,9 @@ def backup(self, tags, force):
         finally:
             b._clean()
 
-    def restore(self, revision, target):
+    def restore(self, revision, target, restore_backend):
         b = backy.backup.Backup(self.path, self.log)
-        b.restore(revision, target)
+        b.restore(revision, target, restore_backend)
 
     def forget(self, revision):
         b = backy.backup.Backup(self.path, self.log)
@@ -280,6 +281,13 @@ def setup_argparser():
 Restore (a given revision) to a given target.
 """,
     )
+    p.add_argument(
+        "--backend",
+        type=RestoreBackend,
+        choices=list(RestoreBackend),
+        default=RestoreBackend.AUTO,
+        dest="restore_backend",
+    )
     p.add_argument(
         "-r",
         "--revision",
diff --git a/src/backy/tests/test_backup.py b/src/backy/tests/test_backup.py
index 26d9eace..22b2f384 100644
--- a/src/backy/tests/test_backup.py
+++ b/src/backy/tests/test_backup.py
@@ -1,4 +1,6 @@
 import os.path
+import subprocess
+from unittest import mock
 
 import pytest
 import yaml
@@ -7,6 +9,7 @@
 from backy.backup import Backup
 from backy.revision import Revision
 from backy.sources.file import File
+from backy.utils import CHUNK_SIZE
 
 
 def test_config(simple_file_config, tmpdir):
@@ -60,6 +63,22 @@ def test_restore_stdout(simple_file_config, capfd):
     assert "volume contents\n" == out
 
 
+def test_restore_backy_extract(simple_file_config, monkeypatch):
+    check_output = mock.Mock(return_value="backy-extract 1.1.0")
+    monkeypatch.setattr(subprocess, "check_output", check_output)
+    backup = simple_file_config
+    backup.restore_backy_extract = mock.Mock()
+    source = "input-file"
+    with open(source, "wb") as f:
+        f.write(b"a" * CHUNK_SIZE)
+    backup.backup({"daily"})
+    backup.restore(0, "restore.img")
+    check_output.assert_called()
+    backup.restore_backy_extract.assert_called_once_with(
+        backup.find(0), "restore.img"
+    )
+
+
 def test_backup_corrupted(simple_file_config):
     backup = simple_file_config
     source = "input-file"