diff --git a/README.md b/README.md new file mode 100644 index 0000000..e9c0042 --- /dev/null +++ b/README.md @@ -0,0 +1,429 @@ +# What is `abarms`? + +`abarms` is a *handy* Swiss-army-knife-like tool/utility/console app for POSIX-compatible systems for manipulating Android Backup files (`*.ab`, `*.adb`) produced by `adb backup`, `bmgr`, and similar tools. +`abarms` can list contents, convert Android Backup files into TAR files and back (by decrypting, decompressing, and re-compressing said files), and split full-system dumps produced by `adb backup` into per-app backups that can be given to `adb restore`. + +Basically, this is a simpler pure Python implementation (only requires `setuptools` and `cryptography` modules) of [android-backup-extractor](https://github.com/nelenkov/android-backup-extractor) and the parts of [android-backup-toolkit](https://sourceforge.net/projects/android-backup-toolkit/) and [android-backup-processor](https://sourceforge.net/projects/android-backup-processor/) that I use myself. + +# Why does `abarms` exist? + +(TL;DR: read the parts in bold.) + +**Did you know that your Android OS device already has an awesome built-in full-system phone-to-PC backup and PC-to-phone restore tool that does not require root access?** +`adb` utility of Android Platform Tools has `adb backup` subcommand that, in principle, can do basically everything you could possibly want there. + +Internally this is implemented via Android OS setuid root binary named `bu` --- which you can run manually via `adb shell bu help` --- that simply backs up every app on the device one by one and streams the resulting `.ab` file --- which is a wrapped PAX-formatted TAR file (see "EXTENDED DESCRIPTION" section in [`man 1 pax`](https://man7.org/linux/man-pages/man1/pax.1p.html#EXTENDED_DESCRIPTION)) --- to stdout. `adb backup` subcommand is just a simple wrapper around it. 
+ +*But then Android Platform Tools bundle gives no tools to manipulate those backup files!* +So, if you make a full-system backup with `adb backup`, and then want to restore a single app out of 100+ you have installed on your device, you need third-party tools now. +This is kind of embarrassing, to be honest. +A tool to manipulate backup files should have been a standard utility in Android Platform Tools since Android version 0.1 or something. +(Seriously, are you not embarrassed? I'm embarrassed for the state of humanity thinking about how the most popular OS on the planet gives no widely accessible local backup and restore tools on par with what every user of 1970s-era UNIX mainframe had out of the box. I'm not asking for automatic opportunistic incremental quantum-safely encrypted full-system replication to cooperative nearby devices in a local mesh-network here!) + +Well, technically speaking, Android OS also has automatic scheduled non-interactive backup service `bmgr` --- which can be controlled via Android settings menu and `adb shell bmgr help`, that does per-app backups and restores. +Internally, `bmgr` service also generates `.ab` files and then either uploads them to Google --- which is the default and the only option available through the settings menu --- or stores them locally under `/data/data/com.android.localtransport/files/` --- which requires root to access. +On old Android versions you could ask `bmgr` to do a backup to an SD card directly from the settings menu, but Google removed that functionality to force users to use Cloud-based backups. + +So, basically, according to Google (and Samsung, which ship with their own `bmgr`-like service in parallel with `bmgr`), to restore to a previous state of an app, or to migrate between phones you now apparently have to upload all your data to their servers in plain-text for their convenient data-mining and selling of your data to interested third parties. 
+Google even went as far as to hide `adb backup` subcommand from their official Android documentation: compare the [old manual for `adb`](https://web.archive.org/web/20180426100826/https://developer.android.com/studio/command-line/adb) with the [current one](https://web.archive.org/web/20240129131223/https://developer.android.com/tools/adb), Control+F for "backup". + +This resulted in every Android vendor now making their own vendor-specific phone-to-phone migration utilities, and a whole ecosystem of commercial apps that do what `adb backup` already does, but worse. + +This also resulted in the usefulness of `adb backup` itself being reduced because in Android version 6 Google made automatic daily file-based backups that get uploaded to Google the default when you attach your phone to your Google account. +So, most apps started to opt out of those backups for privacy and security reasons -- which also started opting them out of being included in `adb backup` output, since `bmgr` and `bu` share most of the infrastructure. +Some of those apps now implement their own in-app backup buttons hidden away in the settings menu somewhere, but most do not. + +Yes, this is stupid, see [this discussion on StackOverflow](https://stackoverflow.com/questions/12648373/what-is-androidallowbackup). +See also old Android developer docs that explained this fairly clearly [here](https://web.archive.org/web/20181122123338/https://developer.android.com/guide/topics/data/backup) and [here](https://web.archive.org/web/20181118184751/https://developer.android.com/guide/topics/data/testingbackup). + +(You can also force an app to be included in `adb backup` by rebuilding its APK to enable `android:allowBackup` attribute in the manifest and installing the result manually, see [this](https://stackpointer.io/mobile/android-enable-adb-backup-for-any-app/462/) for more info. 
+But this will only work for newly installed apps as you will have to re-sign the resulting APK with your own private key and Android forbids app updates that change the signing key.) + +But, hopefully, eventually, some alternative firmware developer will fix the above bug and allow `adb backup` to back up all apps regardless of `android:allowBackup` manifest setting, as it should. + +Still, `adb backup` works fine for a lot of apps and, hopefully, will eventually get back to working as well as it did before Android version 6 in the future. +Meanwhile, [android-backup-toolkit](https://sourceforge.net/projects/android-backup-toolkit/) allows you to split full-system dumps produced by `adb backup` into per-app backups that can then be restored with `adb restore`. + +The problem is that, while I'm thankful that `android-backup-toolkit` exists, I find it really annoying to use: it is a bundle of pre-compiled Java apps, binaries, and shell scripts that manages to work somehow, but modifying anything there is basically impossible as building all of those things from sources is an adventure I failed to complete, and then you need to install the gigantic Java VM and libraries to run it all. + +**So, as it currently stands, to have per-app backups of your Android device you have to either:** + +- **root your device;** +- **give up your privacy by uploading your backups to other people's computers (aka "the cloud"); or** +- **repack all your APKs with `android:allowBackup = true` and either run older Android firmware that can do backup to an SD card or run `adb backup` from your PC, and then extract per-app backups from its output (yes, this is not ideal, but it works, and does not need root).** + +**So, one day I was looking at all of this. +I couldn't root or change the firmware on a phone I wanted to keep backed up, but I could follow the last option and get most of what I wanted with almost no effort. 
+Except figuring out how to run `android-backup-toolkit` to do the very last step of this took me quite a while. +And so I thought, "Hmm, this seems overly complicated, something as simple as splitting and merging TAR files with some additional headers should be doable with a simple Python program." +So I made one.** + +It turned out to be a bit less simple than I thought it would be, mostly because Python's `tarfile` module was not designed for this, so I had to make my own, and PAX-formatted TAR files are kind of ugly to parse, but it works now, so, eh. + +**Hopefully, `abarms` existing will inspire more app and alternative firmware developers to support `adb backup` properly and so personal computing devices of late 2020s will finally reach feature parity with 1970s-era Tape ARchiving (TAR) backup technology.** + +# Quickstart + +## Installation + +- Install with: + ``` {.bash} + pip install abarms + ``` + and run as + ``` {.bash} + abarms --help + ``` +- Alternatively, install it via Nix + ``` {.bash} + nix-env -i -f ./default.nix + ``` +- Alternatively, run without installing: + ``` {.bash} + python3 -m abarms --help + ``` + +## Backup all apps from your Android device, then restore a single app, without root + +### Prepare your PC and phone + +Before you make a full backup of your Android phone (or other device) you need to + +- install Android Platform Tools (either from [there](https://developer.android.com/tools/releases/platform-tools) or from your distribution), + +- enable Developer Mode and USB Debugging (see [Android Docs](https://web.archive.org/web/20240129131223/https://developer.android.com/tools/adb) for instructions). + +- then, usually, on your PC you need to run + + ``` + sudo adb kill-server + sudo adb start-server + ``` + + unless you added special UDev rules for your phone. 
+ +### Do a full backup + +To do the backup, you need to unlock your phone, connect it to your PC via a USB cable (in that order, otherwise USB Debugging will be disabled), confirm that the PC is allowed to do USB Debugging in the popup on the phone, then run + +``` +adb backup -apk -obb -noshared -all -system -keyvalue +``` + +on your PC, then (unlock your phone again and) press "Back up my data" button at the bottom of your screen. + +Now you need to wait awhile for `adb` to finish. +The result will be saved in `backup.ab` file. + +If you want to back up to an explicitly named file, e.g. to note the date of the backup, run + +``` +adb backup -f backup_20240101.ab -apk -obb -noshared -all -system -keyvalue +``` + +instead. + +### Split it into pieces + +You can view contents of the backup via + +``` +abarms ls backup_20240101.ab +``` + +and split it into per-app backups via + +``` +abarms split backup_20240101.ab +``` + +which will produce a bunch of files named `abarms_split_<backupname>_<num>_<appname>.ab` (e.g. `abarms_split_backup_20240101_020_org.fdroid.fdroid.ab`). + +### Restore a single app + +A single per-app file can be fed back to `adb restore` to restore that single app, e.g. 
+ +``` +adb restore abarms_split_backup_20240101_020_org.fdroid.fdroid.ab +``` + +### Rebuild full backup from parts + +You can also rebuild the original full-backup from parts via + +``` +abarms merge abarms_split_backup_20240101_*.ab backup_20240101.rebuilt.ab +``` + +to check that it produces exactly the same backup file + +``` +# strip encryption and compression from the original +abarms strip backup_20240101.ab backup_20240101.stripped.ab + +# compare the stripped original and the rebuilt file +diff backup_20240101.stripped.ab backup_20240101.rebuilt.ab || echo differ +``` + +# Alternatives + +## `android-backup-toolkit` and friends + +- [android-backup-extractor](https://github.com/nelenkov/android-backup-extractor) is a Java app that can decrypt and decompress Android Backup archives and convert them into TAR. + +- [android-backup-toolkit](https://sourceforge.net/projects/android-backup-toolkit/) builds on top of `android-backup-extractor` and provides a way to split full-system backup ADB files into per-app pieces. + +- [android-backup-processor](https://sourceforge.net/projects/android-backup-processor/) is an older version of `android-backup-toolkit`. + +## If you have root on your device + +Assuming you have root on your Android phone, you can do + +``` +# check if bmgr is enabled +adb shell bmgr enabled + +# list bmgr transports +adb shell bmgr list transports +# localtransport should be there, enable it +adb shell bmgr transport com.android.localtransport/.LocalTransport + +# enable bmgr +adb shell bmgr enable true + +# do a full backup now +adb shell bmgr fullbackup +``` + +and then take per-app backup files from `/data/data/com.android.localtransport/files/`. + +# Quirks + +The precise algorithm for how encrypted Android Backup files get their master key salted checksums computed remains a mystery to me even after reading all the related Android sources. 
+ +Luckily, those checksums verify that the given passphrase is correct and can be ignored while reading `.ab` files since the following encrypted Android Backup headers are verbose enough that a wrong passphrase will break parsing anyway. +None of my use cases ever need encrypted `.ab` files and no firmware I know of requires `adb restore` inputs to be encrypted. + +So, after spending two days trying to figure those checksums out I decided that `abarms` does not support generating encrypted `.ab` files by design. +(You are welcome to try and implement this, see comments in the `__main__.py`.) + +If it isn't clear, `abarms` *does* support encrypted `.ab` files as inputs (because my phone always generates such regardless of my wishes). + +# License + +GPLv3+, small library parts are MIT. + +# Usage + +## abarms + +A handy Swiss-army-knife-like utility for manipulating Android Backup files (`*.ab`, `*.adb`) produced by `adb backup`, `bmgr`, and similar tools. + +- options: + - `--version` + : show program's version number and exit + - `-h, --help` + : show this help message and exit + - `--markdown` + : show help messages formatted in Markdown + +- passphrase: + - `-p PASSPHRASE, --passphrase PASSPHRASE` + : passphrase for an encrypted `INPUT_AB_FILE` + - `--passfile PASSFILE` + : a file containing the passphrase for an encrypted `INPUT_AB_FILE`; similar to `-p` option but the whole contents of the file will be used verbatim, allowing you to, e.g. 
use new line symbols or strange character encodings in there; default: guess based on `INPUT_AB_FILE` trying to replace ".ab" and ".adb" extensions with ".passphrase.txt" + +- subcommands: + - `{ls,list,strip,ab2ab,split,ab2many,merge,many2ab,unwrap,ab2tar,wrap,tar2ab}` + - `ls (list)` + : list contents of an Android Backup file + - `strip (ab2ab)` + : strip encryption and compression from an Android Backup file + - `split (ab2many)` + : split a full-system Android Backup file into a bunch of per-app Android Backup files + - `merge (many2ab)` + : merge a bunch of Android Backup files into one + - `unwrap (ab2tar)` + : convert an Android Backup file into a TAR file + - `wrap (tar2ab)` + : convert a TAR file into an Android Backup file + +### abarms ls + +List contents of an Android Backup file similar to how `tar -tvf` would do, but this will also show Android Backup file version and compression flags. + +- positional arguments: + - `INPUT_AB_FILE` + : an Android Backup file to be used as input, set to "-" to use standard input + +### abarms strip + +Convert an Android Backup file into another Android Backup file with encryption and (optionally) compression stripped away. +I.e. convert an Android Backup file into a simple unencrypted (plain-text) and uncompressed version of the same. + +Versioning parameters and the TAR file stored inside the input file are copied into the output file verbatim. + +Useful e.g. if your Android firmware forces you to encrypt your backups but you store your backups on encrypted media anyway and don't want to remember more passphrases than strictly necessary. +Or if you want to strip encryption and compression and re-compress using something better than zlib. 
+ +- positional arguments: + - `INPUT_AB_FILE` + : an Android Backup file to be used as input, set to "-" to use standard input + - `OUTPUT_AB_FILE` + : file to write the output to, set to "-" to use standard output; default: "-" if `INPUT_AB_FILE` is "-", otherwise replace ".ab" and ".adb" extension of `INPUT_AB_FILE` with `.stripped.ab` + +- options: + - `-d, --decompress` + : produce decompressed output; this is the default + - `-k, --keep-compression` + : copy compression flag and data from input to output as-is; this will make the output into a compressed Android Backup file if the source is compressed; this is the fastest way to `strip`, since it just copies bytes around as-is + - `-c, --compress` + : (re-)compress the output file; this could take a while + +### abarms split + +Split a full-system Android Backup file into a bunch of per-app Android Backup files. + +Resulting per-app files can be given to `adb restore` to restore selected apps. + +Also, if you do backups regularly, then splitting large Android Backup files like this and then deduplicating per-app files between backups could save lots of disk space. + +- positional arguments: + - `INPUT_AB_FILE` + : an Android Backup file to be used as input, set to "-" to use standard input + +- options: + - `--prefix PREFIX` + : file name prefix for output files; default: `abarms_split_backup` if `INPUT_AB_FILE` is "-", `abarms_split_<INPUT_AB_FILE without its ".ab" or ".adb" extension>` otherwise + - `-c, --compress` + : compress per-app output files + +### abarms merge + +Merge many smaller Android Backup files into a single larger one. +A reverse operation to `split`. + +This exists mostly for checking that `split` is not buggy. 
+ +- positional arguments: + - `INPUT_AB_FILE` + : Android Backup files to be used as inputs + - `OUTPUT_AB_FILE` + : file to write the output to + +- options: + - `-c, --compress` + : compress the output file + +### abarms unwrap + +Convert an Android Backup file into a TAR file by stripping the Android Backup header, decrypting and decompressing as necessary. + +The TAR file stored inside the input file gets copied into the output file verbatim. + +- positional arguments: + - `INPUT_AB_FILE` + : an Android Backup file to be used as input, set to "-" to use standard input + - `OUTPUT_TAR_FILE` + : file to write output to, set to "-" to use standard output; default: guess based on `INPUT_AB_FILE` while setting extension to `.tar` + +### abarms wrap --output-version + +Convert a TAR file into an Android Backup file by prepending Android Backup header and (optionally) compressing TAR data with zlib (the only compression the Android Backup file format supports). + +The input TAR file gets copied into the output file verbatim. + +Note that the above means that unwrapping a `.ab` file, unpacking the resulting `.tar`, editing the resulting files, packing them back with GNU `tar` utility, running `abarms wrap`, and then running `adb restore` on the resulting file will probably crash your Android device (phone or whatever) because the Android-side code restoring from the backup expects the data in the packed TAR to be in a certain order and have certain PAX headers, which GNU `tar` will not produce. + +So you should only use this on files previously produced by `abarms unwrap` or if you know what it is you are doing. + +Production of encrypted Android Backup files is not supported at this time. 
+ +- positional arguments: + - `INPUT_TAR_FILE` + : a TAR file to be used as input, set to "-" to use standard input + - `OUTPUT_AB_FILE` + : file to write the output to, set to "-" to use standard output; default: "-" if `INPUT_TAR_FILE` is "-", otherwise replace ".ab" and ".adb" extension of `INPUT_TAR_FILE` with `.ab` + +- options: + - `--output-version OUTPUT_VERSION` + : Android Backup file version to use (required) + - `-c, --compress` + : compress the output file + +## Usage notes + +Giving an encrypted `INPUT_AB_FILE` as input, not specifying `--passphrase` or `--passfile`, and not having a file named `{INPUT_AB_FILE with ".ab" or ".adb" extension replaced with ".passphrase.txt"}` in the same directory will cause the passphrase to be read interactively from the tty. + +## Examples + +- List contents of an Android Backup file: + ``` + abarms ls backup.ab + ``` + +- Use `tar` util to list contents of an Android Backup file instead of running `abarms ls`: + ``` + abarms unwrap backup.ab - | tar -tvf - + ``` + +- Extract contents of an Android Backup file: + ``` + abarms unwrap backup.ab - | tar -xvf - + ``` + +- Strip encryption and compression from an Android Backup file: + ``` + # equivalent + abarms strip backup.ab backup.stripped.ab + abarms strip backup.ab + ``` + + ``` + # equivalent + abarms strip --passphrase secret backup.ab + abarms strip -p secret backup.ab + ``` + + ``` + # with passphrase taken from a file + echo -n secret > backup.passphrase.txt + # equivalent + abarms strip backup.ab + abarms strip --passfile backup.passphrase.txt backup.ab + ``` + + ``` + # with a weird passphrase taken from a file + echo -ne "secret\r\n\x00another line" > backup.passphrase.txt + abarms strip backup.ab + ``` + +- Strip encryption but keep compression, if any: + ``` + # equivalent + abarms strip --keep-compression backup.ab backup.stripped.ab + abarms strip -k backup.ab + ``` + +- Strip encryption and compression from an Android Backup file and then re-compress 
using `xz`: + ``` + abarms strip backup.ab - | xz --compress -9 - > backup.ab.xz + # ... and then convert to tar and list contents: + xzcat backup.ab.xz | abarms unwrap - | tar -tvf - + ``` + +- Convert an Android Backup file into a TAR archive: + ``` + # equivalent + abarms unwrap backup.ab backup.tar + abarms unwrap backup.ab + ``` + +- Convert a TAR archive into an Android Backup file: + ``` + # equivalent + abarms wrap --output-version=5 backup.tar backup.ab + abarms wrap --output-version=5 backup.tar + ``` + diff --git a/abarms/__init__.py b/abarms/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/abarms/__main__.py b/abarms/__main__.py new file mode 100755 index 0000000..9d426e1 --- /dev/null +++ b/abarms/__main__.py @@ -0,0 +1,800 @@ +#!/usr/bin/env python3 +# +# This file is a part of abarms project. +# +# Copyright (c) 2018-2024 Jan Malakhovski +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +import io +import os +import re +import struct +import sys +import time +import typing as _t +import zlib + +from cryptography.hazmat.primitives import hashes +from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes +from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC +from cryptography.hazmat.primitives.padding import PKCS7 +from gettext import gettext, ngettext + +from . 
class ReadProxy:
    """Read-only file-like wrapper that transforms data read from `fobj`.

    Raw data is pulled from the wrapped file object in `block_size` chunks and
    passed through `_handle_data`; when the wrapped object reports EOF,
    `_handle_eof` is called exactly once to collect any trailing output.
    Subclasses must implement both hooks.
    """

    def __init__(self, fobj : _t.Any, block_size : int) -> None:
        self._fobj = fobj
        self._block_size = block_size
        # transformed-but-not-yet-returned data lives in `_buffer[_pos:]`
        self._buffer = b""
        self._pos = 0
        self._eof = False

    def _handle_eof(self) -> bytes:
        """Return the final piece of transformed data once the source is exhausted."""
        raise NotImplementedError

    def _handle_data(self, data : bytes) -> bytes:
        """Transform a non-empty chunk of raw data read from the source."""
        raise NotImplementedError

    def read(self, size : _t.Optional[int] = -1) -> bytes:
        """Return up to `size` bytes of transformed data.

        A `size` of `None` or any negative number reads until EOF.
        Fewer than `size` bytes are returned only when the source is
        exhausted; `b""` means EOF (or `size == 0`).
        """
        read_all = size is None or size < 0
        available = len(self._buffer) - self._pos

        if not self._eof and (read_all or available < size):
            # Collect pieces in a list and join once, instead of repeatedly
            # re-allocating the buffer with `+=`.
            chunks = [self._buffer[self._pos:]] if available > 0 else []
            while not self._eof and (read_all or available < size):
                data = self._fobj.read(self._block_size)
                if len(data) == 0:
                    piece = self._handle_eof()
                    self._eof = True
                else:
                    piece = self._handle_data(data)
                chunks.append(piece)
                available += len(piece)
            self._buffer = b"".join(chunks)
            self._pos = 0

        if read_all or available <= size:
            # hand out everything that is left and drop the buffer
            res = self._buffer[self._pos:]
            self._buffer = b""
            self._pos = 0
            return res

        # Serve a short read by advancing an offset, so that the (possibly
        # very large) remainder of the buffer is not copied on every call.
        start = self._pos
        self._pos = start + size
        return self._buffer[start:self._pos]

    def tell(self) -> int:
        """Return the position of the wrapped file object (not of the transformed stream)."""
        return self._fobj.tell() # type: ignore

    def fileno(self) -> int:
        """Return the file descriptor of the wrapped file object."""
        return self._fobj.fileno() # type: ignore

    def close(self) -> None:
        """Close the wrapped file object."""
        self._fobj.close()
def getpass(prompt : str = "Passphrase: ") -> bytes:
    """Read a passphrase from the controlling terminal without echoing it.

    `prompt` is written to `/dev/tty` before reading (previously this
    argument was ignored and a hard-coded prompt was always shown).

    Returns the raw bytes of the entered line with a single trailing
    "\\r\\n" or "\\n" removed; no decoding is performed.
    """
    import termios
    with open("/dev/tty", "r+b", buffering=0) as tty:
        tty.write(os.fsencode(prompt))
        old = termios.tcgetattr(tty)
        new = termios.tcgetattr(tty)
        # index 3 is the "local modes" field; clear ECHO so typing is hidden
        new[3] = new[3] & ~termios.ECHO
        try:
            termios.tcsetattr(tty, termios.TCSADRAIN, new)
            data : bytes = tty.readline()
            # the user's own newline was not echoed, so emit one ourselves
            tty.write(b"\n")
        finally:
            # always restore the original terminal settings
            termios.tcsetattr(tty, termios.TCSADRAIN, old)

        if data[-2:] == b"\r\n": data = data[:-2]
        elif data[-1:] == b"\n": data = data[:-1]
        return data
def begin_ab_input(cfg : Namespace, decompress : bool = True) -> None:
    """Open `cfg.input_file` as an Android Backup file and parse its header.

    After the header is consumed, `cfg.input` is left positioned at the
    payload and, as needed, wrapped so that reads yield decrypted and (when
    `decompress` is true and the file is compressed) decompressed data.
    Sets `cfg.input_version` and `cfg.input_compression` from the header.

    The passphrase is taken from `cfg.passphrase`, else from the file named
    by `cfg.passfile`, else from `<cfg.basename>.passphrase.txt` if that
    file exists; if none of those yield one and the input is encrypted,
    it is read interactively via `getpass()`.

    Raises `CatastrophicFailure` on a missing passfile, a malformed or
    unsupported header, or a key blob that fails to decrypt.
    """
    begin_input(cfg, [".ab", ".adb"])

    passphrase = None
    if cfg.passphrase is not None:
        passphrase = os.fsencode(cfg.passphrase)
    elif cfg.passfile is not None:
        try:
            with open(cfg.passfile, "rb") as f:
                passphrase = f.read()
        except FileNotFoundError:
            raise CatastrophicFailure(gettext("file `%s` does not exists"), cfg.passfile)
    else:
        # implicit passfile next to the input; silently skipped when absent
        passfile = cfg.basename + ".passphrase.txt"
        try:
            with open(passfile, "rb") as f:
                passphrase = f.read()
        except FileNotFoundError:
            pass

    # The original backing up code: https://android.googlesource.com/platform/frameworks/base/+/refs/heads/master/services/backup/java/com/android/server/backup/fullbackup/PerformAdbBackupTask.java
    def readline(what : str) -> bytes:
        # every header field must be terminated by "\n"; EOF or a missing
        # terminator is a parse error
        data : bytes = cfg.input.readline()
        if data[-1:] == b"\n":
            data = data[:-1]
        else:
            raise CatastrophicFailure(gettext("%s: unable to parse header: %s"), cfg.input_file, what)
        return data

    def readint(what : str) -> int:
        data = readline(what)
        try:
            res = int(data)
        except Exception:
            raise CatastrophicFailure(gettext("%s: unable to parse header: %s"), cfg.input_file, what)
        return res

    def readhex(what : str) -> bytes:
        data = readline(what)
        try:
            res = bytes.fromhex(str(data, "ascii"))
        except Exception:
            raise CatastrophicFailure(gettext("%s: unable to parse header: %s"), cfg.input_file, what)
        return res

    magic = readline("magic")
    if magic != b"ANDROID BACKUP":
        raise CatastrophicFailure(gettext("%s: not an Android Backup file"), cfg.input_file)

    version = readint("version")
    if version < 1 or version > 5:
        raise CatastrophicFailure(gettext("%s: unknown Android Backup version: %s"), cfg.input_file, version)
    cfg.input_version = version

    compression = readint("compression")
    if compression not in [0, 1]:
        raise CatastrophicFailure(gettext("%s: unknown Android Backup compression: %s"), cfg.input_file, compression)
    cfg.input_compression = compression

    algo = readline("encryption").upper()
    if algo == b"NONE":
        pass
    elif algo == b"AES-256":
        user_salt = readhex("user_salt")
        # read to advance past the field; only needed for the (unimplemented)
        # checksum verification discussed below
        checksum_salt = readhex("checksum_salt")
        iterations = readint("iterations")
        user_iv = readhex("user_iv")
        user_blob = readhex("user_blob")

        if passphrase is None:
            passphrase = getpass()

        blob_key=androidKDF(32, user_salt, iterations, passphrase)

        decryptor = Cipher(algorithms.AES(blob_key), modes.CBC(user_iv)).decryptor()
        unpadder = PKCS7(128).unpadder()
        try:
            data = decryptor.update(user_blob) + decryptor.finalize()
            decrypted_blob = unpadder.update(data) + unpadder.finalize()
        except Exception:
            # was a bare `except:`, which would also swallow KeyboardInterrupt
            # and SystemExit
            raise CatastrophicFailure(gettext("%s: failed to decrypt, wrong passphrase?"), cfg.input_file)

        state = {"data": decrypted_blob}

        def readb(want : int) -> bytes:
            # Each field of the decrypted blob is a one-byte length followed
            # by that many bytes. A blob that is too short (possible when a
            # wrong passphrase happens to produce valid padding) is reported
            # as a decryption failure instead of crashing or being truncated.
            blob = state["data"]
            if len(blob) < want + 1 or blob[0] != want:
                raise CatastrophicFailure(gettext("%s: failed to decrypt, wrong passphrase?"), cfg.input_file)
            data = blob[1:want + 1]
            state["data"] = blob[want + 1:]
            return data

        master_iv = readb(16)
        master_key = readb(32)
        checksum = readb(32)

        # Okay, so. I give up trying to figure out how `checksum` is actually
        # computed there. Regardless, wrong passphrase will fail in the later
        # parsing stage so this does not really matter.
        #
        # The Java code seems deceptively simple, but it clearly does some
        # non-trivial type conversion on the fly.
        # You are welcome to experiment with this.

        #print("miv", len(master_iv), master_iv.hex())
        #print("mk", len(master_key), master_key.hex())
        #print("ck", len(checksum), checksum.hex())
        #print("cs", len(checksum_salt), checksum_salt.hex())
        #print(state)

        #master_key_as_pwd = ""
        #for c in master_key:
        #    master_key_as_pwd += chr(c)

        #master_key_as_utf8 = bytes(master_key_as_pwd, "utf-8")
        #master_key_as_utf16be = bytes(master_key_as_pwd, "utf-16-be")
        #master_key_as_utf16le = bytes(master_key_as_pwd, "utf-16-le")

        #print(len(master_key_as_pwd), repr(master_key_as_pwd))
        #print(len(master_key_as_utf8), repr(master_key_as_utf8))
        #print(len(master_key_as_utf16be), repr(master_key_as_utf16be))

        #print("ck")

        #for x in [master_key, master_key_as_utf8, master_key_as_utf16be, master_key_as_utf16le]:
        #    our_checksum = androidKDF(32, checksum_salt, iterations, x)
        #    print(len(our_checksum), our_checksum.hex())
        #    if checksum == our_checksum:
        #        raise CatastrophicFailure("match found!")

        # payload: AES-CBC with the master key, then PKCS7 padding removal
        decryptor = Cipher(algorithms.AES(master_key), modes.CBC(master_iv)).decryptor()
        cfg.input = Decryptor(decryptor, cfg.input, BUFFER_SIZE)

        unpadder = PKCS7(128).unpadder()
        cfg.input = Unpadder(unpadder, cfg.input, BUFFER_SIZE)
    else:
        raise CatastrophicFailure(gettext("%s: unknown Android Backup encryption: %s"), cfg.input_file, algo)

    if decompress and compression == 1:
        cfg.input = Decompressor(cfg.input, BUFFER_SIZE)
+ sys.stderr.write("Writing output to `%s`..." % (cfg.output_file,)) + sys.stderr.flush() + +def begin_ab_header(output : _t.Any, output_version : int, compress : bool) -> _t.Any: + output_compression = 1 if compress else 0 + output.write(b"ANDROID BACKUP\n%d\n%d\nnone\n" % (output_version, output_compression)) + if compress: + return Compressor(output) + else: + return output + +def begin_ab_output(cfg : Namespace, output_ext : str, output_version : int, compress : bool) -> None: + begin_output(cfg, output_ext) + cfg.output = begin_ab_header(cfg.output, output_version, compress) + +prev_percent = None +def report_progress(cfg : Namespace) -> None: + if not cfg.report: return + + global prev_percent + percent = 100 * cfg.input.tell() / cfg.input_size + if prev_percent == percent: return + prev_percent = percent + + sys.stderr.write("\r\033[KWriting output to `%s`... %d%%" % (cfg.output_file, percent)) + sys.stderr.flush() + +def copy_input_to_output(cfg : Namespace, report : bool = True) -> None: + while True: + data = cfg.input.read(BUFFER_SIZE) + if data == b"": break + cfg.output.write(data) + if report: + report_progress(cfg) + +def finish_input(cfg : Namespace) -> None: + cfg.input.close() + +def finish_output(cfg : Namespace) -> None: + cfg.output.flush() + cfg.output.close() + + if cfg.report: + sys.stderr.write("\r\033[K") + sys.stderr.flush() + +def str_ftype(ftype : bytes) -> str: + if ftype == b"\x00" or ftype == b"0": + return "-" + elif ftype == b"1": + return "h" + elif ftype == b"2": + return "l" + elif ftype == b"3": + return "c" + elif ftype == b"4": + return "b" + elif ftype == b"5": + return "d" + elif ftype == b"6": + return "f" + else: + raise CatastrophicFailure(gettext("unknown TAR header file type: %s"), repr(ftype)) + +def str_modes(mode : int) -> str: + mode_ = oct(mode)[2:] + if len(mode_) > 3: + mode_ = mode_[-3:] + mode = int(mode_, 8) + + res = "" + rwx = ["r", "w", "x"] + n = 0 + for b in bin(mode)[2:]: + if b == "0": + res += "-" + 
def str_modes(mode : int) -> str:
    """Render the permission bits of `mode` as nine `rwx`/`-` characters.

    Only the lowest nine bits are used; the result always has nine
    characters, including for modes whose leading permission bits are zero.
    """
    bits = mode & 0o777
    return "".join(ch if bits & (0o400 >> i) else "-"
                   for i, ch in enumerate("rwx" * 3))

def str_uidgid(uid : int, gid : int, uname : str, gname : str) -> str:
    """Format the owner as `user/group`, left-justified to 12 columns.

    A name is used when it is non-empty, the numeric id otherwise.
    """
    user = uname if uname != "" else str(uid)
    group = gname if gname != "" else str(gid)
    return (user + "/" + group).ljust(12)

def str_size(x : int) -> str:
    """Format a size right-justified to 8 columns."""
    return str(x).rjust(8)

def str_mtime(x : int) -> str:
    """Format a timestamp as `YYYY-MM-DD HH:MM:SS` in local time."""
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(x))

def ab_ls(cfg : Namespace) -> None:
    """Print the Android Backup version and compression flag, then one
    listing line per TAR entry of the input.
    """
    begin_ab_input(cfg)
    print("# Android Backup, version: %d, compression: %d" % (cfg.input_version, cfg.input_compression))
    for h in tariter.iter_tar_headers(cfg.input):
        print(str_ftype(h.ftype) + str_modes(h.mode),
              str_uidgid(h.uid, h.gid, h.uname, h.gname),
              str_size(h.size), str_mtime(h.mtime), h.path)
    finish_input(cfg)

def ab_strip(cfg : Namespace) -> None:
    """Copy the input Android Backup file into a new one with an
    unencrypted ("none") header.

    With `cfg.keep_compression` the payload is copied as-is and the input's
    compression flag is repeated in the output header; otherwise the payload
    is copied through `begin_ab_output`, compressed when `cfg.compress` is set.
    """
    if cfg.keep_compression:
        begin_ab_input(cfg, False)
        begin_output(cfg, ".stripped.ab")
        cfg.output.write(b"ANDROID BACKUP\n%d\n%d\nnone\n" % (cfg.input_version, cfg.input_compression))
        copy_input_to_output(cfg)
    else:
        begin_ab_input(cfg)
        begin_ab_output(cfg, ".stripped.ab", cfg.input_version, cfg.compress)
        copy_input_to_output(cfg)
    finish_input(cfg)
    finish_output(cfg)

def write_tar(pax_header : _t.Optional[bytes], h : "tariter.TarHeader", input : _t.Any, output : _t.Any) -> None:
    """Write one TAR entry to `output`: the optional raw `pax_header`, the
    raw header `h.raw`, then `h.size + h.leftovers` bytes read from `input`.

    Raises `tariter.ParsingError` when `input` ends before the entry does.
    """
    if pax_header is not None:
        output.write(pax_header)

    output.write(h.raw)
    fsize = h.size + h.leftovers
    while fsize > 0:
        data = input.read(min(fsize, BUFFER_SIZE))
        if len(data) == 0:
            raise tariter.ParsingError("unexpected EOF")
        fsize -= len(data)
        output.write(data)

def finish_tar(output : _t.Any) -> None:
    """Write 1024 zero bytes to `output`, then flush and close it."""
    output.write(b"\0" * 1024)
    output.flush()
    output.close()
def ab_split(cfg : Namespace) -> None:
    """Split the input Android Backup file into one `.ab` file per run of
    consecutive TAR entries that belong to the same app.

    An entry belongs to app `X` when its path has more than two components
    and starts with `apps/X/`; everything else goes to the app named "other".
    Output files are named `<cfg.prefix>_<NNN>_<app>.ab` and are created
    exclusively.

    Raises `CatastrophicFailure` when an output file already exists.
    """
    begin_ab_input(cfg)

    if cfg.prefix is None:
        cfg.prefix = "abarms_split_" + cfg.basename

    print("# Android Backup, version: %d, compression: %d" % (cfg.input_version, cfg.input_compression))

    # the currently open per-app output and what it is for
    output : _t.Optional[_t.Any] = None
    fname : _t.Optional[str] = None
    app : _t.Optional[str] = None
    appnum = 0

    # raw bytes of the last seen global PAX header and of the not yet
    # written per-file PAX header
    global_pax_header : _t.Optional[bytes] = None
    pax_header : _t.Optional[bytes] = None

    for h in tariter.yield_tar_headers(cfg.input):
        ftype = h.ftype
        if ftype == b"g":
            # remember it, it gets copied to the top of each new output
            global_pax_header = h.raw
            pax_header = None
            continue
        elif ftype == b"x":
            # hold it back until the entry it describes is written
            pax_header = h.raw
            continue

        happ = "other"
        spath = h.path.split("/")
        if len(spath) > 2 and spath[0] == "apps":
            happ = spath[1]

        if app is None or happ != app:
            if output is not None:
                # finish the previous one
                finish_tar(output)
                # NOTE(review): the indentation of this file was lost; confirm
                # whether this increment belongs inside this `if` or one level up
                appnum += 1

            app = happ
            fname = "%s_%03d_%s.ab" % (cfg.prefix, appnum, app)
            try:
                output = open(fname, "xb")
            except FileExistsError:
                raise CatastrophicFailure(gettext("file `%s` already exists"), fname)

            if cfg.report:
                sys.stderr.write("Writing `%s`...\n" % (fname,))
                sys.stderr.flush()

            output = begin_ab_header(output, cfg.input_version, cfg.compress)
            if global_pax_header is not None:
                output.write(global_pax_header)

        write_tar(pax_header, h, cfg.input, output)
        # the per-file PAX header was consumed by the entry above
        pax_header = None

    if output is not None:
        # finish last
        finish_tar(output)

    finish_input(cfg)
def ab_merge(cfg : Namespace) -> None:
    """Concatenate the TAR entries of all `cfg.input_files` into a single
    Android Backup output file.

    The output is opened when the first input is read and gets that input's
    Android Backup version; every input is copied header by header.

    Raises `CatastrophicFailure` when a later input has a different version.
    """
    cfg.output = None
    input_version = 0
    for input_file in cfg.input_files:
        cfg.input_file = input_file
        begin_ab_input(cfg)
        if cfg.output is None:
            # the first input decides the version of the output
            input_version = cfg.input_version
            begin_ab_output(cfg, ".merged.ab", input_version, cfg.compress)
        elif cfg.input_version != input_version:
            raise CatastrophicFailure(gettext("can't merge files with different Android Backup versions: `%s` has version %d, but we are merging into version %d"), cfg.input_file, cfg.input_version, input_version)

        if cfg.report:
            sys.stderr.write("Merging `%s`...\n" % (input_file,))
            sys.stderr.flush()

        for h in tariter.yield_tar_headers(cfg.input):
            write_tar(None, h, cfg.input, cfg.output)

        finish_input(cfg)
    finish_tar(cfg.output)

def ab_unwrap(cfg : Namespace) -> None:
    """Copy the payload of the input Android Backup file into a `.tar` output."""
    begin_ab_input(cfg)
    begin_output(cfg, ".tar")
    copy_input_to_output(cfg)
    finish_input(cfg)
    finish_output(cfg)

def ab_wrap(cfg : Namespace) -> None:
    """Copy the input file into an `.ab` output behind an Android Backup
    header of version `cfg.output_version`, compressed when `cfg.compress`
    is set.
    """
    begin_input(cfg, [".tar"])
    begin_ab_output(cfg, ".ab", cfg.output_version, cfg.compress)
    copy_input_to_output(cfg)
    finish_input(cfg)
    finish_output(cfg)
def add_examples(fmt : _t.Any) -> None:
    """Append the "Usage notes" and "Examples" parts of the help text to `fmt`.

    `fmt` has to provide `add_text`, `add_code`, `start_section` and
    `end_section`.
    """
    fmt.add_text("# Usage notes")

    fmt.add_text('Giving an encrypted `INPUT_AB_FILE` as input, not specifying `--passphrase` or `--passfile`, and not having a file named `{INPUT_AB_FILE with ".ab" or ".adb" extension replaced with ".passphrase.txt"}` in the same directory will cause the passphrase to be read interactively from the tty.')

    fmt.add_text("# Examples")

    fmt.start_section("List contents of an Android Backup file")
    fmt.add_code(f"{__package__} ls backup.ab")
    fmt.end_section()

    fmt.start_section(f"Use `tar` util to list contents of an Android Backup file instead of running `{__package__} ls`")
    fmt.add_code(f"{__package__} unwrap backup.ab - | tar -tvf -")
    fmt.end_section()

    fmt.start_section("Extract contents of an Android Backup file")
    fmt.add_code(f"{__package__} unwrap backup.ab - | tar -xvf -")
    fmt.end_section()

    fmt.start_section("Strip encryption and compression from an Android Backup file")
    fmt.add_code(f"""# equivalent
{__package__} strip backup.ab backup.stripped.ab
{__package__} strip backup.ab
""")
    fmt.add_code(f"""# equivalent
{__package__} strip --passphrase secret backup.ab
{__package__} strip -p secret backup.ab
""")
    fmt.add_code(f"""# with passphrase taken from a file
echo -n secret > backup.passphrase.txt
# equivalent
{__package__} strip backup.ab
{__package__} strip --passfile backup.passphrase.txt backup.ab
""")
    fmt.add_code(f"""# with a weird passphrase taken from a file
echo -ne "secret\\r\\n\\x00another line" > backup.passphrase.txt
{__package__} strip backup.ab
""")
    fmt.end_section()

    fmt.start_section("Strip encryption but keep compression, if any")
    fmt.add_code(f"""# equivalent
{__package__} strip --keep-compression backup.ab backup.stripped.ab
{__package__} strip -k backup.ab
""")
    fmt.end_section()

    fmt.start_section("Strip encryption and compression from an Android Backup file and then re-compress using `xz`")
    fmt.add_code(f"""{__package__} strip backup.ab - | xz --compress -9 - > backup.ab.xz
# ... and then convert to tar and list contents:
xzcat backup.ab.xz | {__package__} unwrap - | tar -tvf -
""")
    fmt.end_section()

    fmt.start_section("Convert an Android Backup file into a TAR archive")
    fmt.add_code(f"""# equivalent
{__package__} unwrap backup.ab backup.tar
{__package__} unwrap backup.ab
""")
    fmt.end_section()

    fmt.start_section("Convert a TAR archive into an Android Backup file")
    fmt.add_code(f"""# equivalent
{__package__} wrap --output-version=5 backup.tar backup.ab
{__package__} wrap --output-version=5 backup.tar
""")
    fmt.end_section()
def make_argparser(real : bool = True) -> _t.Any:
    """Build the command line parser with all of its subcommands.

    With `real` set, the passphrase options are attached to every subcommand
    that reads an Android Backup file; otherwise they are attached once, to
    the root parser (`main` uses this for the Markdown help output).
    """
    _ = gettext

    parser = argparse.BetterArgumentParser(
        prog=__package__,
        description = _("A handy Swiss-army-knife-like utility for manipulating Android Backup files (`*.ab`, `*.adb`) produced by `adb backup`, `bmgr`, and similar tools."),
        additional_sections = [add_examples],
        allow_abbrev = False,
        add_help = False,
        add_version = True)
    parser.add_argument("-h", "--help", action="store_true", help=_("show this help message and exit"))
    parser.add_argument("--markdown", action="store_true", help=_("show help messages formatted in Markdown"))
    parser.set_defaults(func=None)

    def no_cmd(cfg : Namespace) -> None:
        parser.print_help(sys.stderr)
        parser.error(_("no subcommand specified"))
    parser.set_defaults(func=no_cmd)

    def add_pass(cmd : _t.Any) -> None:
        agrp = cmd.add_argument_group(_("passphrase"))
        grp = agrp.add_mutually_exclusive_group()
        grp.add_argument("-p", "--passphrase", type=str, help=_("passphrase for an encrypted `INPUT_AB_FILE`"))
        grp.add_argument("--passfile", type=str, help=_('a file containing the passphrase for an encrypted `INPUT_AB_FILE`; similar to `-p` option but the whole contents of the file will be used verbatim, allowing you to, e.g. use new line symbols or strange character encodings in there; default: guess based on `INPUT_AB_FILE` trying to replace ".ab" and ".adb" extensions with ".passphrase.txt"'))

    if not real:
        add_pass(parser)

    subparsers = parser.add_subparsers(title="subcommands")

    def add_input(cmd : _t.Any) -> None:
        cmd.add_argument("input_file", metavar="INPUT_AB_FILE", type=str, help=_('an Android Backup file to be used as input, set to "-" to use standard input'))

    def add_output(cmd : _t.Any, extension : str,
                   input_metavar : str = "INPUT_AB_FILE",
                   input_exts : str = '".ab" and ".adb" extension') -> None:
        # the help text names the input argument of the subcommand it is
        # attached to; the template is translated first and filled in after
        cmd.add_argument("output_file", metavar="OUTPUT_AB_FILE", nargs="?", default=None, type=str, help=_('file to write the output to, set to "-" to use standard output; default: "-" if `%s` is "-", otherwise replace %s of `%s` with `%s`') % (input_metavar, input_exts, input_metavar, extension))

    cmd = subparsers.add_parser("ls", aliases = ["list"],
                                help=_("list contents of an Android Backup file"),
                                description=_("List contents of an Android Backup file similar to how `tar -tvf` would do, but this will also show Android Backup file version and compression flags."))
    if real: add_pass(cmd)
    add_input(cmd)
    cmd.set_defaults(func=ab_ls)

    cmd = subparsers.add_parser("strip", aliases = ["ab2ab"],
                                help=_("strip encryption and compression from an Android Backup file"),
                                description=_("""Convert an Android Backup file into another Android Backup file with encryption and (optionally) compression stripped away.
I.e. convert an Android Backup file into a simple unencrypted (plain-text) and uncompressed version of the same.

Versioning parameters and the TAR file stored inside the input file are copied into the output file verbatim.

Useful e.g. if your Android firmware forces you to encrypt your backups but you store your backups on an encrypted media anyway and don't want to remember more passphrases than strictly necessary.
Or if you want to strip encryption and compression and re-compress using something better than zlib."""))
    if real: add_pass(cmd)
    grp = cmd.add_mutually_exclusive_group()
    grp.add_argument("-d", "--decompress", action="store_true", help=_("produce decompressed output; this is the default"))
    grp.add_argument("-k", "--keep-compression", action="store_true", help=_("copy compression flag and data from input to output as-is; this will make the output into a compressed Android Backup file if the source is compressed; this is the fastest way to `strip`, since it just copies bytes around as-is"))
    grp.add_argument("-c", "--compress", action="store_true", help=_("(re-)compress the output file; this could take awhile"))

    add_input(cmd)
    add_output(cmd, ".stripped.ab")
    cmd.set_defaults(func=ab_strip)

    cmd = subparsers.add_parser("split", aliases = ["ab2many"],
                                help=_("split a full-system Android Backup file into a bunch of per-app Android Backup files"),
                                description=_("""Split a full-system Android Backup file into a bunch of per-app Android Backup files.

Resulting per-app files can be given to `adb restore` to restore selected apps.

Also, if you do backups regularly, then splitting large Android Backup files like this and then deduplicating per-app files between backups could save lots of disk space.
"""))
    if real: add_pass(cmd)
    cmd.add_argument("--prefix", type=str, help=_('file name prefix for output files; default: `abarms_split_backup` if `INPUT_AB_FILE` is "-", `abarms_split_` otherwise'))
    cmd.add_argument("-c", "--compress", action="store_true", help=_("compress per-app output files"))
    add_input(cmd)
    cmd.set_defaults(func=ab_split)


    cmd = subparsers.add_parser("merge", aliases = ["many2ab"],
                                help=_("merge a bunch of Android Backup files into one"),
                                description=_("""Merge many smaller Android Backup files into a single larger one.
A reverse operation to `split`.

This exists mostly for checking that `split` is not buggy.
"""))
    if real: add_pass(cmd)
    cmd.add_argument("-c", "--compress", action="store_true", help=_("compress the output file"))
    cmd.add_argument("input_files", metavar="INPUT_AB_FILE", nargs="+", type=str, help=_('Android Backup files to be used as inputs'))
    cmd.add_argument("output_file", metavar="OUTPUT_AB_FILE", type=str, help=_('file to write the output to'))
    cmd.set_defaults(func=ab_merge)

    cmd = subparsers.add_parser("unwrap", aliases = ["ab2tar"],
                                help=_("convert an Android Backup file into a TAR file"),
                                description=_("""Convert an Android Backup file into a TAR file by stripping Android Backup header, decrypting and decompressing as necessary.

The TAR file stored inside the input file gets copied into the output file verbatim."""))
    if real: add_pass(cmd)
    add_input(cmd)
    cmd.add_argument("output_file", metavar="OUTPUT_TAR_FILE", nargs="?", default=None, type=str, help=_('file to write output to, set to "-" to use standard output; default: guess based on `INPUT_AB_FILE` while setting extension to `.tar`'))
    cmd.set_defaults(func=ab_unwrap)

    cmd = subparsers.add_parser("wrap", aliases = ["tar2ab"],
                                help=_("convert a TAR file into an Android Backup file"),
                                description=_(f"""Convert a TAR file into an Android Backup file by prepending Android Backup header and (optionally) compressing TAR data with zlib (the only compression Android Backup file format supports).

The input TAR file gets copied into the output file verbatim.

Note that the above means that unwrapping a `.ab` file, unpacking the resulting `.tar`, editing the resulting files, packing them back with GNU `tar` utility, running `{__package__} wrap`, and then running `adb restore` on the resulting file will probably crash your Android device (phone or whatever) because the Android-side code restoring from the backup expects the data in the packed TAR to be in a certain order and have certain PAX headers, which GNU `tar` will not produce.

So you should only use this on files previously produced by `{__package__} unwrap` or if you know what it is you are doing.

Production of encrypted Android Backup files is not supported at this time.
"""))
    cmd.add_argument("--output-version", type=int, required=True, help=_("Android Backup file version to use (required)"))
    cmd.add_argument("-c", "--compress", action="store_true", help=_("compress the output file"))
    cmd.add_argument("input_file", metavar="INPUT_TAR_FILE", type=str, help=_('a TAR file to be used as input, set to "-" to use standard input'))
    # NOTE(review): `ab_wrap` calls `begin_input(cfg, [".tar"])`, which
    # presumably means ".tar" is the extension that gets replaced; confirm
    add_output(cmd, ".ab", "INPUT_TAR_FILE", '".tar" extension')
    cmd.set_defaults(func=ab_wrap)

    return parser

def main() -> None:
    """Command line entry point: parse `sys.argv`, print help when asked to,
    otherwise run the selected subcommand.

    Exits with status 1 on `KeyboardInterrupt` and on `CatastrophicFailure`,
    after printing a message to stderr.
    """
    parser = make_argparser()
    cfg = parser.parse_args(sys.argv[1:])

    if cfg.help:
        if cfg.markdown:
            parser = make_argparser(False)
            parser.set_formatter_class(argparse.MarkdownBetterHelpFormatter)
            print(parser.format_help(1024))
        else:
            print(parser.format_help())
        sys.exit(0)

    # progress reports only make sense on a terminal
    cfg.report = bool(sys.stderr.isatty())

    try:
        cfg.func(cfg)
    except KeyboardInterrupt:
        print("Interrupted.", file=sys.stderr)
        sys.exit(1)
    except CatastrophicFailure as exc:
        print(str(exc), file=sys.stderr)
        sys.exit(1)

if __name__ == "__main__":
    main()
class BetterHelpFormatter(HelpFormatter):
    """Like argparse.HelpFormatter, but with better formatting.
    Also, it adds `add_code` function.
    """

    def _fill_text(self, text : str, width : int, indent : str) -> str:
        """Wrap each line of `text` separately to `width`, prefixing every
        produced line with `indent`; empty lines are kept as they are.
        """
        import textwrap
        res = []
        for line in text.splitlines():
            if line == "":
                res.append(line)
                continue

            for sub in textwrap.wrap(line, width - len(indent)):
                res.append(indent + sub)
        return "\n".join(res)

    def _split_lines(self, text : str, width : int) -> _t.List[str]:
        """Wrap each line of `text` separately to `width`."""
        import textwrap
        res = []
        for line in text.splitlines():
            res += textwrap.wrap(line, width)
        return res

    def add_code(self, text : str) -> None:
        """Add a block of code; this formatter adds it as stripped plain text."""
        self.add_text(text.strip())

class MarkdownBetterHelpFormatter(BetterHelpFormatter):
    """BetterHelpFormatter that outputs stuff formatted in Markdown"""

    def add_code(self, text : str) -> None:
        """Add a block of code wrapped into a fenced Markdown code block."""
        self.add_text("```\n" + text.strip() + "\n```")

    def _format_usage(self, usage : _t.Optional[str], actions : _t.Any, groups : _t.Any, prefix : _t.Optional[str]) -> str:
        # the given `prefix` is ignored, usage is always rendered with an empty one
        return super()._format_usage(usage, actions, groups, "")

    def _format_action(self, action : _t.Any) -> str:
        """Render an action as a `- `invocation`` list item followed by its
        help text, the first help line being prefixed with `: `.
        """
        # determine the required width and the entry label
        action_header = self._format_action_invocation(action)

        tup = self._current_indent, '', "`" + action_header + "`"
        action_header = '%*s- %s\n' % tup

        # collect the pieces of the action help
        parts = [action_header]

        # if there was help for the action, add it
        if action.help and action.help.strip():
            first = True
            for line in self._expand_help(action).splitlines():
                if first:
                    parts.append('%*s: %s\n' % (self._current_indent, '', line))
                else:
                    parts.append('%*s %s\n' % (self._current_indent, '', line))
                first = False

        # or add a newline if the description doesn't end with one
        elif not action_header.endswith('\n'):
            parts.append('\n')

        # if there are any sub-actions, add their help as well
        for subaction in self._iter_indented_subactions(action):
            parts.append(self._format_action(subaction))

        # return a single string
        return self._join_parts(parts)

    class _Section(HelpFormatter._Section):
        def format_help(self) -> str:
            """Render the section with its heading as a `- heading:` list item."""
            if self.parent is not None:
                self.formatter._indent()
            join = self.formatter._join_parts
            item_help = join([func(*args) for func, args in self.items])
            if self.parent is not None:
                self.formatter._dedent()

            # return nothing if the section was empty
            if not item_help:
                return ''

            # add the heading if the section was non-empty
            if self.heading is not SUPPRESS and self.heading is not None:
                heading = '%*s- %s:\n' % (self.formatter._current_indent, '', self.heading)
            else:
                heading = ''

            # join the section-initial newline, the heading and the help
            return join(['\n', heading, item_help, '\n'])

class BetterArgumentParser(ArgumentParser):
    """Like argparse.ArgumentParser but uses BetterHelpFormatter by default,
    adds `--help` only to the root node, and
    that `--help` prints the help for all the subcommands at once.
    Also, provides `add_version` option.
    """

    formatter_class : _t.Any

    def __init__(self,
                 prog : _t.Optional[str] = None,
                 version : _t.Optional[str] = None,
                 add_version : bool = False, # we set these two to False by default
                 add_help : bool = False,    # so that subparsers don't get them enabled by default
                 additional_sections : _t.Optional[_t.List[_t.Callable[[BetterHelpFormatter], None]]] = None,
                 formatter_class : _t.Any = BetterHelpFormatter,
                 *args : _t.Any, **kwargs : _t.Any) -> None:
        """Create the parser.

        `version` defaults to the installed version of the distribution
        named `prog`, or to "dev" when that can not be looked up.
        `additional_sections` are callables that get a fresh formatter each
        and whose output is appended to the end of `format_help`.
        """
        # `add_help = False`: the `-h` option is added below, by this class
        super().__init__(prog, *args, formatter_class = formatter_class, add_help = False, **kwargs) # type: ignore

        if version is None:
            version = "dev"
            if prog is not None:
                try:
                    import importlib.metadata as meta
                    try:
                        version = meta.version(prog)
                    except meta.PackageNotFoundError:
                        pass
                except ImportError:
                    pass

        self.version = version
        self.add_version = add_version
        self.add_help = add_help
        # a fresh list per parser, never a default shared between instances
        self.additional_sections = [] if additional_sections is None else additional_sections

        default_prefix = '-' if '-' in self.prefix_chars else self.prefix_chars[0]
        if self.add_version:
            self.add_argument(default_prefix*2 + "version", action="version", version="%(prog)s " + version)

        if self.add_help:
            self.add_argument(
                default_prefix + "h", default_prefix*2 + "help", action='help',
                default=SUPPRESS, help=_('show this help message and exit'))

    def set_formatter_class(self, formatter_class : _t.Any) -> None:
        """Set the formatter class of this parser and of its direct subparsers."""
        self.formatter_class = formatter_class
        if hasattr(self._subparsers, "_group_actions"):
            for grp in self._subparsers._group_actions: # type: ignore
                for e in grp.choices.values(): # type: ignore
                    if e.formatter_class != formatter_class:
                        e.formatter_class = formatter_class

    def format_help(self, width : _t.Optional[int] = None) -> str:
        """Render the help of this parser, of all of its subparsers, of the
        additional sections and the epilog, in that order, as one string.

        `width` defaults to the terminal width minus 2.
        """
        if width is None:
            import shutil
            width = shutil.get_terminal_size().columns - 2
        formatter = self.formatter_class(prog=self.prog, width=width)

        formatter.add_usage(self.usage, self._actions, self._mutually_exclusive_groups)
        formatter.add_text(self.description)

        if hasattr(self, "_action_groups"):
            for action_group in self._action_groups:
                formatter.start_section(action_group.title)
                formatter.add_text(action_group.description)
                formatter.add_arguments(action_group._group_actions)
                formatter.end_section()

        res : str = "# " + formatter.format_help()

        if hasattr(self._subparsers, "_group_actions"):
            # aliases map several names to the same parser, render each once
            seen = set()
            for grp in self._subparsers._group_actions: # type: ignore
                for e in grp.choices.values(): # type: ignore
                    if e in seen: continue
                    seen.add(e)
                    e.formatter_class = self.formatter_class
                    res += "\n#" + e.format_help(width=width)

        for gen in self.additional_sections:
            formatter = self.formatter_class(prog=self.prog, width=width)
            gen(formatter)
            res += "\n" + formatter.format_help()

        if self.epilog:
            # rendered with its own formatter: the ones above were already
            # turned into text, adding to them would have no effect
            formatter = self.formatter_class(prog=self.prog, width=width)
            formatter.add_text(self.epilog)
            res += "\n" + formatter.format_help()

        return res
def _format_description(what : str, args : _t.Tuple[_t.Any, ...]) -> str:
    """Apply %-formatting of `args` to `what`.

    Without `args`, a message that is not a valid format string is returned
    verbatim instead of raising from inside an exception constructor.
    """
    if args:
        return what % args
    try:
        return what % ()
    except (TypeError, ValueError):
        return what

class CatastrophicFailure(Exception):
    """An exception with a printable description.

    The description is `what` %-formatted with `args`; `str()` of the
    exception returns it.
    """

    def __init__(self, what : str, *args : _t.Any) -> None:
        # keep the raw pieces in `Exception.args` so that `repr` shows them
        super().__init__(what, *args)
        self.description = _format_description(what, args)

    def __str__(self) -> str:
        return self.description

    def elaborate(self, what : str, *args : _t.Any) -> None:
        """Prefix the description with `what` formatted with `args` and `: `."""
        self.description = _format_description(what, args) + ": " + self.description

class Failure(CatastrophicFailure):
    pass
# upper bound on the size of a single `read` when skipping over file data
BUFFER_SIZE = 16 * 1024 ** 2

def nts(s : bytes, encoding : str, errors : str) -> str:
    """Decode the part of `s` that precedes its first NUL byte (all of `s`
    when there is none) using `encoding` and `errors`.
    """
    end = s.find(b"\0")
    text = s if end == -1 else s[:end]
    return text.decode(encoding, errors)

class ParsingError(Failure): pass
class InvalidHeader(ParsingError): pass

def nti(s : bytes) -> int:
    """Convert a TAR header number field to a python number.

    A field that starts with byte 0o200 or 0o377 holds a big-endian base-256
    number in the remaining bytes (negative when the first byte is 0o377);
    any other field is a NUL-terminated ASCII octal number, blank meaning 0.

    Raises `InvalidHeader` when an octal field can not be parsed.
    """
    marker = s[0]
    if marker in (0o200, 0o377):
        n = int.from_bytes(s[1:], "big")
        if marker == 0o377:
            n = -(256 ** (len(s) - 1) - n)
        return n

    try:
        digits = nts(s, "ascii", "strict")
        return int(digits.strip() or "0", 8)
    except ValueError:
        raise InvalidHeader("invalid TAR header")
@_dc.dataclass
class TarHeader:
    """A parsed TAR file header, as produced by `yield_tar_headers`."""
    path : str
    mode : int
    uid : int
    gid : int
    size : int
    # number of padding bytes that follow `size` bytes of file data
    leftovers : int
    mtime : int
    chksum : int
    ftype : bytes
    linkpath : str
    uname : str
    gname : str
    devmajor : int
    devminor : int

    # the header exactly as it was read from the archive
    raw : bytes
    # PAX records that were applied to this header
    pax_headers : _t.Dict[str, _t.Any]

def parse_pax_headers(data : bytes) -> _t.Dict[str, _t.Any]:
    """Parse the body of a PAX extended header into a `keyword -> value` dict.

    `data` is a sequence of `<length> <keyword>=<value>\\n` records, `length`
    being the size of the whole record in bytes. Keywords are decoded as
    ASCII, values are returned as raw `bytes`.

    Raises `InvalidHeader` when `data` is malformed.
    """
    records : _t.Dict[str, _t.Any] = {}
    try:
        while data:
            length_field, _ = data.split(b" ", 1)
            length = int(length_field)
            # each record has to end with a newline at its declared length
            if length < 1 or data[length - 1:length] != b"\n":
                raise ValueError()
            record, data = data[:length - 1], data[length:]

            _, assignment = record.split(b" ", 1)
            keyword, value = assignment.split(b"=", 1)
            records[keyword.decode("ascii", "strict")] = value
    except ValueError:
        raise InvalidHeader("invalid PAX header data")
    return records
+ """ + global_pax_headers = dict() + pax_headers = dict() + + empty = 0 + while True: + buf = fobj.read(512) + if len(buf) != 512: + raise ParsingError("unexpected EOF") + + path = nts(buf[0:100], encoding, errors) + size = nti(buf[124:136]) + + if path == "" and size == 0: + # empty header + empty += 1 + if empty >= 2: break + else: continue + + if buf[257:265] != b"ustar\x0000": + raise InvalidHeader("invalid TAR header, expecting UStar format") + + mode = nti(buf[100:108]) + uid = nti(buf[108:116]) + gid = nti(buf[116:124]) + mtime = nti(buf[136:148]) + chksum = nti(buf[148:156]) + ftype = buf[156:157] + linkpath = nts(buf[157:257], encoding, errors) + uname = nts(buf[265:297], encoding, errors) + gname = nts(buf[297:329], encoding, errors) + devmajor = nti(buf[329:337]) + devminor = nti(buf[337:345]) + prefix = nts(buf[345:500], encoding, errors) + + if prefix != "": + path = prefix + "/" + path + + if ftype == b"x" or ftype == b"g": + # parse and process PAX headers, see "pax Header Block" section in `man 1 pax` + leftovers = 0 + if size % 512 != 0: + leftovers = 512 - size % 512 + + pax_data = fobj.read(size) + if len(pax_data) != size: + raise ParsingError("unexpected EOF") + + pax_leftovers = fobj.read(leftovers) + if len(pax_leftovers) != leftovers: + raise ParsingError("unexpected EOF") + + pax_prefix = b"".join([buf, pax_data, pax_leftovers]) + parsed_headers = parse_pax_headers(pax_data) + del pax_data + del pax_leftovers + + yield TarHeader(path, mode, uid, gid, + 0, 0, + mtime, chksum, ftype, + linkpath, uname, gname, + devmajor, devminor, + pax_prefix, dict()) + + if ftype == b"g": + global_pax_headers = parsed_headers + pax_headers = global_pax_headers.copy() + else: + pax_headers = global_pax_headers.copy() + pax_headers.update(parsed_headers) + + try: + hcharset = pax_headers["hdrcharset"] + except KeyError: + charset = encoding + else: + if hcharset == b"ISO-IR 10646 2000 UTF-8": + charset = "utf-8" + elif hcharset == b"BINARY": + charset = 
def iter_tar_headers(fobj : _t.Any, encoding : str = "utf-8", errors : str = "surrogateescape") -> _t.Iterator[TarHeader]:
    """Iterate over the non-PAX TAR headers of a file-like object `fobj`.

       PAX (`g` and `x`) headers produced by `yield_tar_headers` are
       filtered out. After each yielded header the file data that belongs
       to it, padding included, will be `read` from `fobj` and thrown out.

       Raises `ParsingError` if `fobj` ends in the middle of file data.
    """
    for header in yield_tar_headers(fobj, encoding, errors):
        if header.ftype in (b"g", b"x"):
            continue

        yield header

        # consume payload plus padding, in bounded chunks
        remaining = header.size + header.leftovers
        while remaining > 0:
            chunk = fobj.read(min(remaining, BUFFER_SIZE))
            if not chunk:
                raise ParsingError("unexpected EOF")
            remaining -= len(chunk)
# Nix expression that builds the `abarms` Python application.
#
# Arguments:
#   pkgs  - package set to build against
#   lib   - library functions, taken from `pkgs` by default
#   debug - when true, also type-check with mypy and list the files
#           seen before the build and installed after it
{ pkgs ? import <nixpkgs> {}
, lib ? pkgs.lib
, debug ? false
}:

with pkgs.python3Packages;

buildPythonApplication (rec {
  pname = "abarms";
  version = "1.0.0";
  format = "pyproject";

  # Use the current directory as the source, minus editor leftovers,
  # build results, caches, and test data.
  src = lib.cleanSourceWith {
    src = ./.;
    filter = name: type: let baseName = baseNameOf (toString name); in
      lib.cleanSourceFilter name type
      # the dot is escaped so that only names ending in a literal `.un~` match
      && (builtins.match ".*\\.un~" baseName == null)
      && (baseName != "default.nix")
      && (baseName != "dist")
      && (baseName != "result")
      && (baseName != "results")
      && (baseName != "__pycache__")
      && (baseName != ".mypy_cache")
      && (baseName != ".pytest_cache")
      && (baseName != "test-data")
    ;
  };

  propagatedBuildInputs = [
    setuptools
    cryptography
  ];

} // lib.optionalAttrs debug {
  nativeBuildInputs = [
    mypy
  ];

  preBuild = "find . ; mypy";
  postInstall = "find $out";
})
"Environment :: Console", +] +keywords = [ + "android", "android backup", + "adb", "ab", + "adb backup", +] +dependencies = [ + 'importlib-metadata; python_version<"3.8"', + "cryptography", +] +requires-python = ">=3.7" +[project.scripts] +abarms = "abarms.__main__:main" diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..26de544 --- /dev/null +++ b/setup.py @@ -0,0 +1,3 @@ +#!/usr/bin/env python3 +from setuptools import setup +setup() diff --git a/update-readme.sh b/update-readme.sh new file mode 100755 index 0000000..d5ccfdf --- /dev/null +++ b/update-readme.sh @@ -0,0 +1,10 @@ +#!/bin/sh -e + +sed -n "0,/# Usage/ p" README.md > README.new +echo >> README.new +python3 -m abarms.__main__ --help --markdown | sed ' +s/^\(#\+\) /#\1 / +s/^\(#\+\) \(abarms[^A-Z[({]*\) [A-Z[({].*/\1 \2/ +' >> README.new +mv README.new README.md +pandoc -s -V pagetitle=README -f markdown -t html README.md > README.html