From ef5c12be5ea1e460e7e2bc9c2f9ab8d1026a47a0 Mon Sep 17 00:00:00 2001 From: Stu Hood Date: Thu, 30 Nov 2017 15:38:30 -0700 Subject: [PATCH] Utility to reversion whls (#5145) ### Problem After feedback in #5118, #4896 will now propose to build suffixed releases of pants in travis on relevant platforms, fetch them to a releaser's machine, and then "stabilize"/"re-version" them with an unsuffixed version before sending them to pypi. AFAICT, there are no utilities for re-versioning wheels around (perhaps because it's not a great idea? *shrug). ### Solution Implement a tool for re-versioning whl files by find-replacing the version str in all files in the whl and then updating the `RECORD` file with new fingerprints. Because of the blind find-replace, this is useful for stabilizing from a version like `1.4.0.dev21+b9121c0c4` to `1.4.0.dev21`, but probably a bit risky for heading in the opposite direction. ### Result #5118 will be able to use a command like: ``` ./pants -q run src/python/pants/releases:reversion -- requests-2.18.4-py2.py3-none-any.whl ${dist} 17.4.0.dev21 ``` ...to re-version whls fetched from travis. I did some local testing to confirm that pip is able to install a re-versioned whl. 
--- src/python/pants/releases/BUILD | 10 ++ src/python/pants/releases/reversion.py | 159 ++++++++++++++++++ tests/python/pants_test/releases/BUILD | 12 ++ tests/python/pants_test/releases/__init__.py | 0 .../pants_test/releases/test_reversion.py | 60 +++++++ 5 files changed, 241 insertions(+) create mode 100644 src/python/pants/releases/BUILD create mode 100644 src/python/pants/releases/reversion.py create mode 100644 tests/python/pants_test/releases/BUILD create mode 100644 tests/python/pants_test/releases/__init__.py create mode 100644 tests/python/pants_test/releases/test_reversion.py diff --git a/src/python/pants/releases/BUILD b/src/python/pants/releases/BUILD new file mode 100644 index 00000000000..d40b081b6a0 --- /dev/null +++ b/src/python/pants/releases/BUILD @@ -0,0 +1,10 @@ +# Copyright 2017 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). + +python_binary( + name = 'reversion', + source = 'reversion.py', + dependencies = [ + 'src/python/pants/util:contextutil', + ], +) diff --git a/src/python/pants/releases/reversion.py b/src/python/pants/releases/reversion.py new file mode 100644 index 00000000000..2263d059880 --- /dev/null +++ b/src/python/pants/releases/reversion.py @@ -0,0 +1,159 @@ +# coding=utf-8 +# Copyright 2017 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). + +from __future__ import (absolute_import, division, generators, nested_scopes, print_function, + unicode_literals, with_statement) + +import argparse +import base64 +import fnmatch +import glob +import hashlib +import json +import os +import zipfile + +from pants.util.contextutil import open_zip, temporary_dir +from pants.util.dirutil import read_file, safe_file_dump + + +def replace_in_file(workspace, src_file_path, from_str, to_str): + """Replace from_str with to_str in the name and content of the given file. 
+ + If any edits were necessary, returns the new filename (which may be the same as the old filename). + """ + from_bytes = from_str.encode('ascii') + to_bytes = to_str.encode('ascii') + data = read_file(os.path.join(workspace, src_file_path)) + if from_bytes not in data and from_str not in src_file_path: + return None + + dst_file_path = src_file_path.replace(from_str, to_str) + safe_file_dump(os.path.join(workspace, dst_file_path), data.replace(from_bytes, to_bytes)) + if src_file_path != dst_file_path: + os.unlink(os.path.join(workspace, src_file_path)) + return dst_file_path + + +def any_match(globs, filename): + return any(fnmatch.fnmatch(filename, g) for g in globs) + + +def locate_dist_info_dir(workspace): + dir_suffix = '*.dist-info' + matches = glob.glob(os.path.join(workspace, dir_suffix)) + if not matches: + raise Exception('Unable to locate `{}` directory in input whl.'.format(dir_suffix)) + if len(matches) > 1: + raise Exception('Too many `{}` directories in input whl: {}'.format(dir_suffix, matches)) + return os.path.relpath(matches[0], workspace) + + +def fingerprint_file(workspace, filename): + """Given a relative filename located in a workspace, fingerprint the file. + + Returns a tuple of fingerprint string and size string. + """ + content = read_file(os.path.join(workspace, filename)) + fingerprint = hashlib.sha256(content) + return 'sha256={}'.format(base64.b64encode(fingerprint.digest())), str(len(content)) + + +def rewrite_record_file(workspace, src_record_file, mutated_file_tuples): + """Given a RECORD file and list of mutated file tuples, update the RECORD file in place. + + The RECORD file should always be a member of the mutated files, due to both containing + versions, and having a version in its filename. 
+ """ + mutated_files = set() + dst_record_file = None + for src, dst in mutated_file_tuples: + if src == src_record_file: + dst_record_file = dst + else: + mutated_files.add(dst) + if not dst_record_file: + raise Exception('Malformed whl or bad globs: `{}` was not rewritten.'.format(src_record_file)) + + output_records = [] + for line in read_file(os.path.join(workspace, dst_record_file)).splitlines(): + filename, fingerprint_str, size_str = line.rsplit(',', 3) + if filename in mutated_files: + fingerprint_str, size_str = fingerprint_file(workspace, filename) + output_line = ','.join((filename, fingerprint_str, size_str)) + else: + output_line = line + output_records.append(output_line) + + safe_file_dump(os.path.join(workspace, dst_record_file), '\r\n'.join(output_records) + '\r\n') + + +def reversion(args): + with temporary_dir() as workspace: + # Extract the input. + with open_zip(args.whl_file, 'r') as whl: + src_filenames = whl.namelist() + whl.extractall(workspace) + + # Determine the location of the `dist-info` directory. + dist_info_dir = locate_dist_info_dir(workspace) + record_file = os.path.join(dist_info_dir, 'RECORD') + + # Load metadata for the input whl. + with open(os.path.join(workspace, dist_info_dir, 'metadata.json'), 'r') as info: + metadata = json.load(info) + input_version = metadata['version'] + + # Rewrite and move all files (including the RECORD file), recording which files need to be + # re-fingerprinted due to content changes. 
+ dst_filenames = [] + refingerprint = [] + for src_filename in src_filenames: + if os.path.isdir(os.path.join(workspace, src_filename)): + continue + dst_filename = src_filename + if any_match(args.glob, src_filename): + rewritten = replace_in_file(workspace, src_filename, input_version, args.target_version) + if rewritten is not None: + dst_filename = rewritten + refingerprint.append((src_filename, dst_filename)) + dst_filenames.append(dst_filename) + + # Refingerprint relevant entries in the RECORD file under their new names. + rewrite_record_file(workspace, record_file, refingerprint) + + # Create a new output whl in the destination. + dst_whl_filename = os.path.basename(args.whl_file).replace(input_version, args.target_version) + dst_whl_file = os.path.join(args.dest_dir, dst_whl_filename) + with open_zip(dst_whl_file, 'w', zipfile.ZIP_DEFLATED) as whl: + for dst_filename in dst_filenames: + whl.write(os.path.join(workspace, dst_filename), dst_filename) + + print('Wrote whl with version {} to {}.\n'.format(args.target_version, dst_whl_file)) + + +def main(): + """Given an input whl file and target version, create a copy of the whl with that version. + + This is accomplished via string replacement in files matching a list of globs. Pass the + optional `--glob` argument to add additional globs: ie `--glob='thing-to-match*.txt'`. 
+ """ + parser = argparse.ArgumentParser() + parser.add_argument('whl_file', + help='The input whl file.') + parser.add_argument('dest_dir', + help='The destination directory for the output whl.') + parser.add_argument('target_version', + help='The target version of the output whl.') + parser.add_argument('--glob', action='append', + default=[ + '*.dist-info/*', + '*-nspkg.pth', + ], + help='Globs (fnmatch) to rewrite within the whl: may be specified multiple times.') + args = parser.parse_args() + reversion(args) + +if __name__ == '__main__': + main() diff --git a/tests/python/pants_test/releases/BUILD b/tests/python/pants_test/releases/BUILD new file mode 100644 index 00000000000..3bd2f58d867 --- /dev/null +++ b/tests/python/pants_test/releases/BUILD @@ -0,0 +1,12 @@ +# Copyright 2017 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). + +python_tests( + sources = ['test_reversion.py'], + dependencies = [ + '3rdparty/python:pex', + '3rdparty/python:requests', + 'tests/python/pants_test:int-test' + ], + tags = {'integration'}, +) diff --git a/tests/python/pants_test/releases/__init__.py b/tests/python/pants_test/releases/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/python/pants_test/releases/test_reversion.py b/tests/python/pants_test/releases/test_reversion.py new file mode 100644 index 00000000000..586b55f7c43 --- /dev/null +++ b/tests/python/pants_test/releases/test_reversion.py @@ -0,0 +1,60 @@ +# coding=utf-8 +# Copyright 2017 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). 
+ +from __future__ import (absolute_import, division, generators, nested_scopes, print_function, + unicode_literals, with_statement) + +import os +import shutil + +import requests +from pex.bin import pex as pex_main + +from pants.util.contextutil import temporary_dir +from pants_test.pants_run_integration_test import PantsRunIntegrationTest + + +class ReversionTest(PantsRunIntegrationTest): + + def test_run(self): + with temporary_dir() as dest_dir: + # Download an input whl. + # TODO: Not happy about downloading things. Attempted to: + # ./pants setup-py --run="bdist_wheel" $target + # but was unable to locate the output whl in the context of a test (perhaps due to + # mismatched cwd?) + name_template ='virtualenv-{}-py2.py3-none-any.whl' + input_name = name_template.format('15.1.0') + url = ( + 'https://files.pythonhosted.org/packages/6f/86/' + '3dc328ee7b1a6419ebfac7896d882fba83c48e3561d22ddddf38294d3e83/{}'.format(input_name) + ) + input_whl_file = os.path.join(dest_dir, input_name) + with open(input_whl_file, 'wb') as f: + shutil.copyfileobj(requests.get(url, stream=True).raw, f) + + # Rewrite it. + output_version = '9.1.9' + output_name = name_template.format(output_version) + output_whl_file = os.path.join(dest_dir, output_name) + command = [ + 'run', + '-q', + 'src/python/pants/releases:reversion', + '--', + input_whl_file, + dest_dir, + output_version, + ] + self.assert_success(self.run_pants(command)) + self.assertTrue(os.path.isfile(output_whl_file)) + + # Confirm that it can be consumed. + output_pex_file = os.path.join(dest_dir, 'out.pex') + pex_main.main([ + '--disable-cache', + '-o', output_pex_file, + output_whl_file, + ]) + self.assertTrue(os.path.isfile(output_pex_file))