Skip to content

Commit

Permalink
Utility to reversion whls (#5145)
Browse files Browse the repository at this point in the history
### Problem

After feedback in #5118, #4896 will now propose to build suffixed releases of pants in travis on relevant platforms, fetch them to a releaser's machine, and then "stabilize"/"re-version" them with an unsuffixed version before sending them to pypi.

AFAICT, there are no existing utilities for re-versioning wheels (perhaps because it's not a great idea? *shrug*).

### Solution

Implement a tool for re-versioning whl files by find-replacing the version str in all files in the whl and then updating the `RECORD` file with new fingerprints. Because of the blind find-replace, this is useful for stabilizing from a version like `1.4.0.dev21+b9121c0c4` to `1.4.0.dev21`, but probably a bit risky for heading in the opposite direction.

### Result

#5118 will be able to use a command like:
```
./pants -q run src/python/pants/releases:reversion -- requests-2.18.4-py2.py3-none-any.whl ${dist} 17.4.0.dev21
```
...to re-version whls fetched from travis.

I did some local testing to confirm that pip is able to install a re-versioned whl.
  • Loading branch information
Stu Hood authored Nov 30, 2017
1 parent c4e1cd6 commit ef5c12b
Show file tree
Hide file tree
Showing 5 changed files with 241 additions and 0 deletions.
10 changes: 10 additions & 0 deletions src/python/pants/releases/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Copyright 2017 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

# Binary target for the whl re-versioning tool implemented in `reversion.py`.
python_binary(
    name = 'reversion',
    source = 'reversion.py',
    dependencies = [
        'src/python/pants/util:contextutil',
        # `reversion.py` imports `read_file` and `safe_file_dump` from `pants.util.dirutil`, so the
        # dependency is declared explicitly rather than relied upon transitively.
        'src/python/pants/util:dirutil',
    ],
)
159 changes: 159 additions & 0 deletions src/python/pants/releases/reversion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
# coding=utf-8
# Copyright 2017 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import (absolute_import, division, generators, nested_scopes, print_function,
unicode_literals, with_statement)

import argparse
import base64
import fnmatch
import glob
import hashlib
import json
import os
import zipfile

from pants.util.contextutil import open_zip, temporary_dir
from pants.util.dirutil import read_file, safe_file_dump


def replace_in_file(workspace, src_file_path, from_str, to_str):
    """Substitute `to_str` for `from_str` in both the content and the name of a file.

    `src_file_path` is interpreted relative to `workspace`. When neither the file content nor
    the path contains `from_str`, nothing is touched and None is returned. Otherwise the
    rewritten content is written to the (possibly renamed) path, the original file is removed if
    the name changed, and the resulting relative path is returned.
    """
    # Content is matched as bytes, so both versions must be ASCII-encodable.
    needle = from_str.encode('ascii')
    replacement = to_str.encode('ascii')

    src_abs_path = os.path.join(workspace, src_file_path)
    content = read_file(src_abs_path)
    if not (needle in content or from_str in src_file_path):
        return None

    renamed_path = src_file_path.replace(from_str, to_str)
    safe_file_dump(os.path.join(workspace, renamed_path), content.replace(needle, replacement))
    if renamed_path != src_file_path:
        # The file moved: drop the stale copy under the old name.
        os.unlink(src_abs_path)
    return renamed_path


def any_match(globs, filename):
    """Return True if `filename` matches at least one of the fnmatch-style `globs`."""
    for pattern in globs:
        if fnmatch.fnmatch(filename, pattern):
            return True
    return False


def locate_dist_info_dir(workspace):
    """Return the workspace-relative path of the single `*.dist-info` directory.

    Raises an Exception if the workspace contains no such entry, or more than one.
    """
    pattern = '*.dist-info'
    found = glob.glob(os.path.join(workspace, pattern))
    count = len(found)
    if count == 1:
        return os.path.relpath(found[0], workspace)
    if count == 0:
        raise Exception('Unable to locate `{}` directory in input whl.'.format(pattern))
    raise Exception('Too many `{}` directories in input whl: {}'.format(pattern, found))


def fingerprint_file(workspace, filename):
    """Given a relative filename located in a workspace, fingerprint the file.

    Returns a tuple of fingerprint string and size string, in the form used by the entries of a
    whl `RECORD` file: the fingerprint is `sha256=` followed by the urlsafe base64 encoding of the
    digest with the trailing `=` padding removed, and the size is the file's length in bytes.
    """
    # The hash and size are computed over the raw bytes, so read the file in binary mode.
    with open(os.path.join(workspace, filename), 'rb') as f:
        content = f.read()
    digest = hashlib.sha256(content).digest()
    # The whl format specifies urlsafe base64 without padding for RECORD hashes; plain
    # `b64encode` would emit `+`, `/` and `=` characters that do not match that encoding.
    # Decoding to text keeps a bytes repr out of the formatted string.
    encoded = base64.urlsafe_b64encode(digest).rstrip(b'=').decode('ascii')
    return 'sha256={}'.format(encoded), str(len(content))


def rewrite_record_file(workspace, src_record_file, mutated_file_tuples):
    """Given a RECORD file and list of mutated file tuples, update the RECORD file in place.

    The RECORD file should always be a member of the mutated files, due to both containing
    versions, and having a version in its filename.

    `mutated_file_tuples` is an iterable of (source name, destination name) pairs, relative to
    `workspace`. The entry whose source is `src_record_file` identifies the RECORD file under its
    new name; every other destination has its fingerprint and size recomputed in the RECORD.

    Raises an Exception if `src_record_file` is not among the mutated files.
    """
    mutated_files = set()
    dst_record_file = None
    for src, dst in mutated_file_tuples:
        if src == src_record_file:
            dst_record_file = dst
        else:
            mutated_files.add(dst)
    if not dst_record_file:
        raise Exception('Malformed whl or bad globs: `{}` was not rewritten.'.format(src_record_file))

    record_path = os.path.join(workspace, dst_record_file)
    output_records = []
    for line in read_file(record_path).splitlines():
        if not line.strip():
            # Tolerate stray blank lines rather than failing to unpack them below.
            continue
        # Each entry is `path,hash,size`. Splitting from the right exactly twice yields three
        # fields even if the path itself contains a comma.
        filename, fingerprint_str, size_str = line.rsplit(',', 2)
        if filename in mutated_files:
            fingerprint_str, size_str = fingerprint_file(workspace, filename)
            output_line = ','.join((filename, fingerprint_str, size_str))
        else:
            output_line = line
        output_records.append(output_line)

    safe_file_dump(record_path, '\r\n'.join(output_records) + '\r\n')


def _read_input_version(workspace, dist_info_dir):
    """Return the version declared in the dist-info metadata of the extracted whl.

    Prefers `metadata.json` when it exists, and otherwise falls back to the `Version:` header of
    the `METADATA` file. Raises an Exception if `METADATA` has no such header.
    """
    json_path = os.path.join(workspace, dist_info_dir, 'metadata.json')
    if os.path.isfile(json_path):
        with open(json_path, 'r') as info:
            return json.load(info)['version']

    # `metadata.json` is not a required member of a whl, whereas `METADATA` is: it holds
    # `Key: value` headers followed by a blank line and an optional description body.
    with open(os.path.join(workspace, dist_info_dir, 'METADATA'), 'rb') as info:
        for raw_line in info:
            line = raw_line.decode('utf-8', 'replace')
            if not line.strip():
                # End of the headers: the remainder is free-form description text.
                break
            if line[:1].isspace():
                # Continuation of the previous header's value.
                continue
            key, sep, value = line.partition(':')
            if sep and key.strip().lower() == 'version':
                return value.strip()
    raise Exception(
        'Unable to determine the version of the input whl from `{}`.'.format(dist_info_dir))


def reversion(args):
    """Write a copy of the whl `args.whl_file` with its version changed to `args.target_version`.

    The whl is extracted to a temporary workspace, the input version is replaced in the names
    and content of all files matching `args.glob`, the `RECORD` file is updated with the new
    fingerprints, and the result is zipped into `args.dest_dir` under the input whl's basename
    with the version replaced.
    """
    with temporary_dir() as workspace:
        # Extract the input.
        with open_zip(args.whl_file, 'r') as whl:
            src_filenames = whl.namelist()
            whl.extractall(workspace)

        # Determine the location of the `dist-info` directory.
        dist_info_dir = locate_dist_info_dir(workspace)
        record_file = os.path.join(dist_info_dir, 'RECORD')

        # Load the version of the input whl from its metadata.
        input_version = _read_input_version(workspace, dist_info_dir)

        # Rewrite and move all files (including the RECORD file), recording which files need to be
        # re-fingerprinted due to content changes.
        dst_filenames = []
        refingerprint = []
        for src_filename in src_filenames:
            if os.path.isdir(os.path.join(workspace, src_filename)):
                continue
            dst_filename = src_filename
            if any_match(args.glob, src_filename):
                rewritten = replace_in_file(workspace, src_filename, input_version, args.target_version)
                if rewritten is not None:
                    dst_filename = rewritten
                    refingerprint.append((src_filename, dst_filename))
            dst_filenames.append(dst_filename)

        # Refingerprint relevant entries in the RECORD file under their new names.
        rewrite_record_file(workspace, record_file, refingerprint)

        # Create a new output whl in the destination.
        dst_whl_filename = os.path.basename(args.whl_file).replace(input_version, args.target_version)
        dst_whl_file = os.path.join(args.dest_dir, dst_whl_filename)
        with open_zip(dst_whl_file, 'w', zipfile.ZIP_DEFLATED) as whl:
            for dst_filename in dst_filenames:
                whl.write(os.path.join(workspace, dst_filename), dst_filename)

        print('Wrote whl with version {} to {}.\n'.format(args.target_version, dst_whl_file))


def main():
    """Parse the command line and create a re-versioned copy of the input whl.

    Takes an input whl file, a destination directory and a target version. The version is
    changed via string replacement in the files matching a list of globs; pass the optional
    `--glob` argument to add additional globs, i.e. `--glob='thing-to-match*.txt'`.
    """
    default_globs = [
        '*.dist-info/*',
        '*-nspkg.pth',
    ]
    positionals = (
        ('whl_file', 'The input whl file.'),
        ('dest_dir', 'The destination directory for the output whl.'),
        ('target_version', 'The target version of the output whl.'),
    )

    parser = argparse.ArgumentParser()
    for arg_name, description in positionals:
        parser.add_argument(arg_name, help=description)
    # With `append`, globs given on the command line are added to the default list.
    parser.add_argument(
        '--glob',
        action='append',
        default=default_globs,
        help='Globs (fnmatch) to rewrite within the whl: may be specified multiple times.')
    reversion(parser.parse_args())


if __name__ == '__main__':
    main()
12 changes: 12 additions & 0 deletions tests/python/pants_test/releases/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Copyright 2017 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

# Integration test for the `src/python/pants/releases:reversion` binary.
python_tests(
    sources = ['test_reversion.py'],
    dependencies = [
        '3rdparty/python:pex',
        '3rdparty/python:requests',
        # `test_reversion.py` imports `temporary_dir` from `pants.util.contextutil`.
        'src/python/pants/util:contextutil',
        'tests/python/pants_test:int-test',
    ],
    tags = {'integration'},
)
Empty file.
60 changes: 60 additions & 0 deletions tests/python/pants_test/releases/test_reversion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# coding=utf-8
# Copyright 2017 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import (absolute_import, division, generators, nested_scopes, print_function,
unicode_literals, with_statement)

import os
import shutil

import requests
from pex.bin import pex as pex_main

from pants.util.contextutil import temporary_dir
from pants_test.pants_run_integration_test import PantsRunIntegrationTest


class ReversionTest(PantsRunIntegrationTest):
    """Integration test for the `src/python/pants/releases:reversion` binary."""

    def test_run(self):
        """Re-version a downloaded whl and confirm that pex can consume the result."""
        with temporary_dir() as dest_dir:
            # Download an input whl.
            # TODO: Not happy about downloading things. Attempted to:
            #  ./pants setup-py --run="bdist_wheel" $target
            # but was unable to locate the output whl in the context of a test (perhaps due to
            # mismatched cwd?)
            name_template = 'virtualenv-{}-py2.py3-none-any.whl'
            input_name = name_template.format('15.1.0')
            url = (
                'https://files.pythonhosted.org/packages/6f/86/'
                '3dc328ee7b1a6419ebfac7896d882fba83c48e3561d22ddddf38294d3e83/{}'.format(input_name)
            )
            input_whl_file = os.path.join(dest_dir, input_name)
            response = requests.get(url, stream=True)
            try:
                # Fail here on an HTTP error, rather than saving an error page as the whl and
                # failing confusingly when it is later opened as a zip.
                response.raise_for_status()
                with open(input_whl_file, 'wb') as f:
                    shutil.copyfileobj(response.raw, f)
            finally:
                # Release the streamed connection whether or not the download succeeded.
                response.close()

            # Rewrite it.
            output_version = '9.1.9'
            output_name = name_template.format(output_version)
            output_whl_file = os.path.join(dest_dir, output_name)
            command = [
                'run',
                '-q',
                'src/python/pants/releases:reversion',
                '--',
                input_whl_file,
                dest_dir,
                output_version,
            ]
            self.assert_success(self.run_pants(command))
            self.assertTrue(os.path.isfile(output_whl_file))

            # Confirm that it can be consumed.
            output_pex_file = os.path.join(dest_dir, 'out.pex')
            pex_main.main([
                '--disable-cache',
                '-o', output_pex_file,
                output_whl_file,
            ])
            self.assertTrue(os.path.isfile(output_pex_file))

0 comments on commit ef5c12b

Please sign in to comment.