Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Major improvements to spack create #2707

Merged
merged 17 commits into from
Jan 17, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
297 changes: 140 additions & 157 deletions lib/spack/docs/packaging_guide.rst

Large diffs are not rendered by default.

138 changes: 90 additions & 48 deletions lib/spack/spack/cmd/checksum.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
# License along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
##############################################################################
from __future__ import print_function

import argparse
import hashlib

Expand All @@ -30,93 +32,133 @@
import spack.cmd
import spack.util.crypto
from spack.stage import Stage, FailedDownloadError
from spack.util.naming import *
from spack.version import *

description = "Checksum available versions of a package."


def setup_parser(subparser):
subparser.add_argument(
'package', metavar='PACKAGE', help='Package to list versions for')
'package',
help='Package to checksum versions for')
subparser.add_argument(
'--keep-stage', action='store_true', dest='keep_stage',
'--keep-stage', action='store_true',
help="Don't clean up staging area when command completes.")
subparser.add_argument(
'versions', nargs=argparse.REMAINDER,
help='Versions to generate checksums for')


def get_checksums(versions, urls, **kwargs):
# Allow commands like create() to do some analysis on the first
# archive after it is downloaded.
def get_checksums(url_dict, name, **kwargs):
"""Fetches and checksums archives from URLs.

This function is called by both ``spack checksum`` and ``spack create``.
The ``first_stage_function`` kwarg allows ``spack create`` to determine
things like the build system of the archive.

:param dict url_dict: A dictionary of the form: version -> URL
:param str name: The name of the package
:param callable first_stage_function: Function to run on first staging area
:param bool keep_stage: Don't clean up staging area when command completes

:returns: A multi-line string containing versions and corresponding hashes
:rtype: str
"""
first_stage_function = kwargs.get('first_stage_function', None)
keep_stage = kwargs.get('keep_stage', False)

sorted_versions = sorted(url_dict.keys(), reverse=True)

# Find length of longest string in the list for padding
max_len = max(len(str(v)) for v in sorted_versions)
num_ver = len(sorted_versions)

tty.msg("Found {0} version{1} of {2}:".format(
num_ver, '' if num_ver == 1 else 's', name),
"",
*spack.cmd.elide_list(
["{0:{1}} {2}".format(v, max_len, url_dict[v])
for v in sorted_versions]))
print()

archives_to_fetch = tty.get_number(
"How many would you like to checksum?", default=1, abort='q')

if not archives_to_fetch:
tty.die("Aborted.")

versions = sorted_versions[:archives_to_fetch]
urls = [url_dict[v] for v in versions]

tty.msg("Downloading...")
hashes = []
version_hashes = []
i = 0
for url, version in zip(urls, versions):
try:
with Stage(url, keep=keep_stage) as stage:
# Fetch the archive
stage.fetch()
if i == 0 and first_stage_function:
# Only run first_stage_function the first time,
# no need to run it every time
first_stage_function(stage, url)

hashes.append((version, spack.util.crypto.checksum(
# Checksum the archive and add it to the list
version_hashes.append((version, spack.util.crypto.checksum(
hashlib.md5, stage.archive_file)))
i += 1
except FailedDownloadError as e:
tty.msg("Failed to fetch %s" % url)
except FailedDownloadError:
tty.msg("Failed to fetch {0}".format(url))
except Exception as e:
tty.msg('Something failed on %s, skipping.\n (%s)' % (url, e))
tty.msg("Something failed on {0}, skipping.".format(url),
" ({0})".format(e))

return hashes
if not version_hashes:
tty.die("Could not fetch any versions for {0}".format(name))

# Find length of longest string in the list for padding
max_len = max(len(str(v)) for v, h in version_hashes)

# Generate the version directives to put in a package.py
version_lines = "\n".join([
" version('{0}', {1}'{2}')".format(
v, ' ' * (max_len - len(str(v))), h) for v, h in version_hashes
])
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously, `spack checksum tau` would result in:

    version('2.26', '2af91f02ad26d5bf0954146c56a8cdfa')
    version('2.25.2', 'f5e542d41eb4a7daa6241e5472f49fd7')
    version('2.25.1.1', 'f2baae27c5c024937566f33339826d7c')
    version('2.25.1', 'b9783f9bbe2862254bfdd208735241b6')
    version('2.25', '46cd48fa3f3c4ce0197017b3158a2b43')

while

$ spack create --force https://www.cs.uoregon.edu/research/tau/tau_releases/tau-2.25.tar.gz

would result in:

    version('2.26'    , '2af91f02ad26d5bf0954146c56a8cdfa')
    version('2.25.2'  , 'f5e542d41eb4a7daa6241e5472f49fd7')
    version('2.25.1.1', 'f2baae27c5c024937566f33339826d7c')
    version('2.25.1'  , 'b9783f9bbe2862254bfdd208735241b6')
    version('2.25'    , '46cd48fa3f3c4ce0197017b3158a2b43')

The latter isn't even PEP 8 compliant and would cause failures in the flake8 tests. Now, both result in:

    version('2.26',     '2af91f02ad26d5bf0954146c56a8cdfa')
    version('2.25.2',   'f5e542d41eb4a7daa6241e5472f49fd7')
    version('2.25.1.1', 'f2baae27c5c024937566f33339826d7c')
    version('2.25.1',   'b9783f9bbe2862254bfdd208735241b6')
    version('2.25',     '46cd48fa3f3c4ce0197017b3158a2b43')


num_hash = len(version_hashes)
tty.msg("Checksummed {0} version{1} of {2}".format(
num_hash, '' if num_hash == 1 else 's', name))

return version_lines


def checksum(parser, args):
# get the package we're going to generate checksums for
# Make sure the user provided a package and not a URL
if not valid_fully_qualified_module_name(args.package):
tty.die("`spack checksum` accepts package names, not URLs. "
"Use `spack md5 <url>` instead.")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I accidentally do this all the time.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you know what the user wants, why not just do it instead of quitting with an error message? Do we need `spack md5` at all, or can its functionality be folded into `spack checksum`?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it make sense to rename `spack checksum`? We will not always use md5, and it would be nice if `spack checksum` gave you whatever the currently recommended checksum of a file/URL is. Should the current `spack checksum` be called `spack find-new-versions` or `spack spider-versions` or something?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You mean rename `spack md5` or rename `spack checksum`? I don't have any particular preference. Let's leave that for another PR.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean:

  1. spack checksum -> spack find-new-versions or some better name.
  2. spack md5 -> spack checksum (and it might spit out a sha256)

Another PR sounds good.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, there's a lot of overlap between `spack versions`, `spack checksum`, and `spack md5`. We can work on that during the switch to sha256 or whatever we decide on.


# Get the package we're going to generate checksums for
pkg = spack.repo.get(args.package)

# If the user asked for specific versions, use those.
if args.versions:
versions = {}
# If the user asked for specific versions, use those
url_dict = {}
for version in args.versions:
version = ver(version)
if not isinstance(version, Version):
tty.die("Cannot generate checksums for version lists or " +
"version ranges. Use unambiguous versions.")
versions[version] = pkg.url_for_version(version)
tty.die("Cannot generate checksums for version lists or "
"version ranges. Use unambiguous versions.")
url_dict[version] = pkg.url_for_version(version)
else:
versions = pkg.fetch_remote_versions()
if not versions:
tty.die("Could not fetch any versions for %s" % pkg.name)

sorted_versions = sorted(versions, reverse=True)

# Find length of longest string in the list for padding
maxlen = max(len(str(v)) for v in versions)
# Otherwise, see what versions we can find online
url_dict = pkg.fetch_remote_versions()
if not url_dict:
tty.die("Could not find any versions for {0}".format(pkg.name))

tty.msg("Found %s versions of %s" % (len(versions), pkg.name),
*spack.cmd.elide_list(
["{0:{1}} {2}".format(v, maxlen, versions[v])
for v in sorted_versions]))
print
archives_to_fetch = tty.get_number(
"How many would you like to checksum?", default=5, abort='q')

if not archives_to_fetch:
tty.msg("Aborted.")
return

version_hashes = get_checksums(
sorted_versions[:archives_to_fetch],
[versions[v] for v in sorted_versions[:archives_to_fetch]],
keep_stage=args.keep_stage)

if not version_hashes:
tty.die("Could not fetch any versions for %s" % pkg.name)
version_lines = get_checksums(
url_dict, pkg.name, keep_stage=args.keep_stage)

version_lines = [
" version('%s', '%s')" % (v, h) for v, h in version_hashes
]
tty.msg("Checksummed new versions of %s:" % pkg.name, *version_lines)
print()
print(version_lines)
Loading