From eee444fadf80a90c3958a1dfc9e8c7f2cc44e69c Mon Sep 17 00:00:00 2001 From: Maurits van Rees Date: Fri, 4 Oct 2013 19:17:04 +0200 Subject: [PATCH 01/16] Read the existing MANIFEST.in file for files to ignore. --- check_manifest.py | 44 ++++++++++++++++++++++++++++++++++++++- tests.py | 52 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 1 deletion(-) diff --git a/check_manifest.py b/check_manifest.py index 702a57e..3b4ae04 100755 --- a/check_manifest.py +++ b/check_manifest.py @@ -394,13 +394,54 @@ def read_config(): IGNORE.extend(p for p in patterns if p) +def read_manifest(): + """Read existing configuration from MANIFEST.in. + + We use that to ignore anything the MANIFEST.in ignores. + """ + # XXX modifies global state, which is kind of evil + if not os.path.isfile('MANIFEST.in'): + return + contents = open('MANIFEST.in').read() + IGNORE.extend(_get_ignore_from_manifest(contents)) + + +def _get_ignore_from_manifest(contents): + # Gather the various ignore patterns from MANIFEST.in. + # 'contents' should be a string, which may contain newlines. + ignore = [] + for line in contents.splitlines(): + if line.startswith('exclude '): + rest = line[len('exclude '):].strip().split() + # TODO: *.cfg must only match in the top level directory, + # otherwise the user should have used global-exclude. 
+ ignore.extend(rest) + elif line.startswith('global-exclude '): + rest = line[len('global-exclude '):].strip().split() + ignore.extend(rest) + elif line.startswith('recursive-exclude '): + rest = line[len('recursive-exclude '):].strip() + dirname, patterns = rest.split(' ', 1) + for pattern in patterns.strip().split(): + ignore.append(dirname + os.path.sep + pattern) + elif line.startswith('prune '): + dirname = line[len('prune '):].strip() + ignore.append(dirname) + ignore.append(dirname + os.path.sep + '*') + return ignore + + def file_matches(filename, patterns): """Does this filename match any of the patterns?""" return any(fnmatch.fnmatch(filename, pat) for pat in patterns) def strip_sdist_extras(filelist): - """Strip generated files that are only present in source distributions.""" + """Strip generated files that are only present in source distributions. + + We also strip files that are ignored for other reasons, like + command line arguments, setup.cfg rules or MANIFEST.in rules. + """ return [name for name in filelist if not file_matches(name, IGNORE)] @@ -451,6 +492,7 @@ def check_manifest(source_tree='.', create=False, update=False, if not is_package(source_tree): raise Failure('This is not a Python project (no setup.py).') read_config() + read_manifest() info_begin("listing source files under version control") all_source_files = sorted(get_vcs_files()) source_files = strip_sdist_extras(all_source_files) diff --git a/tests.py b/tests.py index 9d6c078..e0b5a73 100644 --- a/tests.py +++ b/tests.py @@ -137,6 +137,58 @@ def test_find_suggestions_generic_fallback_rules(self): self.assertEqual(find_suggestions(['src/id-lang.map']), (['recursive-include src *.map'], [])) + def test_get_ignore_from_manifest(self): + from check_manifest import _get_ignore_from_manifest as parse + self.assertEqual(parse(''), []) + self.assertEqual(parse(' \n '), []) + # TODO: exclude and global-exclude are treated equally at the + # moment, which is wrong. 
+ self.assertEqual(parse('exclude *.cfg'), ['*.cfg']) + self.assertEqual(parse('#exclude *.cfg'), []) + self.assertEqual(parse('exclude *.cfg'), + ['*.cfg']) + self.assertEqual(parse('exclude *.cfg foo.* bar.txt'), + ['*.cfg', 'foo.*', 'bar.txt']) + self.assertEqual(parse('include *.cfg'), []) + self.assertEqual(parse('global-exclude *.pyc'), + ['*.pyc']) + self.assertEqual(parse('global-exclude *.pyc *.sh'), + ['*.pyc', '*.sh']) + self.assertEqual(parse('recursive-exclude dir *.pyc'), + ['dir/*.pyc']) + self.assertEqual(parse('recursive-exclude dir *.pyc *.sh'), + ['dir/*.pyc', 'dir/*.sh']) + self.assertEqual(parse('prune dir'), + ['dir', 'dir/*']) + text = """ +#exclude *.01 +exclude *.02 +exclude *.03 04.* bar.txt +exclude *.05 +global-exclude *.10 *.11 +global-exclude *.12 +include *.20 +prune 30 +recursive-exclude 40 *.41 +recursive-exclude 42 *.43 44.* +""" + self.assertEqual( + parse(text), + ['*.02', + '*.03', + '04.*', + 'bar.txt', + '*.05', + '*.10', + '*.11', + '*.12', + '30', + '30/*', + '40/*.41', + '42/*.43', + '42/44.*', + ]) + class VCSMixin(object): From 95239769fb118398c1ac54681f7f1bd1eed7bf59 Mon Sep 17 00:00:00 2001 From: Maurits van Rees Date: Fri, 4 Oct 2013 22:50:30 +0200 Subject: [PATCH 02/16] Add list for ignoring regular expressions. Use this to support ignoring patterns that are in MANIFEST.in with the 'exclude' keyword. --- check_manifest.py | 43 ++++++++++++++++++++++++++++++++-------- tests.py | 50 +++++++++++++++++++++++++++++------------------ 2 files changed, 66 insertions(+), 27 deletions(-) diff --git a/check_manifest.py b/check_manifest.py index 3b4ae04..36b0f91 100755 --- a/check_manifest.py +++ b/check_manifest.py @@ -350,6 +350,12 @@ def add_directories(names): '*.mo', ] +IGNORE_REGEXPS = [ + # Regular expressions for filename to ignore. This is useful for + # filename patterns where the '*' part must not search in + # directories. 
+ ] + WARN_ABOUT_FILES_IN_VCS = [ # generated files should not be committed into the VCS 'PKG-INFO', @@ -403,19 +409,34 @@ def read_manifest(): if not os.path.isfile('MANIFEST.in'): return contents = open('MANIFEST.in').read() - IGNORE.extend(_get_ignore_from_manifest(contents)) + ignore, ignore_regexps = _get_ignore_from_manifest(contents) + IGNORE.extend(ignore) + IGNORE_REGEXPS.extend(ignore_regexps) def _get_ignore_from_manifest(contents): - # Gather the various ignore patterns from MANIFEST.in. - # 'contents' should be a string, which may contain newlines. + """Gather the various ignore patterns from MANIFEST.in. + + 'contents' should be a string, which may contain newlines. + + Returns a list of standard ignore patterns and a list of regular + expressions to ignore. + """ ignore = [] + ignore_regexps = [] for line in contents.splitlines(): if line.startswith('exclude '): + # An exclude of 'dirname/*css' can match 'dirname/foo.css' + # but not 'dirname/subdir/bar.css'. We need a regular + # expression for that. rest = line[len('exclude '):].strip().split() - # TODO: *.cfg must only match in the top level directory, - # otherwise the user should have used global-exclude. - ignore.extend(rest) + for pat in rest: + if '*' in pat: + pat = pat.replace('*', '[^/]*') + ignore_regexps.append(pat) + else: + # No need for special handling. 
+ ignore.append(pat) elif line.startswith('global-exclude '): rest = line[len('global-exclude '):].strip().split() ignore.extend(rest) @@ -428,7 +449,7 @@ def _get_ignore_from_manifest(contents): dirname = line[len('prune '):].strip() ignore.append(dirname) ignore.append(dirname + os.path.sep + '*') - return ignore + return ignore, ignore_regexps def file_matches(filename, patterns): @@ -436,6 +457,11 @@ def file_matches(filename, patterns): return any(fnmatch.fnmatch(filename, pat) for pat in patterns) +def file_matches_regexps(filename, patterns): + """Does this filename match any of the regular expressions?""" + return any(re.match(pat, filename) for pat in patterns) + + def strip_sdist_extras(filelist): """Strip generated files that are only present in source distributions. @@ -443,7 +469,8 @@ def strip_sdist_extras(filelist): command line arguments, setup.cfg rules or MANIFEST.in rules. """ return [name for name in filelist - if not file_matches(name, IGNORE)] + if not file_matches(name, IGNORE) + and not file_matches_regexps(name, IGNORE_REGEXPS)] def find_bad_ideas(filelist): diff --git a/tests.py b/tests.py index e0b5a73..4b31697 100644 --- a/tests.py +++ b/tests.py @@ -139,32 +139,40 @@ def test_find_suggestions_generic_fallback_rules(self): def test_get_ignore_from_manifest(self): from check_manifest import _get_ignore_from_manifest as parse - self.assertEqual(parse(''), []) - self.assertEqual(parse(' \n '), []) - # TODO: exclude and global-exclude are treated equally at the - # moment, which is wrong. 
- self.assertEqual(parse('exclude *.cfg'), ['*.cfg']) - self.assertEqual(parse('#exclude *.cfg'), []) + # The return value is a tuple with two lists: + # ([], []) + self.assertEqual(parse(''), + ([], [])) + self.assertEqual(parse(' \n '), + ([], [])) + self.assertEqual(parse('exclude *.cfg'), + ([], ['[^/]*.cfg'])) + self.assertEqual(parse('#exclude *.cfg'), + ([], [])) self.assertEqual(parse('exclude *.cfg'), - ['*.cfg']) + ([], ['[^/]*.cfg'])) self.assertEqual(parse('exclude *.cfg foo.* bar.txt'), - ['*.cfg', 'foo.*', 'bar.txt']) - self.assertEqual(parse('include *.cfg'), []) + (['bar.txt'], ['[^/]*.cfg', 'foo.[^/]*'])) + self.assertEqual(parse('exclude some/directory/*.cfg'), + ([], ['some/directory/[^/]*.cfg'])) + self.assertEqual(parse('include *.cfg'), + ([], [])) self.assertEqual(parse('global-exclude *.pyc'), - ['*.pyc']) + (['*.pyc'], [])) self.assertEqual(parse('global-exclude *.pyc *.sh'), - ['*.pyc', '*.sh']) + (['*.pyc', '*.sh'], [])) self.assertEqual(parse('recursive-exclude dir *.pyc'), - ['dir/*.pyc']) + (['dir/*.pyc'], [])) self.assertEqual(parse('recursive-exclude dir *.pyc *.sh'), - ['dir/*.pyc', 'dir/*.sh']) + (['dir/*.pyc', 'dir/*.sh'], [])) self.assertEqual(parse('prune dir'), - ['dir', 'dir/*']) + (['dir', 'dir/*'], [])) text = """ #exclude *.01 exclude *.02 exclude *.03 04.* bar.txt exclude *.05 +exclude some/directory/*.cfg global-exclude *.10 *.11 global-exclude *.12 include *.20 @@ -174,11 +182,8 @@ def test_get_ignore_from_manifest(self): """ self.assertEqual( parse(text), - ['*.02', - '*.03', - '04.*', + ([ 'bar.txt', - '*.05', '*.10', '*.11', '*.12', @@ -187,7 +192,14 @@ def test_get_ignore_from_manifest(self): '40/*.41', '42/*.43', '42/44.*', - ]) + ], + [ + '[^/]*.02', + '[^/]*.03', + '04.[^/]*', + '[^/]*.05', + 'some/directory/[^/]*.cfg', + ])) class VCSMixin(object): From 09032223acfd1d347362468efb3d759b786c67ba Mon Sep 17 00:00:00 2001 From: Maurits van Rees Date: Fri, 4 Oct 2013 23:30:45 +0200 Subject: [PATCH 03/16] Test 
strip_sdist_extras with a MANIFEST.in file. --- tests.py | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/tests.py b/tests.py index 4b31697..ba8ba11 100644 --- a/tests.py +++ b/tests.py @@ -82,6 +82,69 @@ def test_strip_sdist_extras(self): ] self.assertEqual(strip_sdist_extras(filelist), expected) + def test_strip_sdist_extras_with_manifest(self): + import check_manifest + from check_manifest import strip_sdist_extras + from check_manifest import _get_ignore_from_manifest as parse + orig_ignore = check_manifest.IGNORE + orig_ignore_regexps = check_manifest.IGNORE_REGEXPS + manifest_in = """ +graft src +exclude *.cfg +global-exclude *.mo +prune src/dump +recursive-exclude src/zope *.sh +""" + filelist = [ + '.gitignore', + 'setup.py', + 'setup.cfg', + 'MANIFEST.in', + 'README.txt', + 'src', + 'src/helper.sh', + 'src/dump', + 'src/dump/__init__.py', + 'src/zope', + 'src/zope/__init__.py', + 'src/zope/zopehelper.sh', + 'src/zope/foo', + 'src/zope/foo/__init__.py', + 'src/zope/foo/language.po', + 'src/zope/foo/language.mo', + 'src/zope/foo/config.cfg', + 'src/zope/foo/foohelper.sh', + 'src/zope.foo.egg-info', + 'src/zope.foo.egg-info/SOURCES.txt', + ] + expected = [ + 'setup.py', + 'MANIFEST.in', + 'README.txt', + 'src', + 'src/helper.sh', + 'src/zope', + 'src/zope/__init__.py', + 'src/zope/foo', + 'src/zope/foo/__init__.py', + 'src/zope/foo/language.po', + 'src/zope/foo/config.cfg', + ] + + # This will change the definitions. + try: + # This is normally done in read_manifest: + ignore, ignore_regexps = parse(manifest_in) + check_manifest.IGNORE.extend(ignore) + check_manifest.IGNORE_REGEXPS.extend(ignore_regexps) + # Filter the file list. 
+ result = strip_sdist_extras(filelist) + finally: + # Restore the original definitions + check_manifest.IGNORE = orig_ignore + check_manifest.IGNORE_REGEXPS = orig_ignore_regexps + self.assertEqual(result, expected) + def test_find_bad_ideas(self): from check_manifest import find_bad_ideas filelist = [ From 76e42db599dc9661c86ed6d6086e0f6610b8d3ba Mon Sep 17 00:00:00 2001 From: Maurits van Rees Date: Fri, 4 Oct 2013 23:32:35 +0200 Subject: [PATCH 04/16] changelog --- CHANGES.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index e09e1ee..75a586e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,6 +5,8 @@ Changelog 0.17 (unreleased) ----------------- +* Read the existing MANIFEST.in file for files to ignore. + 0.16 (2013-10-01) ----------------- From f8a593fc131c44b2c951f15cbaaaa55544752a93 Mon Sep 17 00:00:00 2001 From: Maurits van Rees Date: Mon, 7 Oct 2013 10:13:32 +0200 Subject: [PATCH 05/16] Use the 'with' statement for opening files. This avoids ResourceWarnings on Python 3.x and leaking file descriptors on PyPy. --- check_manifest.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/check_manifest.py b/check_manifest.py index 36b0f91..3175249 100755 --- a/check_manifest.py +++ b/check_manifest.py @@ -408,7 +408,8 @@ def read_manifest(): # XXX modifies global state, which is kind of evil if not os.path.isfile('MANIFEST.in'): return - contents = open('MANIFEST.in').read() + with open('MANIFEST.in') as manifest: + contents = manifest.read() ignore, ignore_regexps = _get_ignore_from_manifest(contents) IGNORE.extend(ignore) IGNORE_REGEXPS.extend(ignore_regexps) From 4fc8b69f4cd5bd8b5f7d831219167c503c524944 Mon Sep 17 00:00:00 2001 From: Maurits van Rees Date: Mon, 7 Oct 2013 10:14:36 +0200 Subject: [PATCH 06/16] Remove redundant 'strip'. 'split' already ignores leading and trailing whitespace.
--- check_manifest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/check_manifest.py b/check_manifest.py index 3175249..9aa02ae 100755 --- a/check_manifest.py +++ b/check_manifest.py @@ -430,7 +430,7 @@ def _get_ignore_from_manifest(contents): # An exclude of 'dirname/*css' can match 'dirname/foo.css' # but not 'dirname/subdir/bar.css'. We need a regular # expression for that. - rest = line[len('exclude '):].strip().split() + rest = line[len('exclude '):].split() for pat in rest: if '*' in pat: pat = pat.replace('*', '[^/]*') From 65604396623d265a271bcd5ad09f18da2687564e Mon Sep 17 00:00:00 2001 From: Maurits van Rees Date: Mon, 7 Oct 2013 10:29:49 +0200 Subject: [PATCH 07/16] Do not make a dot into a magical wildcard character. --- check_manifest.py | 2 ++ tests.py | 18 +++++++++--------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/check_manifest.py b/check_manifest.py index 9aa02ae..99d5b62 100755 --- a/check_manifest.py +++ b/check_manifest.py @@ -434,6 +434,8 @@ def _get_ignore_from_manifest(contents): for pat in rest: if '*' in pat: pat = pat.replace('*', '[^/]*') + # Do not make a dot into a magical wildcard character. + pat = pat.replace('.', '\.') ignore_regexps.append(pat) else: # No need for special handling. 
diff --git a/tests.py b/tests.py index ba8ba11..94ce119 100644 --- a/tests.py +++ b/tests.py @@ -209,15 +209,15 @@ def test_get_ignore_from_manifest(self): self.assertEqual(parse(' \n '), ([], [])) self.assertEqual(parse('exclude *.cfg'), - ([], ['[^/]*.cfg'])) + ([], ['[^/]*\.cfg'])) self.assertEqual(parse('#exclude *.cfg'), ([], [])) self.assertEqual(parse('exclude *.cfg'), - ([], ['[^/]*.cfg'])) + ([], ['[^/]*\.cfg'])) self.assertEqual(parse('exclude *.cfg foo.* bar.txt'), - (['bar.txt'], ['[^/]*.cfg', 'foo.[^/]*'])) + (['bar.txt'], ['[^/]*\.cfg', 'foo\.[^/]*'])) self.assertEqual(parse('exclude some/directory/*.cfg'), - ([], ['some/directory/[^/]*.cfg'])) + ([], ['some/directory/[^/]*\.cfg'])) self.assertEqual(parse('include *.cfg'), ([], [])) self.assertEqual(parse('global-exclude *.pyc'), @@ -257,11 +257,11 @@ def test_get_ignore_from_manifest(self): '42/44.*', ], [ - '[^/]*.02', - '[^/]*.03', - '04.[^/]*', - '[^/]*.05', - 'some/directory/[^/]*.cfg', + '[^/]*\.02', + '[^/]*\.03', + '04\.[^/]*', + '[^/]*\.05', + 'some/directory/[^/]*\.cfg', ])) From 3a5afc67efef272de6a9a8d79186ec228c013576 Mon Sep 17 00:00:00 2001 From: Maurits van Rees Date: Mon, 7 Oct 2013 10:33:11 +0200 Subject: [PATCH 08/16] Keep the indentation visually clear in the test. Remove leading whitespace programmatically with textwrap.dedent. 
--- tests.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/tests.py b/tests.py index 94ce119..54105f0 100644 --- a/tests.py +++ b/tests.py @@ -3,6 +3,7 @@ import shutil import subprocess import tempfile +import textwrap import unittest @@ -231,18 +232,21 @@ def test_get_ignore_from_manifest(self): self.assertEqual(parse('prune dir'), (['dir', 'dir/*'], [])) text = """ -#exclude *.01 -exclude *.02 -exclude *.03 04.* bar.txt -exclude *.05 -exclude some/directory/*.cfg -global-exclude *.10 *.11 -global-exclude *.12 -include *.20 -prune 30 -recursive-exclude 40 *.41 -recursive-exclude 42 *.43 44.* -""" + #exclude *.01 + exclude *.02 + exclude *.03 04.* bar.txt + exclude *.05 + exclude some/directory/*.cfg + global-exclude *.10 *.11 + global-exclude *.12 + include *.20 + prune 30 + recursive-exclude 40 *.41 + recursive-exclude 42 *.43 44.* + """ + # Keep the indentation visually clear in the test, but remove + # leading whitespace programmatically. + text = textwrap.dedent(text) self.assertEqual( parse(text), ([ From 552e8866b892aabefce085e7bb82df2cef748ccf Mon Sep 17 00:00:00 2001 From: Maurits van Rees Date: Tue, 8 Oct 2013 09:57:37 +0200 Subject: [PATCH 09/16] strip not needed in combination with split. --- check_manifest.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/check_manifest.py b/check_manifest.py index 99d5b62..9d15b8a 100755 --- a/check_manifest.py +++ b/check_manifest.py @@ -354,7 +354,7 @@ def add_directories(names): # Regular expressions for filename to ignore. This is useful for # filename patterns where the '*' part must not search in # directories. - ] +] WARN_ABOUT_FILES_IN_VCS = [ # generated files should not be committed into the VCS @@ -441,12 +441,12 @@ def _get_ignore_from_manifest(contents): # No need for special handling. 
ignore.append(pat) elif line.startswith('global-exclude '): - rest = line[len('global-exclude '):].strip().split() + rest = line[len('global-exclude '):].split() ignore.extend(rest) elif line.startswith('recursive-exclude '): rest = line[len('recursive-exclude '):].strip() dirname, patterns = rest.split(' ', 1) - for pattern in patterns.strip().split(): + for pattern in patterns.split(): ignore.append(dirname + os.path.sep + pattern) elif line.startswith('prune '): dirname = line[len('prune '):].strip() From d4ef7d50783c6201c2d507d6cad5d16b7996bf38 Mon Sep 17 00:00:00 2001 From: Maurits van Rees Date: Tue, 8 Oct 2013 10:15:37 +0200 Subject: [PATCH 10/16] Better whitespace checking when parsing a MANIFEST.in line. It seems tabs are accepted in the manifest. We now split each line to check for a command (exclude, prune, etc) and arguments. --- check_manifest.py | 28 +++++++++++++++------------- tests.py | 2 +- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/check_manifest.py b/check_manifest.py index 9d15b8a..84da46b 100755 --- a/check_manifest.py +++ b/check_manifest.py @@ -426,12 +426,16 @@ def _get_ignore_from_manifest(contents): ignore = [] ignore_regexps = [] for line in contents.splitlines(): - if line.startswith('exclude '): + try: + cmd, rest = line.split(None, 1) + except ValueError: + # no whitespace, so not interesting + continue + if cmd == 'exclude': # An exclude of 'dirname/*css' can match 'dirname/foo.css' # but not 'dirname/subdir/bar.css'. We need a regular # expression for that. - rest = line[len('exclude '):].split() - for pat in rest: + for pat in rest.split(): if '*' in pat: pat = pat.replace('*', '[^/]*') # Do not make a dot into a magical wildcard character. @@ -440,18 +444,16 @@ def _get_ignore_from_manifest(contents): else: # No need for special handling. 
ignore.append(pat) - elif line.startswith('global-exclude '): - rest = line[len('global-exclude '):].split() - ignore.extend(rest) - elif line.startswith('recursive-exclude '): - rest = line[len('recursive-exclude '):].strip() - dirname, patterns = rest.split(' ', 1) + elif cmd == 'global-exclude': + ignore.extend(rest.split()) + elif cmd == 'recursive-exclude': + dirname, patterns = rest.split(None, 1) for pattern in patterns.split(): ignore.append(dirname + os.path.sep + pattern) - elif line.startswith('prune '): - dirname = line[len('prune '):].strip() - ignore.append(dirname) - ignore.append(dirname + os.path.sep + '*') + elif cmd == 'prune': + # rest is considered to be a directory name + ignore.append(rest) + ignore.append(rest + os.path.sep + '*') return ignore, ignore_regexps diff --git a/tests.py b/tests.py index 54105f0..ee98164 100644 --- a/tests.py +++ b/tests.py @@ -215,7 +215,7 @@ def test_get_ignore_from_manifest(self): ([], [])) self.assertEqual(parse('exclude *.cfg'), ([], ['[^/]*\.cfg'])) - self.assertEqual(parse('exclude *.cfg foo.* bar.txt'), + self.assertEqual(parse('\texclude\t*.cfg foo.* bar.txt'), (['bar.txt'], ['[^/]*\.cfg', 'foo\.[^/]*'])) self.assertEqual(parse('exclude some/directory/*.cfg'), ([], ['some/directory/[^/]*\.cfg'])) From c68abc6baa4382ce585a37bd3bc7a48b847708ca Mon Sep 17 00:00:00 2001 From: Maurits van Rees Date: Tue, 8 Oct 2013 10:28:27 +0200 Subject: [PATCH 11/16] Make sure prune gets no path separator. --- check_manifest.py | 6 +++++- tests.py | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/check_manifest.py b/check_manifest.py index 84da46b..093f5e3 100755 --- a/check_manifest.py +++ b/check_manifest.py @@ -451,7 +451,11 @@ def _get_ignore_from_manifest(contents): for pattern in patterns.split(): ignore.append(dirname + os.path.sep + pattern) elif cmd == 'prune': - # rest is considered to be a directory name + # rest is considered to be a directory name. 
It should + # not contain a path separator, as it actually has no + # effect in that case, but that could differ per python + # version. We strip it here to avoid double separators. + rest = rest.rstrip(os.path.sep) ignore.append(rest) ignore.append(rest + os.path.sep + '*') return ignore, ignore_regexps diff --git a/tests.py b/tests.py index ee98164..71c9042 100644 --- a/tests.py +++ b/tests.py @@ -231,6 +231,10 @@ def test_get_ignore_from_manifest(self): (['dir/*.pyc', 'dir/*.sh'], [])) self.assertEqual(parse('prune dir'), (['dir', 'dir/*'], [])) + # You should not add a slash at the end of a prune, but let's + # not fail over it or end up with double slashes. + self.assertEqual(parse('prune dir/'), + (['dir', 'dir/*'], [])) text = """ #exclude *.01 exclude *.02 From 77fe8dc2213f780d1e81f712befcdb1986d3901b Mon Sep 17 00:00:00 2001 From: Maurits van Rees Date: Tue, 8 Oct 2013 10:29:55 +0200 Subject: [PATCH 12/16] Use textwrap.dedent again to keep the code clear. --- tests.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tests.py b/tests.py index 71c9042..6a52d56 100644 --- a/tests.py +++ b/tests.py @@ -90,12 +90,15 @@ def test_strip_sdist_extras_with_manifest(self): orig_ignore = check_manifest.IGNORE orig_ignore_regexps = check_manifest.IGNORE_REGEXPS manifest_in = """ -graft src -exclude *.cfg -global-exclude *.mo -prune src/dump -recursive-exclude src/zope *.sh -""" + graft src + exclude *.cfg + global-exclude *.mo + prune src/dump + recursive-exclude src/zope *.sh + """ + # Keep the indentation visually clear in the test, but remove + # leading whitespace programmatically. + manifest_in = textwrap.dedent(manifest_in) filelist = [ '.gitignore', 'setup.py', From cc6f20adb071c08f3c5e2ae2b93016a574cc0634 Mon Sep 17 00:00:00 2001 From: Maurits van Rees Date: Wed, 9 Oct 2013 10:26:05 +0200 Subject: [PATCH 13/16] Warn when we cannot parse recursive-exclude. 
--- check_manifest.py | 9 ++++++++- tests.py | 3 +++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/check_manifest.py b/check_manifest.py index 093f5e3..1edb8c8 100755 --- a/check_manifest.py +++ b/check_manifest.py @@ -447,7 +447,14 @@ def _get_ignore_from_manifest(contents): elif cmd == 'global-exclude': ignore.extend(rest.split()) elif cmd == 'recursive-exclude': - dirname, patterns = rest.split(None, 1) + try: + dirname, patterns = rest.split(None, 1) + except ValueError: + # Wrong MANIFEST.in line. + warning("You have a wrong line in MANIFEST.in: %r\n" + "'recursive-exclude' expects " + " ..." % line) + continue for pattern in patterns.split(): ignore.append(dirname + os.path.sep + pattern) elif cmd == 'prune': diff --git a/tests.py b/tests.py index 6a52d56..e243f95 100644 --- a/tests.py +++ b/tests.py @@ -232,6 +232,9 @@ def test_get_ignore_from_manifest(self): (['dir/*.pyc'], [])) self.assertEqual(parse('recursive-exclude dir *.pyc *.sh'), (['dir/*.pyc', 'dir/*.sh'], [])) + # We should not fail when a recursive-exclude line is wrong: + self.assertEqual(parse('recursive-exclude dirwithoutpattern'), + ([], [])) self.assertEqual(parse('prune dir'), (['dir', 'dir/*'], [])) # You should not add a slash at the end of a prune, but let's From b62f61a526b09731095460e2a66e913579190dce Mon Sep 17 00:00:00 2001 From: Maurits van Rees Date: Wed, 9 Oct 2013 10:50:08 +0200 Subject: [PATCH 14/16] Fix recursive-exclude when pattern does not have wildcards. --- check_manifest.py | 14 +++++++++++++- tests.py | 2 ++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/check_manifest.py b/check_manifest.py index 1edb8c8..6fe385d 100755 --- a/check_manifest.py +++ b/check_manifest.py @@ -455,8 +455,20 @@ def _get_ignore_from_manifest(contents): "'recursive-exclude' expects " " ..." % line) continue + # Strip path separator for clarity. 
+ dirname = dirname.rstrip(os.path.sep) for pattern in patterns.split(): - ignore.append(dirname + os.path.sep + pattern) + if '*' in pattern: + ignore.append(dirname + os.path.sep + pattern) + else: + # 'recursive-exclude plone metadata.xml' should + # exclude plone/metadata.xml and + # plone/*/metadata.xml, where * can be any number + # of sub directories. We could use a regexp, but + # two ignores seems easier. + ignore.append(dirname + os.path.sep + pattern) + ignore.append(dirname + os.path.sep + '*' + os.path.sep + + pattern) elif cmd == 'prune': # rest is considered to be a directory name. It should # not contain a path separator, as it actually has no diff --git a/tests.py b/tests.py index e243f95..76716b6 100644 --- a/tests.py +++ b/tests.py @@ -232,6 +232,8 @@ def test_get_ignore_from_manifest(self): (['dir/*.pyc'], [])) self.assertEqual(parse('recursive-exclude dir *.pyc *.sh'), (['dir/*.pyc', 'dir/*.sh'], [])) + self.assertEqual(parse('recursive-exclude dir nopattern.xml'), + (['dir/nopattern.xml', 'dir/*/nopattern.xml'], [])) # We should not fail when a recursive-exclude line is wrong: self.assertEqual(parse('recursive-exclude dirwithoutpattern'), ([], [])) From f83b42d97bcfc347f3fdcfe6ba60422f4cf56734 Mon Sep 17 00:00:00 2001 From: Maurits van Rees Date: Thu, 10 Oct 2013 12:37:38 +0200 Subject: [PATCH 15/16] Fix recursive-exclude for pattern not starting with wildcard character. --- check_manifest.py | 2 +- tests.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/check_manifest.py b/check_manifest.py index 6fe385d..8eda776 100755 --- a/check_manifest.py +++ b/check_manifest.py @@ -458,7 +458,7 @@ def _get_ignore_from_manifest(contents): # Strip path separator for clarity. 
dirname = dirname.rstrip(os.path.sep) for pattern in patterns.split(): - if '*' in pattern: + if pattern.startswith('*'): ignore.append(dirname + os.path.sep + pattern) else: # 'recursive-exclude plone metadata.xml' should diff --git a/tests.py b/tests.py index 76716b6..b0ed3ce 100644 --- a/tests.py +++ b/tests.py @@ -230,8 +230,8 @@ def test_get_ignore_from_manifest(self): (['*.pyc', '*.sh'], [])) self.assertEqual(parse('recursive-exclude dir *.pyc'), (['dir/*.pyc'], [])) - self.assertEqual(parse('recursive-exclude dir *.pyc *.sh'), - (['dir/*.pyc', 'dir/*.sh'], [])) + self.assertEqual(parse('recursive-exclude dir *.pyc foo*.sh'), + (['dir/*.pyc', 'dir/foo*.sh', 'dir/*/foo*.sh'], [])) self.assertEqual(parse('recursive-exclude dir nopattern.xml'), (['dir/nopattern.xml', 'dir/*/nopattern.xml'], [])) # We should not fail when a recursive-exclude line is wrong: @@ -271,6 +271,7 @@ def test_get_ignore_from_manifest(self): '40/*.41', '42/*.43', '42/44.*', + '42/*/44.*', ], [ '[^/]*\.02', From 7538143d7778e94c4a544b7b4a35ebd4ff171110 Mon Sep 17 00:00:00 2001 From: Maurits van Rees Date: Thu, 10 Oct 2013 13:10:28 +0200 Subject: [PATCH 16/16] Use the warnings module to print warnings. Ignore warnings in one known spot in the tests. This keeps the test output clean. 
--- check_manifest.py | 3 ++- tests.py | 7 +++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/check_manifest.py b/check_manifest.py index 8eda776..28dfafb 100755 --- a/check_manifest.py +++ b/check_manifest.py @@ -27,6 +27,7 @@ import sys import tarfile import tempfile +import warnings import zipfile from contextlib import contextmanager @@ -92,7 +93,7 @@ def error(message): def warning(message): _check_tbc() - print(message, file=sys.stderr) + warnings.warn(message) def format_list(list_of_strings): diff --git a/tests.py b/tests.py index b0ed3ce..efa995d 100644 --- a/tests.py +++ b/tests.py @@ -5,6 +5,7 @@ import tempfile import textwrap import unittest +import warnings class Tests(unittest.TestCase): @@ -235,8 +236,10 @@ def test_get_ignore_from_manifest(self): self.assertEqual(parse('recursive-exclude dir nopattern.xml'), (['dir/nopattern.xml', 'dir/*/nopattern.xml'], [])) # We should not fail when a recursive-exclude line is wrong: - self.assertEqual(parse('recursive-exclude dirwithoutpattern'), - ([], [])) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + self.assertEqual(parse('recursive-exclude dirwithoutpattern'), + ([], [])) self.assertEqual(parse('prune dir'), (['dir', 'dir/*'], [])) # You should not add a slash at the end of a prune, but let's