Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

various fixes to ament-copyright and add --allowed-licenses argument #416

Open
wants to merge 2 commits into
base: rolling
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 33 additions & 11 deletions ament_copyright/ament_copyright/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,11 @@
from ament_copyright import UNKNOWN_IDENTIFIER
from ament_copyright.crawler import get_files
from ament_copyright.parser import get_comment_block
from ament_copyright.parser import get_copyright_information_regex
from ament_copyright.parser import get_index_of_next_line
from ament_copyright.parser import parse_file
from ament_copyright.parser import scan_past_coding_and_shebang_lines
from ament_copyright.parser import scan_past_empty_lines
from ament_copyright.parser import search_copyright_information


def main(argv=sys.argv[1:]):
Expand Down Expand Up @@ -61,6 +61,13 @@ def main(argv=sys.argv[1:]):
default=[],
dest='excludes',
help='The filenames to exclude.')
parser.add_argument(
'--allowed-licenses',
metavar='license name',
nargs='+',
default=[],
dest='allowed_licenses',
help='List of valid licenses.')
group = parser.add_mutually_exclusive_group()
group.add_argument(
'--add-missing',
Expand Down Expand Up @@ -112,9 +119,20 @@ def main(argv=sys.argv[1:]):
if not filenames:
print('No repository roots and files found')

# if user has specified a list of allowed licenses, use only those
if args.allowed_licenses:
allowed_licenses = {}
for license_name in args.allowed_licenses:
if license_name in licenses:
allowed_licenses[license_name] = licenses[license_name]
else:
assert False, 'Requested unknown license: ' + license_name
else:
allowed_licenses = licenses

file_descriptors = {}
for filename in sorted(filenames):
file_descriptors[filename] = parse_file(filename)
file_descriptors[filename] = parse_file(filename, allowed_licenses)

if args.add_missing:
name = names.get(args.add_missing[0], args.add_missing[0])
Expand Down Expand Up @@ -274,7 +292,7 @@ def add_copyright_year(file_descriptors, new_years, verbose):
file_descriptor = file_descriptors[path]

# ignore files which do not have a header
if not getattr(file_descriptor, 'copyright_identifier', None):
if not getattr(file_descriptor, 'copyright_identifiers', None):
continue

index = scan_past_coding_and_shebang_lines(file_descriptor.content)
Expand All @@ -287,10 +305,13 @@ def add_copyright_year(file_descriptors, new_years, verbose):
else:
block = file_descriptor.content[index:]
block_offset = 0
copyright_span, years_span, name_span = search_copyright_information(block)
if copyright_span is None:

regex = get_copyright_information_regex()
match = regex.search(block)
if not match:
assert False, "Could not find copyright information in file '%s'" % \
file_descriptor.path
years_span = match.span(1)

# skip if all new years are already included
years = get_years_from_string(block[years_span[0]:years_span[1]])
Expand All @@ -311,12 +332,13 @@ def add_copyright_year(file_descriptors, new_years, verbose):
file_descriptor.content[global_years_span[1]:]

# output beginning of file for debugging
# index = global_years_span[0]
# for _ in range(3):
# index = get_index_of_next_line(content, index)
# print('<<<')
# print(content[:index - 1])
# print('>>>')
if verbose:
index = global_years_span[0]
for _ in range(3):
index = get_index_of_next_line(content, index)
print('<<<')
print(content[:index - 1])
print('>>>')

with open(file_descriptor.path, 'w', encoding='utf-8') as h:
h.write(content)
Expand Down
72 changes: 45 additions & 27 deletions ament_copyright/ament_copyright/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def read(self):
with open(self.path, 'r', encoding='utf-8') as h:
self.content = h.read()

def parse(self):
def parse(self, allowed_licenses):
raise NotImplementedError()

def identify_license(self, content, license_part, licenses=None):
Expand Down Expand Up @@ -104,7 +104,7 @@ def identify_copyright(self):
else:
self.copyright_identifiers.append(UNKNOWN_IDENTIFIER)

def parse(self):
def parse(self, allowed_licenses):
self.read()
if not self.content:
return
Expand All @@ -113,52 +113,64 @@ def parse(self):
index = scan_past_coding_and_shebang_lines(self.content)
index = scan_past_empty_lines(self.content, index)

# get first comment block without leading comment tokens
block, _ = get_comment_block(self.content, index)
copyrights, remaining_block = search_copyright_information(block)

if len(copyrights) == 0:
block = get_multiline_comment_block(self.content, index)
def parse_comment_block(block):
copyrights, remaining_block = search_copyright_information(block)

if len(copyrights) == 0:
return

self.copyrights = copyrights
self.copyrights += copyrights
# If no license has been identified yet, try to identify it in this block.
# For files with multiple licenses, only the first one found is considered,
# e.g. when a file with an existing license is copied and a new one is prepended.
if self.license_identifier == UNKNOWN_IDENTIFIER:
license_text = '{copyright}' + remaining_block
self.identify_license(license_text, 'file_headers', allowed_licenses)

# parse all single-line comment blocks for copyright information
tmp_index = index
while True:
block, tmp_index = get_comment_block(self.content, tmp_index)
if block:
parse_comment_block(block)
else:
break

# parse all multi-line comment blocks for copyright information
tmp_index = index
while True:
block, tmp_index = get_multiline_comment_block(self.content, tmp_index)
if block:
parse_comment_block(block)
else:
break

self.identify_copyright()

content = '{copyright}' + remaining_block
self.identify_license(content, 'file_headers')


class ContributingDescriptor(FileDescriptor):

def __init__(self, path):
super(ContributingDescriptor, self).__init__(CONTRIBUTING_FILETYPE, path)

def parse(self):
def parse(self, allowed_licenses):
self.read()
if not self.content:
return

self.identify_license(self.content, 'contributing_files')
self.identify_license(self.content, 'contributing_files', allowed_licenses)


class LicenseDescriptor(FileDescriptor):

def __init__(self, path):
super(LicenseDescriptor, self).__init__(LICENSE_FILETYPE, path)

def parse(self):
def parse(self, allowed_licenses):
self.read()
if not self.content:
return

self.identify_license(self.content, 'license_files')
self.identify_license(self.content, 'license_files', allowed_licenses)


def parse_file(path):
def parse_file(path, allowed_licenses):
filetype = determine_filetype(path)
if filetype == SOURCE_FILETYPE:
d = SourceDescriptor(path)
Expand All @@ -168,7 +180,7 @@ def parse_file(path):
d = LicenseDescriptor(path)
else:
return None
d.parse()
d.parse(allowed_licenses)
return d


Expand All @@ -180,9 +192,7 @@ def determine_filetype(path):
return SOURCE_FILETYPE


def search_copyright_information(content):
if content is None:
return [], content
def get_copyright_information_regex():
# regex for matching years or year ranges (yyyy-yyyy) separated by commas
year = r'\d{4}'
year_range = '%s-%s' % (year, year)
Expand All @@ -191,6 +201,13 @@ def search_copyright_information(content):
r'copyright(?:\s+\(c\))?\s+(%s(?:,\s*%s)*),?\s+([^\n\r]+)$' % \
(year_or_year_range, year_or_year_range)
regex = re.compile(pattern, re.DOTALL | re.MULTILINE | re.IGNORECASE)
return regex


def search_copyright_information(content):
if content is None:
return [], content
regex = get_copyright_information_regex()

copyrights = []
while True:
Expand Down Expand Up @@ -297,6 +314,7 @@ def get_multiline_comment_block(content, index):
start_match = start_regex.search(content, index)
if not start_match:
continue
comment_token = start_match.group(1)
start_index = start_match.start(1)

# find the first match of the comment end token
Expand All @@ -323,8 +341,8 @@ def get_multiline_comment_block(content, index):
# Single-line header does not have a common prefix to strip out
lines = prefixed_lines

return '\n'.join(lines)
return None
return '\n'.join(lines), start_index + len(comment_token) + 1
return None, index


def scan_past_empty_lines(content, index):
Expand Down
10 changes: 5 additions & 5 deletions ament_copyright/test/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ def test_get_comment_block_slashes2():
"""
index = 0
index = scan_past_empty_lines(commented_content, index)
block = get_multiline_comment_block(commented_content, index)
block, _ = get_multiline_comment_block(commented_content, index)
assert block is not None
assert block == 'ddd'

Expand Down Expand Up @@ -320,7 +320,7 @@ def test_get_multiline_comment_block_cstyle():
"""
index = 0
index = scan_past_empty_lines(commented_content, index)
block = get_multiline_comment_block(commented_content, index)
block, _ = get_multiline_comment_block(commented_content, index)
assert block is not None
assert block == '\n'.join(['aaa', 'bbb', 'ccc'])

Expand All @@ -339,7 +339,7 @@ def test_get_multiline_comment_block_cstyle2():
"""
index = 0
index = scan_past_empty_lines(commented_content, index)
block = get_multiline_comment_block(commented_content, index)
block, _ = get_multiline_comment_block(commented_content, index)
assert block is not None
assert block == '\n'.join(['aaa', 'bbb', 'ccc'])

Expand All @@ -355,7 +355,7 @@ def test_get_multiline_comment_block_xmlstyle():
"""
index = 0
index = scan_past_empty_lines(commented_content, index)
block = get_multiline_comment_block(commented_content, index)
block, _ = get_multiline_comment_block(commented_content, index)
assert block is not None
assert block == '\n'.join(['aaa', 'bbb', 'ccc'])

Expand All @@ -371,6 +371,6 @@ def test_get_multiline_comment_block_xmlstyle_prefixed():
"""
index = 0
index = scan_past_empty_lines(commented_content, index)
block = get_multiline_comment_block(commented_content, index)
block, _ = get_multiline_comment_block(commented_content, index)
assert block is not None
assert block == '\n'.join(['aaa', 'bbb', 'ccc'])