Fix #3314: duplicate-code error only shows up with pylint --jobs=1 #3458

Closed
2 changes: 2 additions & 0 deletions CONTRIBUTORS.txt
@@ -423,3 +423,5 @@ contributors:
* Joshua Cannon: contributor

* Giuseppe Valente: contributor

* Frank Harrison (doublethefish): contributor
4 changes: 4 additions & 0 deletions ChangeLog
@@ -216,6 +216,10 @@ What's New in Pylint 2.5.0?

Release date: 2020-04-27

* Fix duplicate code detection not working with ``--jobs=2`` and above

  Close #3314

* Fix a false negative for ``undefined-variable`` when using class attribute in comprehension.

  Close #3494
2 changes: 2 additions & 0 deletions doc/whatsnew/2.5.rst
@@ -100,3 +100,5 @@ multiple types of string formatting to be used.
The type of formatting to use is chosen through enabling and disabling messages
rather than through the logging-format-style option.
The fstr value of the logging-format-style option is not valid.

* Fix duplicate code detection when running with ``--jobs=2`` and above
2 changes: 2 additions & 0 deletions pylint/checkers/__init__.py
@@ -10,6 +10,7 @@
# Copyright (c) 2018-2019 Pierre Sassoulas <pierre.sassoulas@gmail.com>
# Copyright (c) 2018 ssolanki <sushobhitsolanki@gmail.com>
# Copyright (c) 2019 Bruno P. Kinoshita <kinow@users.noreply.github.com>
# Copyright (c) 2020 Frank Harrison <doublethefish@gmail.com>

# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
# For details: https://github.com/PyCQA/pylint/blob/master/COPYING
@@ -43,6 +44,7 @@
"""

from pylint.checkers.base_checker import BaseChecker, BaseTokenChecker
from pylint.checkers.mapreduce_checker import MapReduceMixin
from pylint.utils import register_plugins


18 changes: 18 additions & 0 deletions pylint/checkers/mapreduce_checker.py
@@ -0,0 +1,18 @@
# Copyright (c) 2020 Frank Harrison <doublethefish@gmail.com>

# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
# For details: https://github.com/PyCQA/pylint/blob/master/COPYING
import abc


class MapReduceMixin(metaclass=abc.ABCMeta):
    """A mixin designed to allow multiprocess/threaded runs of a Checker"""

    @abc.abstractmethod
    def get_map_data(self):
        """Returns mergeable/reducible data that will be examined"""

    @classmethod
    @abc.abstractmethod
    def reduce_map_data(cls, linter, data):
        """For a given Checker, receives data for all mapped runs"""
34 changes: 31 additions & 3 deletions pylint/checkers/similar.py
@@ -31,7 +31,7 @@

import astroid

from pylint.checkers import BaseChecker, table_lines_from_stats
from pylint.checkers import BaseChecker, MapReduceMixin, table_lines_from_stats
from pylint.interfaces import IRawChecker
from pylint.reporters.ureports.nodes import Table
from pylint.utils import decoding_stream
@@ -160,6 +160,20 @@ def _iter_sims(self):
            for lineset2 in self.linesets[idx + 1 :]:
                yield from self._find_common(lineset, lineset2)

    def get_map_data(self):
        """Returns the data we can use for a map/reduce process

        In this case we are returning this instance's Linesets, that is, all file
        information that will later be used for vectorisation.
        """
        return self.linesets

    def combine_mapreduce_data(self, linesets_collection):
        """Reduces and recombines data into a format that we can report on

        The partner function of get_map_data()"""
        self.linesets = [line for lineset in linesets_collection for line in lineset]


def stripped_lines(lines, ignore_comments, ignore_docstrings, ignore_imports):
"""return lines with leading/trailing whitespace and any ignored code
@@ -288,7 +302,7 @@ def report_similarities(sect, stats, old_stats):


# wrapper to get a pylint checker from the similar class
class SimilarChecker(BaseChecker, Similar):
class SimilarChecker(BaseChecker, Similar, MapReduceMixin):
"""checks for similarities and duplicated code. This computation may be
memory / CPU intensive, so you should disable it if you experiment some
problems.
@@ -352,7 +366,7 @@ def __init__(self, linter=None):
    def set_option(self, optname, value, action=None, optdict=None):
        """method called to set an option (registered in the options list)

        overridden to report options setting to Similar
        Overridden to report options setting to Similar
        """
        BaseChecker.set_option(self, optname, value, action, optdict)
        if optname == "min-similarity-lines":
@@ -402,6 +416,20 @@ def close(self):
stats["nb_duplicated_lines"] = duplicated
stats["percent_duplicated_lines"] = total and duplicated * 100.0 / total

    def get_map_data(self):
        """Passthrough override"""
        return Similar.get_map_data(self)

    @classmethod
    def reduce_map_data(cls, linter, data):
        """Reduces and recombines data into a format that we can report on

        The partner function of get_map_data()"""
        recombined = SimilarChecker(linter)
        recombined.open()
        Similar.combine_mapreduce_data(recombined, linesets_collection=data)
        recombined.close()


def register(linter):
    """required method to auto-register this checker"""
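
To make the new map/reduce path concrete, the following sketch drives both halves by hand, outside the multiprocessing machinery. PyLinter, TestReporter and SimilarChecker are real names used by this PR's test (see the bottom of the diff, which performs the map half the same way); the two file paths are hypothetical, and in a real run check_parallel() performs these steps, including fuller linter setup, for you.

    from pylint.checkers import similar
    from pylint.lint import PyLinter
    from pylint.testutils import TestReporter

    linter = PyLinter(reporter=TestReporter())
    linter.open()
    linter.set_current_module("example")  # hypothetical module name

    # Map: each worker builds its own SimilarChecker and streams files into it.
    map_data = []
    for path in ("pkg/a.py", "pkg/b.py"):  # hypothetical paths
        checker = similar.SimilarChecker(linter)
        with open(path) as stream:
            checker.append_stream(path, stream)
        # One list of LineSet objects per simulated worker run
        map_data.append(checker.get_map_data())

    # Reduce: a fresh checker recombines every run's linesets; its close()
    # computes the similarities and emits any duplicate-code messages.
    similar.SimilarChecker.reduce_map_data(linter, map_data)
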
50 changes: 44 additions & 6 deletions pylint/lint/parallel.py
@@ -67,28 +67,59 @@ def _worker_check_single_file(file_item):

    _worker_linter.open()
    _worker_linter.check_single_file(name, filepath, modname)

    mapreduce_data = collections.defaultdict(list)
    for checker in _worker_linter.get_checkers():
        try:
            data = checker.get_map_data()
        except AttributeError:
            continue
        mapreduce_data[checker.name].append(data)
    msgs = [_get_new_args(m) for m in _worker_linter.reporter.messages]
    _worker_linter.reporter.reset()
    return (
        id(multiprocessing.current_process()),
        _worker_linter.current_name,
        filepath,
        _worker_linter.file_state.base_name,
        msgs,
        _worker_linter.stats,
        _worker_linter.msg_status,
        mapreduce_data,
    )


def _merge_mapreduce_data(linter, all_mapreduce_data):
    """Merges map/reduce data across workers, invoking relevant APIs on checkers"""
    # First collate the data, preparing it so we can send it to the checkers for
    # validation. The intent here is to collect all the map/reduce data for all
    # checker runs across processes; that data will then be passed to a static
    # method on the checkers to be reduced and further processed.
    collated_map_reduce_data = collections.defaultdict(list)
    for linter_data in all_mapreduce_data.values():
        for run_data in linter_data:
            for checker_name, data in run_data.items():
                collated_map_reduce_data[checker_name].extend(data)

    # Send the data to checkers that support/require consolidated data
    original_checkers = linter.get_checkers()
    for checker in original_checkers:
        if checker.name in collated_map_reduce_data:
            # Assume that if the checker has returned map/reduce data,
            # it also has the reducer function
            checker.reduce_map_data(linter, collated_map_reduce_data[checker.name])


def check_parallel(linter, jobs, files, arguments=None):
"""Use the given linter to lint the files with given amount of workers (jobs)"""
# The reporter does not need to be passed to worker processess, i.e. the reporter does
# not need to be pickleable
"""Use the given linter to lint the files with given amount of workers (jobs)
This splits the work filestream-by-filestream. If you need to do work across
multiple files, as in the similarity-checker, then inherit from MapReduceMixin and
implement the map/reduce mixin functionality"""
# The reporter does not need to be passed to worker processes, i.e. the reporter does
original_reporter = linter.reporter
linter.reporter = None

# The linter is inherited by all the pool's workers, i.e. the linter
# is identical to the linter object here. This is requred so that
# is identical to the linter object here. This is required so that
# a custom PyLinter object can be used.
initializer = functools.partial(_worker_initialize, arguments=arguments)
with multiprocessing.Pool(jobs, initializer=initializer, initargs=[linter]) as pool:
@@ -99,14 +130,20 @@ def check_parallel(linter, jobs, files, arguments=None):
        linter.open()

        all_stats = []
        all_mapreduce_data = collections.defaultdict(list)

        # Maps each file to be worked on by a single _worker_check_single_file() call,
        # collecting any map/reduce data by checker module so that we can 'reduce' it
        # later.
        for (
            worker_idx,  # used to merge map/reduce data across workers
            module,
            file_path,
            base_name,
            messages,
            stats,
            msg_status,
            mapreduce_data,
        ) in pool.imap_unordered(_worker_check_single_file, files):
            linter.file_state.base_name = base_name
            linter.set_current_module(module, file_path)
@@ -115,8 +152,9 @@ def check_parallel(linter, jobs, files, arguments=None):
                linter.reporter.handle_message(msg)

            all_stats.append(stats)
            all_mapreduce_data[worker_idx].append(mapreduce_data)
            linter.msg_status |= msg_status

    _merge_mapreduce_data(linter, all_mapreduce_data)
    linter.stats = _merge_stats(all_stats)

    # Insert stats data to local checkers.
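
The nesting of all_mapreduce_data is easier to see with concrete values. Here is a sketch of the shape, using made-up worker ids and placeholder payloads; "similarities" is SimilarChecker's real checker name, everything else is illustrative.

    # worker id -> one dict per file that worker checked,
    # each dict mapping checker name -> [that checker's get_map_data() result]
    all_mapreduce_data = {
        101: [{"similarities": [["<linesets for a.py>"]]}],
        102: [{"similarities": [["<linesets for b.py>"]]}],
    }

    # _merge_mapreduce_data() collates this across workers into:
    #   {"similarities": [["<linesets for a.py>"], ["<linesets for b.py>"]]}
    # and then calls, for each checker that produced map data:
    #   SimilarChecker.reduce_map_data(linter, collated["similarities"])
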
139 changes: 139 additions & 0 deletions tests/checkers/unittest_similar.py
@@ -21,6 +21,8 @@
import pytest

from pylint.checkers import similar
from pylint.lint import PyLinter
from pylint.testutils import TestReporter as Reporter

INPUT = Path(__file__).parent / ".." / "input"
SIMILAR1 = str(INPUT / "similar1")
@@ -234,3 +236,140 @@ def test_no_args():
            assert ex.code == 1
        else:
            pytest.fail("not system exit")


def test_get_map_data():
    """Tests that a SimilarChecker respects the MapReduceMixin interface"""
    linter = PyLinter(reporter=Reporter())

    # Add a parallel checker to ensure it can map and reduce
    linter.register_checker(similar.SimilarChecker(linter))

    source_streams = (
        str(INPUT / "similar_lines_a.py"),
        str(INPUT / "similar_lines_b.py"),
    )
    expected_linelists = (
        (
            "",
            "",
            "",
            "",
            "",
            "",
            "def adipiscing(elit):",
            'etiam = "id"',
            'dictum = "purus,"',
            'vitae = "pretium"',
            'neque = "Vivamus"',
            'nec = "ornare"',
            'tortor = "sit"',
            "return etiam, dictum, vitae, neque, nec, tortor",
            "",
            "",
            "class Amet:",
            "def similar_function_3_lines(self, tellus):",
            "agittis = 10",
            "tellus *= 300",
            "return agittis, tellus",
            "",
            "def lorem(self, ipsum):",
            'dolor = "sit"',
            'amet = "consectetur"',
            "return (lorem, dolor, amet)",
            "",
            "def similar_function_5_lines(self, similar):",
            "some_var = 10",
            "someother_var *= 300",
            'fusce = "sit"',
            'amet = "tortor"',
            "return some_var, someother_var, fusce, amet",
            "",
            'def __init__(self, moleskie, lectus="Mauris", ac="pellentesque"):',
            'metus = "ut"',
            'lobortis = "urna."',
            'Integer = "nisl"',
            '(mauris,) = "interdum"',
            'non = "odio"',
            'semper = "aliquam"',
            'malesuada = "nunc."',
            'iaculis = "dolor"',
            'facilisis = "ultrices"',
            'vitae = "ut."',
            "",
            "return (",
            "metus,",
            "lobortis,",
            "Integer,",
            "mauris,",
            "non,",
            "semper,",
            "malesuada,",
            "iaculis,",
            "facilisis,",
            "vitae,",
            ")",
            "",
            "def similar_function_3_lines(self, tellus):",
            "agittis = 10",
            "tellus *= 300",
            "return agittis, tellus",
        ),
        (
            "",
            "",
            "",
            "",
            "",
            "",
            "",
            "class Nulla:",
            'tortor = "ultrices quis porta in"',
            'sagittis = "ut tellus"',
            "",
            "def pulvinar(self, blandit, metus):",
            "egestas = [mauris for mauris in zip(blandit, metus)]",
            "neque = (egestas, blandit)",
            "",
            "def similar_function_5_lines(self, similar):",
            "some_var = 10",
            "someother_var *= 300",
            'fusce = "sit"',
            'amet = "tortor"',
            'iaculis = "dolor"',
            "return some_var, someother_var, fusce, amet, iaculis, iaculis",
            "",
            "",
            "def tortor(self):",
            "ultrices = 2",
            'quis = ultricies * "porta"',
            "return ultricies, quis",
            "",
            "",
            "class Commodo:",
            "def similar_function_3_lines(self, tellus):",
            "agittis = 10",
            "tellus *= 300",
            'laoreet = "commodo "',
            "return agittis, tellus, laoreet",
        ),
    )

    data = []

    # Manually perform a 'map' type function
    for source_fname in source_streams:
        sim = similar.SimilarChecker(linter)
        with open(source_fname) as stream:
            sim.append_stream(source_fname, stream)
        # The map bit, can you tell? ;)
        data.extend(sim.get_map_data())

    assert len(expected_linelists) == len(data)
    for source_fname, expected_lines, lineset_obj in zip(
        source_streams, expected_linelists, data
    ):
        assert source_fname == lineset_obj.name
        # There doesn't seem to be a faster way of doing this, yet.
        lines = (line for idx, line in lineset_obj.enumerate_stripped())
        assert tuple(expected_lines) == tuple(lines)