From 2a5dd2e93c0f68f5c0fb8b4785bbcd9aa1467668 Mon Sep 17 00:00:00 2001 From: Siqi Wang <66684937+wangsiqidahaoren@users.noreply.github.com> Date: Mon, 8 Jan 2024 09:16:11 +0800 Subject: [PATCH 1/3] do some refactoring (extract method) --- _codespell.py | 1297 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1297 insertions(+) create mode 100644 _codespell.py diff --git a/_codespell.py b/_codespell.py new file mode 100644 index 0000000000..b6de09e965 --- /dev/null +++ b/_codespell.py @@ -0,0 +1,1297 @@ +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see +# https://www.gnu.org/licenses/old-licenses/gpl-2.0.html. +""" +Copyright (C) 2010-2011 Lucas De Marchi +Copyright (C) 2011 ProFUSION embedded systems +""" + +import argparse +import configparser +import ctypes +import fnmatch +import os +import re +import sys +import textwrap +from ctypes import wintypes +from typing import Any, Dict, List, Match, Optional, Pattern, Sequence, Set, Tuple + +# autogenerated by setuptools_scm +from ._version import ( # type: ignore[import-not-found] + __version__ as VERSION, # noqa: N812 +) + +word_regex_def = r"[\w\-'’]+" +# While we want to treat characters like ( or " as okay for a starting break, +# these may occur unescaped in URIs, and so we are more restrictive on the +# endpoint. Emails are more restrictive, so the endpoint remains flexible. 
uri_regex_def = (
    "(\\b(?:https?|[ts]?ftp|file|git|smb)://[^\\s]+(?=$|\\s)|"
    "\\b[\\w.%+-]+@[\\w.-]+\\b)"
)
# Pass all misspellings through this translation table to generate
# alternative misspellings and fixes.
# Each entry is a (from_chars, to_chars) pair; build_dict() turns every pair
# into a str.maketrans() table and applies it to both the misspelling and its
# fix.
alt_chars = (("'", "’"),)
USAGE = """
\t%prog [OPTIONS] [file1 file2 ... fileN]
"""

# Language codes; referenced by the aspell-related columns of
# _builtin_dictionaries below.
supported_languages_en = ("en", "en_GB", "en_US", "en_CA", "en_AU")
supported_languages = supported_languages_en

# Users might want to link this file into /usr/local/bin, so we resolve the
# symbolic link path to the real path if necessary.
_data_root = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data")
_builtin_dictionaries = (
    # One tuple per builtin dictionary. Columns, by index:
    #   0: name, as accepted by --builtin
    #   1: description, shown in the --builtin help text
    #   2: file-name suffix; the dictionary is read from
    #      <_data_root>/dictionary<suffix>.txt
    #   3: err in aspell
    #   4: correction in aspell
    #   5: err dictionary array
    #   6: rep dictionary array
    # Columns 3-6 are not read anywhere in this module.
    # NOTE(review): presumably they are consumed by the test-suite - confirm.
    # The arrays must contain the names of aspell dictionaries
    # The aspell tests here aren't the ideal state, but the None's are
    # realistic for obscure words
    ("clear", "for unambiguous errors", "", False, None, supported_languages_en, None),
    (
        "rare",
        "for rare (but valid) words that are likely to be errors",
        "_rare",
        None,
        None,
        None,
        None,
    ),
    (
        "informal",
        "for making informal words more formal",
        "_informal",
        True,
        True,
        supported_languages_en,
        supported_languages_en,
    ),
    (
        "usage",
        "for replacing phrasing with recommended terms",
        "_usage",
        None,
        None,
        None,
        None,
    ),
    (
        "code",
        "for words from code and/or mathematics that are likely to be typos in other contexts (such as uint)",  # noqa: E501
        "_code",
        None,
        None,
        None,
        None,
    ),
    (
        "names",
        "for valid proper names that might be typos",
        "_names",
        None,
        None,
        None,
        None,
    ),
    (
        "en-GB_to_en-US",
        "for corrections from en-GB to en-US",
        "_en-GB_to_en-US",
        True,
        True,
        ("en_GB",),
        ("en_US",),
    ),
)
# Comma-separated builtin dictionary names used as the --builtin default.
_builtin_default = "clear,rare"

# docs say os.EX_USAGE et al.
# are only available on Unix systems, so to be safe
# we protect and just use the values they are on macOS and Linux
EX_OK = 0
EX_USAGE = 64
EX_DATAERR = 65
EX_CONFIG = 78

# Windows specific constants
ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x0004
STD_OUTPUT_HANDLE = wintypes.HANDLE(-11)

# OPTIONS:
#
# ARGUMENTS:
#    dict_filename       The file containing the dictionary of misspellings.
#                        If set to '-', it will be read from stdin
#    file1 .. fileN      Files to check spelling


class QuietLevels:
    """Bit values that can be OR-ed together in the --quiet-level bitmask."""

    NONE = 0
    ENCODING = 1
    BINARY_FILE = 2
    DISABLED_FIXES = 4
    NON_AUTOMATIC_FIXES = 8
    FIXES = 16
    CONFIG_FILES = 32


class GlobMatch:
    """Match file names against a set of ``fnmatch``-style glob patterns."""

    def __init__(self, pattern: Optional[Sequence[str]]) -> None:
        """Build the pattern list.

        ``pattern`` is normally the list collected from repeated ``--skip``
        options, where each item may itself be a comma-separated list of
        globs. A single plain string is accepted as well. ``None`` or an
        empty value means "match nothing".
        """
        self.pattern_list: Optional[List[str]]
        if pattern:
            # A bare string must be wrapped first: joining a str iterates over
            # its characters, which would turn "*.txt" into the patterns
            # "*", ".", "t", "x", "t" and make "*" match every file.
            if isinstance(pattern, str):
                pattern = [pattern]
            # Pattern might be a list of comma-delimited strings
            self.pattern_list = ",".join(pattern).split(",")
        else:
            self.pattern_list = None

    def match(self, filename: str) -> bool:
        """Return True if ``filename`` matches at least one pattern."""
        if self.pattern_list is None:
            return False

        return any(fnmatch.fnmatch(filename, p) for p in self.pattern_list)


class Misspelling:
    """One dictionary entry: the correction, and whether it may be auto-applied."""

    def __init__(self, data: str, fix: bool, reason: str) -> None:
        # data:   replacement text (may hold several comma-separated candidates)
        # fix:    True when the replacement can be applied automatically
        # reason: explanation attached to entries whose fix is disabled
        self.data = data
        self.fix = fix
        self.reason = reason


class TermColors:
    """ANSI escape sequences used to colorize output; ``disable()`` blanks them."""

    def __init__(self) -> None:
        self.FILE = "\033[33m"
        self.WWORD = "\033[31m"
        self.FWORD = "\033[32m"
        self.DISABLE = "\033[0m"

    def disable(self) -> None:
        """Replace every escape sequence with the empty string."""
        self.FILE = ""
        self.WWORD = ""
        self.FWORD = ""
        self.DISABLE = ""


class Summary:
    """Count how many times each misspelled word was encountered."""

    def __init__(self) -> None:
        self.summary: Dict[str, int] = {}

    def update(self, wrongword: str) -> None:
        """Increment the counter for ``wrongword``."""
        self.summary[wrongword] = self.summary.get(wrongword, 0) + 1

    def __str__(self) -> str:
        """Return one ``word<padding>count`` line per word, sorted by word.

        The count is right-aligned so that each line is 15 characters wide
        whenever the word and its count fit.
        """
        rows = []
        for key in sorted(self.summary):
            count = self.summary[key]
            # Never let the field collapse below "one space + the digits":
            # a zero or negative width would be parsed as a sign flag by the
            # format mini-language and glue the count to the word.
            width = max(15 - len(key), len(str(count)) + 1)
            rows.append(f"{key}{count:{width}}")
        return "\n".join(rows)


class FileOpener:
    """Read text files, working out their encoding either with chardet or by
    trying a fixed list of encodings."""

    def __init__(self, use_chardet: bool, quiet_level: int) -> None:
        self.use_chardet = use_chardet
        if use_chardet:
            self.init_chardet()
        self.quiet_level = quiet_level

    def init_chardet(self) -> None:
        """Create the chardet detector.

        Raises:
            ImportError: if chardet is not installed.
        """
        try:
            from chardet.universaldetector import UniversalDetector
        except ImportError as e:
            msg = (
                "There's no chardet installed to import from. "
                "Please, install it and check your PYTHONPATH "
                "environment variable"
            )
            raise ImportError(msg) from e

        self.encdetector = UniversalDetector()

    def open(self, filename: str) -> Tuple[List[str], str]:
        """Return ``(lines, encoding)`` for ``filename``."""
        if self.use_chardet:
            return self.open_with_chardet(filename)
        return self.open_with_internal(filename)

    def open_with_chardet(self, filename: str) -> Tuple[List[str], str]:
        """Detect the encoding with chardet, then read the file with it.

        Raises:
            UnicodeDecodeError: the detected encoding cannot decode the file.
            LookupError: Python has no codec for the detected encoding.
        """
        self.encdetector.reset()
        with open(filename, "rb") as fb:
            for line in fb:
                self.encdetector.feed(line)
                if self.encdetector.done:
                    break
        self.encdetector.close()
        encoding = self.encdetector.result["encoding"]

        try:
            # Decoding happens while reading, not while opening, so the read
            # has to sit inside the ``try`` for the UnicodeDecodeError handler
            # to be reachable. ``with`` closes the handle on that path too.
            with open(filename, encoding=encoding, newline="") as f:
                lines = f.readlines()
                used_encoding = f.encoding
        except UnicodeDecodeError:
            print(f"ERROR: Could not detect encoding: {filename}", file=sys.stderr)
            raise
        except LookupError:
            print(
                f"ERROR: Don't know how to handle encoding {encoding}: {filename}",
                file=sys.stderr,
            )
            raise

        return lines, used_encoding

    def open_with_internal(self, filename: str) -> Tuple[List[str], str]:
        """Read the file as utf-8, falling back to iso-8859-1.

        Warnings about failed attempts go to stderr unless
        ``QuietLevels.ENCODING`` is set in ``quiet_level``.

        Raises:
            Exception: if no candidate encoding can decode the file.
        """
        warn = not self.quiet_level & QuietLevels.ENCODING
        first_try = True
        for encoding in ("utf-8", "iso-8859-1"):
            if first_try:
                first_try = False
            elif warn:
                print(f'WARNING: Trying next encoding "{encoding}"', file=sys.stderr)
            with open(filename, encoding=encoding, newline="") as f:
                try:
                    lines = f.readlines()
                except UnicodeDecodeError:
                    if warn:
                        print(
                            f'WARNING: Cannot decode file using encoding "{encoding}": '
                            f"{filename}",
                            file=sys.stderr,
                        )
                else:
                    break
        else:
            msg = "Unknown encoding"
            raise Exception(msg)

        return lines, encoding


#
-.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:- + + +# If someday this breaks, we can just switch to using RawTextHelpFormatter, +# but it has the disadvantage of not wrapping our long lines. + + +class NewlineHelpFormatter(argparse.HelpFormatter): + """Help formatter that preserves newlines and deals with lists.""" + + def _split_lines(self, text: str, width: int) -> List[str]: + parts = text.split("\n") + out = [] + for part in parts: + # Eventually we could allow others... + indent_start = "- " + if part.startswith(indent_start): + offset = len(indent_start) + else: + offset = 0 + part = part[offset:] + part = self._whitespace_matcher.sub(" ", part).strip() + parts = textwrap.wrap(part, width - offset) + parts = [" " * offset + p for p in parts] + if offset: + parts[0] = indent_start + parts[0][offset:] + out.extend(parts) + return out + + +def _toml_to_parseconfig(toml_dict: Dict[str, Any]) -> Dict[str, Any]: + """Convert a dict read from a TOML file to the parseconfig.read_dict() format.""" + return {k: "" if v is True else v for k, v in toml_dict.items() if v is not False} + + +def _supports_ansi_colors() -> bool: + if sys.platform == "win32": + # Windows Terminal enables ANSI escape codes by default. In other cases + # it is disabled. + # See https://ss64.com/nt/syntax-ansi.html for more information. + kernel32 = ctypes.WinDLL("kernel32") + + # fmt: off + kernel32.GetConsoleMode.argtypes = ( + wintypes.HANDLE, # _In_ hConsoleHandle + wintypes.LPDWORD, # _Out_ lpMode + ) + # fmt: on + kernel32.GetConsoleMode.restype = wintypes.BOOL + + mode = wintypes.DWORD() + handle = kernel32.GetStdHandle(STD_OUTPUT_HANDLE) + if not kernel32.GetConsoleMode(handle, ctypes.byref(mode)): + # TODO: print a warning with the error message on stderr? + return False + + return (mode.value & ENABLE_VIRTUAL_TERMINAL_PROCESSING) != 0 + elif sys.platform == "wasi": + # WASI disables ANSI escape codes for security reasons. 
+ # See https://github.com/WebAssembly/WASI/issues/162. + return False + elif sys.stdout.isatty(): + return True + + return False + + +def parse_options( + args: Sequence[str], +) -> Tuple[argparse.Namespace, argparse.ArgumentParser, List[str]]: + parser = argparse.ArgumentParser(formatter_class=NewlineHelpFormatter) + + parser.set_defaults(colors=_supports_ansi_colors()) + parser.add_argument("--version", action="version", version=VERSION) + + parser.add_argument( + "-d", + "--disable-colors", + action="store_false", + dest="colors", + help="disable colors, even when printing to terminal", + ) + parser.add_argument( + "-c", + "--enable-colors", + action="store_true", + dest="colors", + help="enable colors, even when not printing to terminal", + ) + + parser.add_argument( + "-w", + "--write-changes", + action="store_true", + default=False, + help="write changes in place if possible", + ) + + parser.add_argument( + "-D", + "--dictionary", + action="append", + help="custom dictionary file that contains spelling " + "corrections. If this flag is not specified or " + 'equals "-" then the default dictionary is used. ' + "This option can be specified multiple times.", + ) + builtin_opts = "\n- ".join( + [""] + [f"{d[0]!r} {d[1]}" for d in _builtin_dictionaries] + ) + parser.add_argument( + "--builtin", + dest="builtin", + default=_builtin_default, + metavar="BUILTIN-LIST", + help="comma-separated list of builtin dictionaries " + 'to include (when "-D -" or no "-D" is passed). ' + "Current options are:" + builtin_opts + "\n" + "The default is %(default)r.", + ) + parser.add_argument( + "--ignore-regex", + action="store", + type=str, + help="regular expression that is used to find " + "patterns to ignore by treating as whitespace. " + "When writing regular expressions, consider " + "ensuring there are boundary non-word chars, " + 'e.g., "\\bmatch\\b". 
Defaults to ' + "empty/disabled.", + ) + parser.add_argument( + "-I", + "--ignore-words", + action="append", + metavar="FILE", + help="file that contains words that will be ignored " + "by codespell. File must contain 1 word per line." + " Words are case sensitive based on how they are " + "written in the dictionary file", + ) + parser.add_argument( + "-L", + "--ignore-words-list", + action="append", + metavar="WORDS", + help="comma separated list of words to be ignored " + "by codespell. Words are case sensitive based on " + "how they are written in the dictionary file", + ) + parser.add_argument( + "--uri-ignore-words-list", + action="append", + metavar="WORDS", + help="comma separated list of words to be ignored " + "by codespell in URIs and emails only. Words are " + "case sensitive based on how they are written in " + 'the dictionary file. If set to "*", all ' + "misspelling in URIs and emails will be ignored.", + ) + parser.add_argument( + "-r", + "--regex", + action="store", + type=str, + help="regular expression that is used to find words. " + "By default any alphanumeric character, the " + "underscore, the hyphen, and the apostrophe are " + "used to build words. This option cannot be " + "specified together with --write-changes.", + ) + parser.add_argument( + "--uri-regex", + action="store", + type=str, + help="regular expression that is used to find URIs " + "and emails. A default expression is provided.", + ) + parser.add_argument( + "-s", + "--summary", + action="store_true", + default=False, + help="print summary of fixes", + ) + + parser.add_argument( + "--count", + action="store_true", + default=False, + help="print the number of errors as the last line of stderr", + ) + + parser.add_argument( + "-S", + "--skip", + action="append", + help="comma-separated list of files to skip. It " + "accepts globs as well. 
E.g.: if you want " + "codespell to skip .eps and .txt files, " + 'you\'d give "*.eps,*.txt" to this option.', + ) + + parser.add_argument( + "-x", + "--exclude-file", + type=str, + metavar="FILE", + help="ignore whole lines that match those " + "in the file FILE. The lines in FILE " + "should match the to-be-excluded lines exactly", + ) + + parser.add_argument( + "-i", + "--interactive", + action="store", + type=int, + default=0, + help="set interactive mode when writing changes:\n" + "- 0: no interactivity.\n" + "- 1: ask for confirmation.\n" + "- 2: ask user to choose one fix when more than one is available.\n" # noqa: E501 + "- 3: both 1 and 2", + ) + + parser.add_argument( + "-q", + "--quiet-level", + action="store", + type=int, + default=34, + help="bitmask that allows suppressing messages:\n" + "- 0: print all messages.\n" + "- 1: disable warnings about wrong encoding.\n" + "- 2: disable warnings about binary files.\n" + "- 4: omit warnings about automatic fixes that were disabled in the dictionary.\n" # noqa: E501 + "- 8: don't print anything for non-automatic fixes.\n" + "- 16: don't print the list of fixed files.\n" + "- 32: don't print configuration files.\n" + "As usual with bitmasks, these levels can be " + "combined; e.g. use 3 for levels 1+2, 7 for " + "1+2+4, 23 for 1+2+4+16, etc. " + "The default mask is %(default)s.", + ) + + parser.add_argument( + "-e", + "--hard-encoding-detection", + action="store_true", + default=False, + help="use chardet to detect the encoding of each " + "file. 
This can slow down codespell, but is more " + "reliable in detecting encodings other than " + "utf-8, iso8859-1, and ascii.", + ) + + parser.add_argument( + "-f", + "--check-filenames", + action="store_true", + default=False, + help="check file names as well", + ) + + parser.add_argument( + "-H", + "--check-hidden", + action="store_true", + default=False, + help='check hidden files and directories (those starting with ".") as well.', + ) + parser.add_argument( + "-A", + "--after-context", + type=int, + metavar="LINES", + help="print LINES of trailing context", + ) + parser.add_argument( + "-B", + "--before-context", + type=int, + metavar="LINES", + help="print LINES of leading context", + ) + parser.add_argument( + "-C", + "--context", + type=int, + metavar="LINES", + help="print LINES of surrounding context", + ) + parser.add_argument( + "--stdin-single-line", + action="store_true", + help="output just a single line for each misspelling in stdin mode", + ) + parser.add_argument("--config", type=str, help="path to config file.") + parser.add_argument("--toml", type=str, help="path to a pyproject.toml file.") + parser.add_argument("files", nargs="*", help="files or directories to check") + + # Parse command line options. + options = parser.parse_args(list(args)) + + # Load config files and look for ``codespell`` options. + cfg_files = ["setup.cfg", ".codespellrc"] + if options.config: + cfg_files.append(options.config) + config = configparser.ConfigParser(interpolation=None) + + # Read toml before other config files. 
+ toml_files = [] + tomllib_raise_error = False + if os.path.isfile("pyproject.toml"): + toml_files.append("pyproject.toml") + if options.toml: + toml_files.append(options.toml) + tomllib_raise_error = True + if toml_files: + if sys.version_info >= (3, 11): + import tomllib + else: + try: + import tomli as tomllib # type: ignore[no-redef] + except ImportError as e: + if tomllib_raise_error: + msg = ( + f"tomllib or tomli are required to read pyproject.toml " + f"but could not be imported, got: {e}" + ) + raise ImportError(msg) from None + tomllib = None # type: ignore[assignment] + if tomllib is not None: + for toml_file in toml_files: + with open(toml_file, "rb") as f: + data = tomllib.load(f).get("tool", {}) + if "codespell" in data: + data["codespell"] = _toml_to_parseconfig(data["codespell"]) + config.read_dict(data) + + # Collect which config files are going to be used + used_cfg_files = [] + for cfg_file in cfg_files: + _cfg = configparser.ConfigParser() + _cfg.read(cfg_file) + if _cfg.has_section("codespell"): + used_cfg_files.append(cfg_file) + + # Use config files + config.read(used_cfg_files) + if config.has_section("codespell"): + # Build a "fake" argv list using option name and value. + cfg_args = [] + for key in config["codespell"]: + # Add option as arg. + cfg_args.append(f"--{key}") + # If value is blank, skip. + val = config["codespell"][key] + if val: + cfg_args.append(val) + + # Parse config file options. + options = parser.parse_args(cfg_args) + + # Re-parse command line options to override config. 
+ options = parser.parse_args(list(args), namespace=options) + + if not options.files: + options.files.append(".") + + return options, parser, used_cfg_files + + +def parse_ignore_words_option(ignore_words_option: List[str]) -> Set[str]: + ignore_words: Set[str] = set() + if ignore_words_option: + for comma_separated_words in ignore_words_option: + ignore_words.update( + word.strip() for word in comma_separated_words.split(",") + ) + return ignore_words + + +def build_exclude_hashes(filename: str, exclude_lines: Set[str]) -> None: + with open(filename, encoding="utf-8") as f: + exclude_lines.update(line.rstrip() for line in f) + + +def build_ignore_words(filename: str, ignore_words: Set[str]) -> None: + with open(filename, encoding="utf-8") as f: + ignore_words.update(line.strip() for line in f) + + +def add_misspelling( + key: str, + data: str, + misspellings: Dict[str, Misspelling], +) -> None: + data = data.strip() + + if "," in data: + fix = False + data, reason = data.rsplit(",", 1) + reason = reason.lstrip() + else: + fix = True + reason = "" + + misspellings[key] = Misspelling(data, fix, reason) + + +def build_dict( + filename: str, + misspellings: Dict[str, Misspelling], + ignore_words: Set[str], +) -> None: + with open(filename, encoding="utf-8") as f: + translate_tables = [(x, str.maketrans(x, y)) for x, y in alt_chars] + for line in f: + [key, data] = line.split("->") + # TODO for now, convert both to lower. Someday we can maybe add + # support for fixing caps. 
+ key = key.lower() + data = data.lower() + if key not in ignore_words: + add_misspelling(key, data, misspellings) + # generate alternative misspellings/fixes + for x, table in translate_tables: + if x in key: + alt_key = key.translate(table) + alt_data = data.translate(table) + if alt_key not in ignore_words: + add_misspelling(alt_key, alt_data, misspellings) + + +def is_hidden(filename: str, check_hidden: bool) -> bool: + bfilename = os.path.basename(filename) + + return bfilename not in ("", ".", "..") and ( + not check_hidden and bfilename[0] == "." + ) + + +def is_text_file(filename: str) -> bool: + with open(filename, mode="rb") as f: + s = f.read(1024) + return b"\x00" not in s + + +def fix_case(word: str, fixword: str) -> str: + if word == word.capitalize(): + return ", ".join(w.strip().capitalize() for w in fixword.split(",")) + if word == word.upper(): + return fixword.upper() + # they are both lower case + # or we don't have any idea + return fixword + + +def ask_for_word_fix( + line: str, + match: Match[str], + misspelling: Misspelling, + interactivity: int, + colors: TermColors, +) -> Tuple[bool, str]: + wrongword = match.group() + if interactivity <= 0: + return misspelling.fix, fix_case(wrongword, misspelling.data) + + line_ui = ( + f"{line[:match.start()]}" + f"{colors.WWORD}{wrongword}{colors.DISABLE}" + f"{line[match.end():]}" + ) + + if misspelling.fix and interactivity & 1: + r = "" + fixword = fix_case(wrongword, misspelling.data) + while not r: + print(f"{line_ui}\t{wrongword} ==> {fixword} (Y/n) ", end="", flush=True) + r = sys.stdin.readline().strip().upper() + if not r: + r = "Y" + if r not in ("Y", "N"): + print("Say 'y' or 'n'") + r = "" + + if r == "N": + misspelling.fix = False + + elif (interactivity & 2) and not misspelling.reason: + # if it is not disabled, i.e. 
it just has more than one possible fix, + # we ask the user which word to use + + r = "" + opt = [w.strip() for w in misspelling.data.split(",")] + while not r: + print(f"{line_ui} Choose an option (blank for none): ", end="") + for i, o in enumerate(opt): + fixword = fix_case(wrongword, o) + print(f" {i}) {fixword}", end="") + print(": ", end="", flush=True) + + n = sys.stdin.readline().strip() + if not n: + break + + try: + i = int(n) + r = opt[i] + except (ValueError, IndexError): + print("Not a valid option\n") + + if r: + misspelling.fix = True + misspelling.data = r + + return misspelling.fix, fix_case(wrongword, misspelling.data) + + +def print_context( + lines: List[str], + index: int, + context: Tuple[int, int], +) -> None: + # context = (context_before, context_after) + for i in range(index - context[0], index + context[1] + 1): + if 0 <= i < len(lines): + print(f"{'>' if i == index else ':'} {lines[i].rstrip()}") + + +def _ignore_word_sub( + text: str, + ignore_word_regex: Optional[Pattern[str]], +) -> str: + if ignore_word_regex: + text = ignore_word_regex.sub(" ", text) + return text + + +def extract_words( + text: str, + word_regex: Pattern[str], + ignore_word_regex: Optional[Pattern[str]], +) -> List[str]: + return word_regex.findall(_ignore_word_sub(text, ignore_word_regex)) + + +def extract_words_iter( + text: str, + word_regex: Pattern[str], + ignore_word_regex: Optional[Pattern[str]], +) -> List[Match[str]]: + return list(word_regex.finditer(_ignore_word_sub(text, ignore_word_regex))) + + +def apply_uri_ignore_words( + check_matches: List[Match[str]], + line: str, + word_regex: Pattern[str], + ignore_word_regex: Optional[Pattern[str]], + uri_regex: Pattern[str], + uri_ignore_words: Set[str], +) -> List[Match[str]]: + if not uri_ignore_words: + return check_matches + for uri in uri_regex.findall(line): + for uri_word in extract_words(uri, word_regex, ignore_word_regex): + if uri_word in uri_ignore_words: + # determine/remove only the first among 
matches + for i, match in enumerate(check_matches): + if match.group() == uri_word: + check_matches = check_matches[:i] + check_matches[i + 1 :] + break + return check_matches + + +def parse_file( + filename: str, + colors: TermColors, + summary: Optional[Summary], + misspellings: Dict[str, Misspelling], + exclude_lines: Set[str], + file_opener: FileOpener, + word_regex: Pattern[str], + ignore_word_regex: Optional[Pattern[str]], + uri_regex: Pattern[str], + uri_ignore_words: Set[str], + context: Optional[Tuple[int, int]], + options: argparse.Namespace, +) -> int: + bad_count = 0 + lines = None + changed = False + + if filename == "-": + f = sys.stdin + encoding = "utf-8" + lines = f.readlines() + else: + if options.check_filenames: + bad_count = count_bed_spell(bad_count, colors, filename, ignore_word_regex, misspellings, options, summary, + word_regex) + + # ignore irregular files + if not os.path.isfile(filename): + return bad_count + + try: + text = is_text_file(filename) + except PermissionError as e: + print(f"WARNING: {e.strerror}: {filename}", file=sys.stderr) + return bad_count + except OSError: + return bad_count + + if not text: + if not options.quiet_level & QuietLevels.BINARY_FILE: + print(f"WARNING: Binary file: {filename}", file=sys.stderr) + return bad_count + try: + lines, encoding = file_opener.open(filename) + except OSError: + return bad_count + + for i, line in enumerate(lines): + if line.rstrip() in exclude_lines: + continue + + fixed_words = set() + asked_for = set() + + # If all URI spelling errors will be ignored, erase any URI before + # extracting words. Otherwise, apply ignores after extracting words. + # This ensures that if a URI ignore word occurs both inside a URI and + # outside, it will still be a spelling error. 
+ if "*" in uri_ignore_words: + line = uri_regex.sub(" ", line) + check_matches = extract_words_iter(line, word_regex, ignore_word_regex) + if "*" not in uri_ignore_words: + check_matches = apply_uri_ignore_words( + check_matches, + line, + word_regex, + ignore_word_regex, + uri_regex, + uri_ignore_words, + ) + bad_count, changed = count_matches(asked_for, bad_count, changed, check_matches, colors, context, filename, + fixed_words, i, line, lines, misspellings, options, summary) + + if changed: + if filename == "-": + print("---") + for line in lines: + print(line, end="") + else: + if not options.quiet_level & QuietLevels.FIXES: + print( + f"{colors.FWORD}FIXED:{colors.DISABLE} {filename}", + file=sys.stderr, + ) + with open(filename, "w", encoding=encoding, newline="") as f: + f.writelines(lines) + return bad_count + + +def count_matches(asked_for, bad_count, changed, check_matches, colors, context, filename, fixed_words, i, line, lines, + misspellings, options, summary): + for match in check_matches: + word = match.group() + lword = word.lower() + if lword in misspellings: + # Sometimes we find a 'misspelling' which is actually a valid word + # preceded by a string escape sequence. Ignore such cases as + # they're usually false alarms; see issue #17 among others. + char_before_idx = match.start() - 1 + if ( + char_before_idx >= 0 + and line[char_before_idx] == "\\" + # bell, backspace, formfeed, newline, carriage-return, tab, vtab. 
+ and word.startswith(("a", "b", "f", "n", "r", "t", "v")) + and lword[1:] not in misspellings + ): + continue + + context_shown = False + fix = misspellings[lword].fix + fixword = fix_case(word, misspellings[lword].data) + + if options.interactive and lword not in asked_for: + if context is not None: + context_shown = True + print_context(lines, i, context) + fix, fixword = ask_for_word_fix( + lines[i], + match, + misspellings[lword], + options.interactive, + colors=colors, + ) + asked_for.add(lword) + + if summary and fix: + summary.update(lword) + + if word in fixed_words: # can skip because of re.sub below + continue + + if options.write_changes and fix: + changed = True + lines[i] = re.sub(rf"\b{word}\b", fixword, lines[i]) + fixed_words.add(word) + continue + + # otherwise warning was explicitly set by interactive mode + if ( + options.interactive & 2 + and not fix + and not misspellings[lword].reason + ): + continue + + cfilename = f"{colors.FILE}{filename}{colors.DISABLE}" + cline = f"{colors.FILE}{i + 1}{colors.DISABLE}" + cwrongword = f"{colors.WWORD}{word}{colors.DISABLE}" + crightword = f"{colors.FWORD}{fixword}{colors.DISABLE}" + + reason = misspellings[lword].reason + if reason: + if options.quiet_level & QuietLevels.DISABLED_FIXES: + continue + creason = f" | {colors.FILE}{reason}{colors.DISABLE}" + else: + if options.quiet_level & QuietLevels.NON_AUTOMATIC_FIXES: + continue + creason = "" + + # If we get to this point (uncorrected error) we should change + # our bad_count and thus return value + bad_count += 1 + + if (not context_shown) and (context is not None): + print_context(lines, i, context) + if filename != "-": + print( + f"{cfilename}:{cline}: {cwrongword} " + f"==> {crightword}{creason}" + ) + elif options.stdin_single_line: + print(f"{cline}: {cwrongword} ==> {crightword}{creason}") + else: + print( + f"{cline}: {line.strip()}\n\t{cwrongword} " + f"==> {crightword}{creason}" + ) + return bad_count, changed + + +def 
count_bed_spell(bad_count, colors, filename, ignore_word_regex, misspellings, options, summary, word_regex): + for word in extract_words(filename, word_regex, ignore_word_regex): + lword = word.lower() + if lword not in misspellings: + continue + fix = misspellings[lword].fix + fixword = fix_case(word, misspellings[lword].data) + + if summary and fix: + summary.update(lword) + + cfilename = f"{colors.FILE}{filename}{colors.DISABLE}" + cwrongword = f"{colors.WWORD}{word}{colors.DISABLE}" + crightword = f"{colors.FWORD}{fixword}{colors.DISABLE}" + + reason = misspellings[lword].reason + if reason: + if options.quiet_level & QuietLevels.DISABLED_FIXES: + continue + creason = f" | {colors.FILE}{reason}{colors.DISABLE}" + else: + if options.quiet_level & QuietLevels.NON_AUTOMATIC_FIXES: + continue + creason = "" + + bad_count += 1 + + print(f"{cfilename}: {cwrongword} ==> {crightword}{creason}") + return bad_count + + +def _script_main() -> int: + """Wrap to main() for setuptools.""" + return main(*sys.argv[1:]) + + +def main(*args: str) -> int: + """Contains flow control""" + try: + options, parser, used_cfg_files = parse_options(args) + except configparser.Error as e: + print( + f"ERROR: ill-formed config file: {e.message}", + file=sys.stderr, + ) + return EX_CONFIG + + # Report used config files + if not options.quiet_level & QuietLevels.CONFIG_FILES: + if len(used_cfg_files) > 0: + print("Used config files:") + for ifile, cfg_file in enumerate(used_cfg_files, start=1): + print(f" {ifile}: {cfg_file}") + + if options.regex and options.write_changes: + print( + "ERROR: --write-changes cannot be used together with --regex", + file=sys.stderr, + ) + parser.print_help() + return EX_USAGE + word_regex = options.regex or word_regex_def + try: + word_regex = re.compile(word_regex) + except re.error as e: + print(f'ERROR: invalid --regex "{word_regex}" ({e})', file=sys.stderr) + parser.print_help() + return EX_USAGE + + if options.ignore_regex: + try: + ignore_word_regex = 
re.compile(options.ignore_regex) + except re.error as e: + print( + f'ERROR: invalid --ignore-regex "{options.ignore_regex}" ({e})', + file=sys.stderr, + ) + parser.print_help() + return EX_USAGE + else: + ignore_word_regex = None + + ignore_words_files = options.ignore_words or [] + ignore_words = parse_ignore_words_option(options.ignore_words_list) + for ignore_words_file in ignore_words_files: + if not os.path.isfile(ignore_words_file): + print( + f"ERROR: cannot find ignore-words file: {ignore_words_file}", + file=sys.stderr, + ) + parser.print_help() + return EX_USAGE + build_ignore_words(ignore_words_file, ignore_words) + + uri_regex = options.uri_regex or uri_regex_def + try: + uri_regex = re.compile(uri_regex) + except re.error as e: + print( + f'ERROR: invalid --uri-regex "{uri_regex}" ({e})', + file=sys.stderr, + ) + parser.print_help() + return EX_USAGE + uri_ignore_words = parse_ignore_words_option(options.uri_ignore_words_list) + + dictionaries = options.dictionary or ["-"] + + use_dictionaries = [] + for dictionary in dictionaries: + if dictionary == "-": + # figure out which builtin dictionaries to use + use = sorted(set(options.builtin.split(","))) + for u in use: + for builtin in _builtin_dictionaries: + if builtin[0] == u: + use_dictionaries.append( + os.path.join(_data_root, f"dictionary{builtin[2]}.txt") + ) + break + else: + print( + f"ERROR: Unknown builtin dictionary: {u}", + file=sys.stderr, + ) + parser.print_help() + return EX_USAGE + else: + if not os.path.isfile(dictionary): + print( + f"ERROR: cannot find dictionary file: {dictionary}", + file=sys.stderr, + ) + parser.print_help() + return EX_USAGE + use_dictionaries.append(dictionary) + misspellings: Dict[str, Misspelling] = {} + for dictionary in use_dictionaries: + build_dict(dictionary, misspellings, ignore_words) + colors = TermColors() + if not options.colors: + colors.disable() + + if options.summary: + summary = Summary() + else: + summary = None + + context = None + if 
options.context is not None: + if (options.before_context is not None) or (options.after_context is not None): + print( + "ERROR: --context/-C cannot be used together with " + "--context-before/-B or --context-after/-A", + file=sys.stderr, + ) + parser.print_help() + return EX_USAGE + context_both = max(0, options.context) + context = (context_both, context_both) + elif (options.before_context is not None) or (options.after_context is not None): + context_before = 0 + context_after = 0 + if options.before_context is not None: + context_before = max(0, options.before_context) + if options.after_context is not None: + context_after = max(0, options.after_context) + context = (context_before, context_after) + + exclude_lines: Set[str] = set() + if options.exclude_file: + build_exclude_hashes(options.exclude_file, exclude_lines) + + file_opener = FileOpener(options.hard_encoding_detection, options.quiet_level) + + glob_match = GlobMatch(options.skip) + try: + glob_match.match("/random/path") # does not need a real path + except re.error: + print( + "ERROR: --skip/-S has been fed an invalid glob, " + "try escaping special characters", + file=sys.stderr, + ) + return EX_USAGE + + bad_count = 0 + for filename in sorted(options.files): + # ignore hidden files + if is_hidden(filename, options.check_hidden): + continue + + if os.path.isdir(filename): + for root, dirs, files in os.walk(filename): + if glob_match.match(root): # skip (absolute) directories + dirs.clear() + continue + if is_hidden(root, options.check_hidden): # dir itself hidden + continue + for file_ in sorted(files): + # ignore hidden files in directories + if is_hidden(file_, options.check_hidden): + continue + if glob_match.match(file_): # skip files + continue + fname = os.path.join(root, file_) + if glob_match.match(fname): # skip paths + continue + bad_count += parse_file( + fname, + colors, + summary, + misspellings, + exclude_lines, + file_opener, + word_regex, + ignore_word_regex, + uri_regex, + 
uri_ignore_words, + context, + options, + ) + + # skip (relative) directories + dirs[:] = [ + dir_ + for dir_ in dirs + if not glob_match.match(dir_) + and not is_hidden(dir_, options.check_hidden) + ] + + elif not glob_match.match(filename): # skip files + bad_count += parse_file( + filename, + colors, + summary, + misspellings, + exclude_lines, + file_opener, + word_regex, + ignore_word_regex, + uri_regex, + uri_ignore_words, + context, + options, + ) + + if summary: + print("\n-------8<-------\nSUMMARY:") + print(summary) + if options.count: + print(bad_count, file=sys.stderr) + return EX_DATAERR if bad_count else EX_OK From ad1dd1689e6d8b2b6148922db8668ccf32c36b6d Mon Sep 17 00:00:00 2001 From: Siqi Wang <66684937+wangsiqidahaoren@users.noreply.github.com> Date: Mon, 8 Jan 2024 10:03:05 +0800 Subject: [PATCH 2/3] extract method for long method smell --- codespell_lib/_codespell.py | 229 +++++++++++++++++++----------------- 1 file changed, 121 insertions(+), 108 deletions(-) diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py index 9eeb8c3ac4..b6de09e965 100644 --- a/codespell_lib/_codespell.py +++ b/codespell_lib/_codespell.py @@ -884,33 +884,8 @@ def parse_file( lines = f.readlines() else: if options.check_filenames: - for word in extract_words(filename, word_regex, ignore_word_regex): - lword = word.lower() - if lword not in misspellings: - continue - fix = misspellings[lword].fix - fixword = fix_case(word, misspellings[lword].data) - - if summary and fix: - summary.update(lword) - - cfilename = f"{colors.FILE}{filename}{colors.DISABLE}" - cwrongword = f"{colors.WWORD}{word}{colors.DISABLE}" - crightword = f"{colors.FWORD}{fixword}{colors.DISABLE}" - - reason = misspellings[lword].reason - if reason: - if options.quiet_level & QuietLevels.DISABLED_FIXES: - continue - creason = f" | {colors.FILE}{reason}{colors.DISABLE}" - else: - if options.quiet_level & QuietLevels.NON_AUTOMATIC_FIXES: - continue - creason = "" - - bad_count += 1 - - 
print(f"{cfilename}: {cwrongword} ==> {crightword}{creason}") + bad_count = count_bed_spell(bad_count, colors, filename, ignore_word_regex, misspellings, options, summary, + word_regex) # ignore irregular files if not os.path.isfile(filename): @@ -956,107 +931,145 @@ def parse_file( uri_regex, uri_ignore_words, ) - for match in check_matches: - word = match.group() - lword = word.lower() - if lword in misspellings: - # Sometimes we find a 'misspelling' which is actually a valid word - # preceded by a string escape sequence. Ignore such cases as - # they're usually false alarms; see issue #17 among others. - char_before_idx = match.start() - 1 - if ( + bad_count, changed = count_matches(asked_for, bad_count, changed, check_matches, colors, context, filename, + fixed_words, i, line, lines, misspellings, options, summary) + + if changed: + if filename == "-": + print("---") + for line in lines: + print(line, end="") + else: + if not options.quiet_level & QuietLevels.FIXES: + print( + f"{colors.FWORD}FIXED:{colors.DISABLE} {filename}", + file=sys.stderr, + ) + with open(filename, "w", encoding=encoding, newline="") as f: + f.writelines(lines) + return bad_count + + +def count_matches(asked_for, bad_count, changed, check_matches, colors, context, filename, fixed_words, i, line, lines, + misspellings, options, summary): + for match in check_matches: + word = match.group() + lword = word.lower() + if lword in misspellings: + # Sometimes we find a 'misspelling' which is actually a valid word + # preceded by a string escape sequence. Ignore such cases as + # they're usually false alarms; see issue #17 among others. + char_before_idx = match.start() - 1 + if ( char_before_idx >= 0 and line[char_before_idx] == "\\" # bell, backspace, formfeed, newline, carriage-return, tab, vtab. 
and word.startswith(("a", "b", "f", "n", "r", "t", "v")) and lword[1:] not in misspellings - ): - continue + ): + continue - context_shown = False - fix = misspellings[lword].fix - fixword = fix_case(word, misspellings[lword].data) - - if options.interactive and lword not in asked_for: - if context is not None: - context_shown = True - print_context(lines, i, context) - fix, fixword = ask_for_word_fix( - lines[i], - match, - misspellings[lword], - options.interactive, - colors=colors, - ) - asked_for.add(lword) + context_shown = False + fix = misspellings[lword].fix + fixword = fix_case(word, misspellings[lword].data) + + if options.interactive and lword not in asked_for: + if context is not None: + context_shown = True + print_context(lines, i, context) + fix, fixword = ask_for_word_fix( + lines[i], + match, + misspellings[lword], + options.interactive, + colors=colors, + ) + asked_for.add(lword) - if summary and fix: - summary.update(lword) + if summary and fix: + summary.update(lword) - if word in fixed_words: # can skip because of re.sub below - continue + if word in fixed_words: # can skip because of re.sub below + continue - if options.write_changes and fix: - changed = True - lines[i] = re.sub(rf"\b{word}\b", fixword, lines[i]) - fixed_words.add(word) - continue + if options.write_changes and fix: + changed = True + lines[i] = re.sub(rf"\b{word}\b", fixword, lines[i]) + fixed_words.add(word) + continue - # otherwise warning was explicitly set by interactive mode - if ( + # otherwise warning was explicitly set by interactive mode + if ( options.interactive & 2 and not fix and not misspellings[lword].reason - ): + ): + continue + + cfilename = f"{colors.FILE}{filename}{colors.DISABLE}" + cline = f"{colors.FILE}{i + 1}{colors.DISABLE}" + cwrongword = f"{colors.WWORD}{word}{colors.DISABLE}" + crightword = f"{colors.FWORD}{fixword}{colors.DISABLE}" + + reason = misspellings[lword].reason + if reason: + if options.quiet_level & QuietLevels.DISABLED_FIXES: + 
continue + creason = f" | {colors.FILE}{reason}{colors.DISABLE}" + else: + if options.quiet_level & QuietLevels.NON_AUTOMATIC_FIXES: continue + creason = "" - cfilename = f"{colors.FILE}{filename}{colors.DISABLE}" - cline = f"{colors.FILE}{i + 1}{colors.DISABLE}" - cwrongword = f"{colors.WWORD}{word}{colors.DISABLE}" - crightword = f"{colors.FWORD}{fixword}{colors.DISABLE}" + # If we get to this point (uncorrected error) we should change + # our bad_count and thus return value + bad_count += 1 - reason = misspellings[lword].reason - if reason: - if options.quiet_level & QuietLevels.DISABLED_FIXES: - continue - creason = f" | {colors.FILE}{reason}{colors.DISABLE}" - else: - if options.quiet_level & QuietLevels.NON_AUTOMATIC_FIXES: - continue - creason = "" + if (not context_shown) and (context is not None): + print_context(lines, i, context) + if filename != "-": + print( + f"{cfilename}:{cline}: {cwrongword} " + f"==> {crightword}{creason}" + ) + elif options.stdin_single_line: + print(f"{cline}: {cwrongword} ==> {crightword}{creason}") + else: + print( + f"{cline}: {line.strip()}\n\t{cwrongword} " + f"==> {crightword}{creason}" + ) + return bad_count, changed - # If we get to this point (uncorrected error) we should change - # our bad_count and thus return value - bad_count += 1 - if (not context_shown) and (context is not None): - print_context(lines, i, context) - if filename != "-": - print( - f"{cfilename}:{cline}: {cwrongword} " - f"==> {crightword}{creason}" - ) - elif options.stdin_single_line: - print(f"{cline}: {cwrongword} ==> {crightword}{creason}") - else: - print( - f"{cline}: {line.strip()}\n\t{cwrongword} " - f"==> {crightword}{creason}" - ) +def count_bed_spell(bad_count, colors, filename, ignore_word_regex, misspellings, options, summary, word_regex): + for word in extract_words(filename, word_regex, ignore_word_regex): + lword = word.lower() + if lword not in misspellings: + continue + fix = misspellings[lword].fix + fixword = fix_case(word, 
misspellings[lword].data) - if changed: - if filename == "-": - print("---") - for line in lines: - print(line, end="") + if summary and fix: + summary.update(lword) + + cfilename = f"{colors.FILE}{filename}{colors.DISABLE}" + cwrongword = f"{colors.WWORD}{word}{colors.DISABLE}" + crightword = f"{colors.FWORD}{fixword}{colors.DISABLE}" + + reason = misspellings[lword].reason + if reason: + if options.quiet_level & QuietLevels.DISABLED_FIXES: + continue + creason = f" | {colors.FILE}{reason}{colors.DISABLE}" else: - if not options.quiet_level & QuietLevels.FIXES: - print( - f"{colors.FWORD}FIXED:{colors.DISABLE} {filename}", - file=sys.stderr, - ) - with open(filename, "w", encoding=encoding, newline="") as f: - f.writelines(lines) + if options.quiet_level & QuietLevels.NON_AUTOMATIC_FIXES: + continue + creason = "" + + bad_count += 1 + + print(f"{cfilename}: {cwrongword} ==> {crightword}{creason}") return bad_count From 0c69c869c63cb3a09fe1a9e72f306f300784c042 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 8 Jan 2024 02:04:20 +0000 Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- _codespell.py | 78 ++++++++++++++++++++++++++++--------- codespell_lib/_codespell.py | 78 ++++++++++++++++++++++++++++--------- 2 files changed, 118 insertions(+), 38 deletions(-) diff --git a/_codespell.py b/_codespell.py index b6de09e965..5431b0c952 100644 --- a/_codespell.py +++ b/_codespell.py @@ -884,8 +884,16 @@ def parse_file( lines = f.readlines() else: if options.check_filenames: - bad_count = count_bed_spell(bad_count, colors, filename, ignore_word_regex, misspellings, options, summary, - word_regex) + bad_count = count_bed_spell( + bad_count, + colors, + filename, + ignore_word_regex, + misspellings, + options, + summary, + word_regex, + ) # ignore irregular files if not os.path.isfile(filename): @@ -931,8 +939,22 @@ def parse_file( 
uri_regex, uri_ignore_words, ) - bad_count, changed = count_matches(asked_for, bad_count, changed, check_matches, colors, context, filename, - fixed_words, i, line, lines, misspellings, options, summary) + bad_count, changed = count_matches( + asked_for, + bad_count, + changed, + check_matches, + colors, + context, + filename, + fixed_words, + i, + line, + lines, + misspellings, + options, + summary, + ) if changed: if filename == "-": @@ -950,8 +972,22 @@ def parse_file( return bad_count -def count_matches(asked_for, bad_count, changed, check_matches, colors, context, filename, fixed_words, i, line, lines, - misspellings, options, summary): +def count_matches( + asked_for, + bad_count, + changed, + check_matches, + colors, + context, + filename, + fixed_words, + i, + line, + lines, + misspellings, + options, + summary, +): for match in check_matches: word = match.group() lword = word.lower() @@ -961,11 +997,11 @@ def count_matches(asked_for, bad_count, changed, check_matches, colors, context, # they're usually false alarms; see issue #17 among others. char_before_idx = match.start() - 1 if ( - char_before_idx >= 0 - and line[char_before_idx] == "\\" - # bell, backspace, formfeed, newline, carriage-return, tab, vtab. - and word.startswith(("a", "b", "f", "n", "r", "t", "v")) - and lword[1:] not in misspellings + char_before_idx >= 0 + and line[char_before_idx] == "\\" + # bell, backspace, formfeed, newline, carriage-return, tab, vtab. 
+ and word.startswith(("a", "b", "f", "n", "r", "t", "v")) + and lword[1:] not in misspellings ): continue @@ -999,11 +1035,7 @@ def count_matches(asked_for, bad_count, changed, check_matches, colors, context, continue # otherwise warning was explicitly set by interactive mode - if ( - options.interactive & 2 - and not fix - and not misspellings[lword].reason - ): + if options.interactive & 2 and not fix and not misspellings[lword].reason: continue cfilename = f"{colors.FILE}{filename}{colors.DISABLE}" @@ -1029,8 +1061,7 @@ def count_matches(asked_for, bad_count, changed, check_matches, colors, context, print_context(lines, i, context) if filename != "-": print( - f"{cfilename}:{cline}: {cwrongword} " - f"==> {crightword}{creason}" + f"{cfilename}:{cline}: {cwrongword} " f"==> {crightword}{creason}" ) elif options.stdin_single_line: print(f"{cline}: {cwrongword} ==> {crightword}{creason}") @@ -1042,7 +1073,16 @@ def count_matches(asked_for, bad_count, changed, check_matches, colors, context, return bad_count, changed -def count_bed_spell(bad_count, colors, filename, ignore_word_regex, misspellings, options, summary, word_regex): +def count_bed_spell( + bad_count, + colors, + filename, + ignore_word_regex, + misspellings, + options, + summary, + word_regex, +): for word in extract_words(filename, word_regex, ignore_word_regex): lword = word.lower() if lword not in misspellings: diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py index b6de09e965..5431b0c952 100644 --- a/codespell_lib/_codespell.py +++ b/codespell_lib/_codespell.py @@ -884,8 +884,16 @@ def parse_file( lines = f.readlines() else: if options.check_filenames: - bad_count = count_bed_spell(bad_count, colors, filename, ignore_word_regex, misspellings, options, summary, - word_regex) + bad_count = count_bed_spell( + bad_count, + colors, + filename, + ignore_word_regex, + misspellings, + options, + summary, + word_regex, + ) # ignore irregular files if not os.path.isfile(filename): @@ 
-931,8 +939,22 @@ def parse_file( uri_regex, uri_ignore_words, ) - bad_count, changed = count_matches(asked_for, bad_count, changed, check_matches, colors, context, filename, - fixed_words, i, line, lines, misspellings, options, summary) + bad_count, changed = count_matches( + asked_for, + bad_count, + changed, + check_matches, + colors, + context, + filename, + fixed_words, + i, + line, + lines, + misspellings, + options, + summary, + ) if changed: if filename == "-": @@ -950,8 +972,22 @@ def parse_file( return bad_count -def count_matches(asked_for, bad_count, changed, check_matches, colors, context, filename, fixed_words, i, line, lines, - misspellings, options, summary): +def count_matches( + asked_for, + bad_count, + changed, + check_matches, + colors, + context, + filename, + fixed_words, + i, + line, + lines, + misspellings, + options, + summary, +): for match in check_matches: word = match.group() lword = word.lower() @@ -961,11 +997,11 @@ def count_matches(asked_for, bad_count, changed, check_matches, colors, context, # they're usually false alarms; see issue #17 among others. char_before_idx = match.start() - 1 if ( - char_before_idx >= 0 - and line[char_before_idx] == "\\" - # bell, backspace, formfeed, newline, carriage-return, tab, vtab. - and word.startswith(("a", "b", "f", "n", "r", "t", "v")) - and lword[1:] not in misspellings + char_before_idx >= 0 + and line[char_before_idx] == "\\" + # bell, backspace, formfeed, newline, carriage-return, tab, vtab. 
+ and word.startswith(("a", "b", "f", "n", "r", "t", "v")) + and lword[1:] not in misspellings ): continue @@ -999,11 +1035,7 @@ def count_matches(asked_for, bad_count, changed, check_matches, colors, context, continue # otherwise warning was explicitly set by interactive mode - if ( - options.interactive & 2 - and not fix - and not misspellings[lword].reason - ): + if options.interactive & 2 and not fix and not misspellings[lword].reason: continue cfilename = f"{colors.FILE}{filename}{colors.DISABLE}" @@ -1029,8 +1061,7 @@ def count_matches(asked_for, bad_count, changed, check_matches, colors, context, print_context(lines, i, context) if filename != "-": print( - f"{cfilename}:{cline}: {cwrongword} " - f"==> {crightword}{creason}" + f"{cfilename}:{cline}: {cwrongword} " f"==> {crightword}{creason}" ) elif options.stdin_single_line: print(f"{cline}: {cwrongword} ==> {crightword}{creason}") @@ -1042,7 +1073,16 @@ def count_matches(asked_for, bad_count, changed, check_matches, colors, context, return bad_count, changed -def count_bed_spell(bad_count, colors, filename, ignore_word_regex, misspellings, options, summary, word_regex): +def count_bed_spell( + bad_count, + colors, + filename, + ignore_word_regex, + misspellings, + options, + summary, + word_regex, +): for word in extract_words(filename, word_regex, ignore_word_regex): lword = word.lower() if lword not in misspellings: