From 0119f5d8e5deb1f048a8320c8bc7c9c56c8acd7e Mon Sep 17 00:00:00 2001 From: olli <144932831+OLILHR@users.noreply.github.com> Date: Mon, 5 Aug 2024 15:18:46 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Prompt=20for=20input,=20output=20an?= =?UTF-8?q?d=20filters=20in=20case=20no=20paths=20are=20provided=20(#6)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Prompt for input, output and filters in case no path is provided * Fix encoding error during consolidation * Add input/output path flags * Use lazy logging and more specific file error handling * Fix formatting * Add default filter * Fix ignore list's implicit-str-concat --- .alloyignore.mock | 1 - .gitignore | 2 +- alloy/collector.py | 24 +++++++--------- alloy/filter.py | 72 +++++++++++++++++++++++++++++++++++----------- alloy/main.py | 32 +++++++++++++++------ 5 files changed, 91 insertions(+), 40 deletions(-) diff --git a/.alloyignore.mock b/.alloyignore.mock index 386ed16..1530ce3 100644 --- a/.alloyignore.mock +++ b/.alloyignore.mock @@ -1,4 +1,3 @@ # required for unittests - .png .svg \ No newline at end of file diff --git a/.gitignore b/.gitignore index 2f92609..3fcd7d9 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,4 @@ Thumbs.db # alloy specific files .alloyignore -codebase.md +alloy.md diff --git a/alloy/collector.py b/alloy/collector.py index 437a241..275d43a 100644 --- a/alloy/collector.py +++ b/alloy/collector.py @@ -7,12 +7,6 @@ def consolidate(path, extensions=None): - """ - Consolidates the content of all files from a given directory into a single markdown file. Any files, directories and - extensions specified in .alloyignore are excluded. If optional file extensions are provided, only files with these - extensions will be included in the consolidated markdown file, regardless of whether they are listed in .alloyignore - or not. - """ project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) exclude_files = read_alloyignore(project_root, extensions) codebase = "" @@ -29,12 +23,16 @@ def consolidate(path, extensions=None): _, file_extension = os.path.splitext(file) try: - with open(file_path, "r", encoding="utf-8") as p: - content = p.read().rstrip() - except UnicodeDecodeError as e: - _logger.error(str(e)) - continue - - codebase += f"\n#### {relative_path}\n\n```{file_extension[1:]}\n{content}\n```\n" + with open(file_path, "r", encoding="utf-8") as f: + content = f.read() + except UnicodeDecodeError: + try: + with open(file_path, "r", encoding="iso-8859-1") as f: + content = f.read() + except (OSError, IOError) as e: + _logger.warning("Unable to read %s: %s. Skipping this file.", file_path, str(e)) + continue + + codebase += f"\n#### {relative_path}\n\n```{file_extension[1:]}\n{content.rstrip()}\n```\n" return codebase diff --git a/alloy/filter.py b/alloy/filter.py index 4b3c14f..98e473b 100644 --- a/alloy/filter.py +++ b/alloy/filter.py @@ -1,21 +1,25 @@ import os +def ignore_patterns(file_path): + ignore_list = [] + with open(file_path, "r", encoding="utf-8") as f: + for line in f: + line = line.strip() + if line and not line.startswith("#"): # ignore comments in .alloyignore and DEFAULT_IGNORE_LIST + ignore_list.append(line) + return ignore_list + + def read_alloyignore(project_root, extension_filter): """ Excludes all files, extensions and directories specified in .alloyignore, located inside the root directory. """ alloyignore = os.path.join(project_root, ".alloyignore") + default_ignore_list = DEFAULT_IGNORE_LIST.copy() - if not os.path.exists(alloyignore): - return lambda _: False - - ignore_list = [] - with open(alloyignore, "r", encoding="utf-8") as f: - for line in f: - line = line.strip() - if line and not line.startswith("#"): - ignore_list.append(line) # ignore comments in .alloyignore + if os.path.exists(alloyignore): + default_ignore_list.extend(ignore_patterns(alloyignore)) # pylint: disable=too-many-return-statements def exclude_files(file_path): @@ -25,20 +29,19 @@ def exclude_files(file_path): if file_extension[1:] in extension_filter: return False - for pattern in ignore_list: + for pattern in default_ignore_list: pattern = pattern.replace(os.sep, "/") if pattern.startswith("/"): # covers absolute paths from the root if file_path.startswith(pattern[1:]): return True elif pattern.endswith("/"): # ignores certain directories - if any(part == pattern[:-1] for part in file_path.split(os.sep)): - return True - elif pattern.startswith("*."): # ignores certain file extensions - if file_path.endswith(pattern[1:]): - return True - elif pattern.endswith("*"): # ignores certain files with depending on their prefixes - if os.path.basename(file_path).startswith(pattern[:-1]): + if any(part == pattern[:-1] for part in file_path.split("/")): return True + elif "*" in pattern: # handle wildcard patterns + parts = pattern.split("*") + if len(parts) == 2: + if file_path.startswith(parts[0]) and file_path.endswith(parts[1]): + return True elif pattern in file_path or pattern == os.path.basename(file_path): return True return False @@ -66,3 +69,38 @@ def parse_extensions(_csx, _param, value): if not value: return None return [ext.strip() for item in value for ext in item.split(",")] + + +DEFAULT_IGNORE_LIST = [ + ".cache", + ".coverage", + "dist", + ".DS_Store", + ".env", + "*.env", + ".env.*", + ".git", + ".github", + ".gitignore", + ".gitattributes", + ".gitmodules", + ".idea", + "Thumbs.db", + ".venv", + ".vscode", + # python + "__pycache__", + "*.pyc", + ".pytest_cache", + "py.typed", + ".mypy_cache", + ".tox", + # node.js + "node_modules", + "package.json", + "package-lock.json", + "npm-debug.log", + # alloy specific files + ".alloyignore", + "alloy.md", +] diff --git a/alloy/main.py b/alloy/main.py index 6b2e093..b4fa728 100644 --- a/alloy/main.py +++ b/alloy/main.py @@ -14,26 +14,42 @@ @click.command() -@click.argument("path", type=click.Path(exists=True)) +@click.option("-i", "--input-path", type=click.Path(exists=True), help="Input path for the codebase") +@click.option("-o", "--output-path", type=click.Path(), help="Output path for the generated markdown") @click.option( "--filter", "-f", "extensions", callback=parse_extensions, multiple=True, - help="Filter files by extension via an optional '-f' flag, for instance: -f py,json,yml", + help="OPTIONAL FILTERING BY EXTENSIONS; FOR INSTANCE: -f py,json", # consolidates only .py and .json files ) -def generate_markdown(path, extensions): +def generate_markdown(input_path, output_path, extensions): + project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + current_dir = os.getcwd() + + if input_path is None: + input_path = click.prompt("INPUT PATH:", type=click.Path(exists=True), default=current_dir) + else: + input_path = os.path.abspath(os.path.join(current_dir, input_path)) + + if output_path is None: + output_path = click.prompt("OUTPUT PATH:", type=click.Path(), default=project_root) + else: + output_path = os.path.abspath(os.path.join(current_dir, output_path)) + extensions = list(extensions) if extensions else None - markdown_content = consolidate(path, extensions) - project_root = os.path.dirname(os.path.abspath(__file__)) - output_file = os.path.join(project_root, "../codebase.md") + + markdown_content = consolidate(input_path, extensions) + output_file = os.path.join(output_path, "alloy.md") + + os.makedirs(output_path, exist_ok=True) with open(output_file, "w", encoding="utf-8") as f: f.write(markdown_content) - _logger.info("Markdown file generated at %s", output_file) + _logger.info("CODEBASE CONSOLIDATED AT %s", output_file) if __name__ == "__main__": - generate_markdown() # pylint: disable=no-value-for-parameter + generate_markdown.main()