Skip to content

Commit

Permalink
✨ Prompt for input, output and filters in case no paths are provided (#6)
Browse files Browse the repository at this point in the history

* Prompt for input, output and filters in case no path is provided

* Fix encoding error during consolidation

* Add input/output path flags

* Use lazy logging and more specific file error handling

* Fix formatting

* Add default filter

* Fix ignore list's implicit-str-concat
  • Loading branch information
OLILHR authored Aug 5, 2024
1 parent ecf95bf commit 0119f5d
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 40 deletions.
1 change: 0 additions & 1 deletion .alloyignore.mock
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# required for unittests

.png
.svg
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ Thumbs.db

# alloy specific files
.alloyignore
codebase.md
alloy.md
24 changes: 11 additions & 13 deletions alloy/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,6 @@


def consolidate(path, extensions=None):
"""
Consolidates the content of all files from a given directory into a single markdown file. Any files, directories and
extensions specified in .alloyignore are excluded. If optional file extensions are provided, only files with these
extensions will be included in the consolidated markdown file, regardless of whether they are listed in .alloyignore
or not.
"""
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
exclude_files = read_alloyignore(project_root, extensions)
codebase = ""
Expand All @@ -29,12 +23,16 @@ def consolidate(path, extensions=None):
_, file_extension = os.path.splitext(file)

try:
with open(file_path, "r", encoding="utf-8") as p:
content = p.read().rstrip()
except UnicodeDecodeError as e:
_logger.error(str(e))
continue

codebase += f"\n#### {relative_path}\n\n```{file_extension[1:]}\n{content}\n```\n"
with open(file_path, "r", encoding="utf-8") as f:
content = f.read()
except UnicodeDecodeError:
try:
with open(file_path, "r", encoding="iso-8859-1") as f:
content = f.read()
except (OSError, IOError) as e:
_logger.warning("Unable to read %s: %s. Skipping this file.", file_path, str(e))
continue

codebase += f"\n#### {relative_path}\n\n```{file_extension[1:]}\n{content.rstrip()}\n```\n"

return codebase
72 changes: 55 additions & 17 deletions alloy/filter.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,25 @@
import os


def ignore_patterns(file_path):
    """
    Reads an ignore file and returns the patterns it contains.

    Every line is stripped of surrounding whitespace. Blank lines and lines starting with "#" (comments) are skipped;
    the remaining lines are returned as a list of strings in file order. Errors raised while opening or decoding the
    file are not handled here and propagate to the caller.
    """
    # "utf-8-sig" reads plain UTF-8 unchanged but drops a leading byte order mark, so a BOM-prefixed file does not
    # turn its first comment line into a bogus pattern.
    with open(file_path, "r", encoding="utf-8-sig") as f:
        stripped_lines = (line.strip() for line in f)
        return [line for line in stripped_lines if line and not line.startswith("#")]


def read_alloyignore(project_root, extension_filter):
"""
Excludes all files, extensions and directories specified in .alloyignore, located inside the root directory.
"""
alloyignore = os.path.join(project_root, ".alloyignore")
default_ignore_list = DEFAULT_IGNORE_LIST.copy()

if not os.path.exists(alloyignore):
return lambda _: False

ignore_list = []
with open(alloyignore, "r", encoding="utf-8") as f:
for line in f:
line = line.strip()
if line and not line.startswith("#"):
ignore_list.append(line) # ignore comments in .alloyignore
if os.path.exists(alloyignore):
default_ignore_list.extend(ignore_patterns(alloyignore))

# pylint: disable=too-many-return-statements
def exclude_files(file_path):
Expand All @@ -25,20 +29,19 @@ def exclude_files(file_path):
if file_extension[1:] in extension_filter:
return False

for pattern in ignore_list:
for pattern in default_ignore_list:
pattern = pattern.replace(os.sep, "/")
if pattern.startswith("/"): # covers absolute paths from the root
if file_path.startswith(pattern[1:]):
return True
elif pattern.endswith("/"): # ignores certain directories
if any(part == pattern[:-1] for part in file_path.split(os.sep)):
return True
elif pattern.startswith("*."): # ignores certain file extensions
if file_path.endswith(pattern[1:]):
return True
elif pattern.endswith("*"): # ignores certain files with depending on their prefixes
if os.path.basename(file_path).startswith(pattern[:-1]):
if any(part == pattern[:-1] for part in file_path.split("/")):
return True
elif "*" in pattern: # handle wildcard patterns
parts = pattern.split("*")
if len(parts) == 2:
if file_path.startswith(parts[0]) and file_path.endswith(parts[1]):
return True
elif pattern in file_path or pattern == os.path.basename(file_path):
return True
return False
Expand Down Expand Up @@ -66,3 +69,38 @@ def parse_extensions(_csx, _param, value):
if not value:
return None
return [ext.strip() for item in value for ext in item.split(",")]


# Built-in ignore patterns, grouped by ecosystem. Entries use the same pattern syntax as lines in .alloyignore.
DEFAULT_IGNORE_LIST = [
    # general tooling, VCS and editor artefacts
    ".cache",
    ".coverage",
    "dist",
    ".DS_Store",
    ".env",
    "*.env",
    ".env.*",
    ".git",
    ".github",
    ".gitignore",
    ".gitattributes",
    ".gitmodules",
    ".idea",
    "Thumbs.db",
    ".venv",
    ".vscode",
    # python
    "__pycache__",
    "*.pyc",
    ".pytest_cache",
    "py.typed",
    ".mypy_cache",
    ".tox",
    # node.js
    "node_modules",
    "package.json",
    "package-lock.json",
    "npm-debug.log",
    # alloy specific files
    ".alloyignore",
    "alloy.md",
]
32 changes: 24 additions & 8 deletions alloy/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,26 +14,42 @@


@click.command()
@click.argument("path", type=click.Path(exists=True))
@click.option("-i", "--input-path", type=click.Path(exists=True), help="Input path for the codebase")
@click.option("-o", "--output-path", type=click.Path(), help="Output path for the generated markdown")
@click.option(
"--filter",
"-f",
"extensions",
callback=parse_extensions,
multiple=True,
help="Filter files by extension via an optional '-f' flag, for instance: -f py,json,yml",
help="OPTIONAL FILTERING BY EXTENSIONS; FOR INSTANCE: -f py,json", # consolidates only .py and .json files
)
def generate_markdown(path, extensions):
def generate_markdown(input_path, output_path, extensions):
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
current_dir = os.getcwd()

if input_path is None:
input_path = click.prompt("INPUT PATH:", type=click.Path(exists=True), default=current_dir)
else:
input_path = os.path.abspath(os.path.join(current_dir, input_path))

if output_path is None:
output_path = click.prompt("OUTPUT PATH:", type=click.Path(), default=project_root)
else:
output_path = os.path.abspath(os.path.join(current_dir, output_path))

extensions = list(extensions) if extensions else None
markdown_content = consolidate(path, extensions)
project_root = os.path.dirname(os.path.abspath(__file__))
output_file = os.path.join(project_root, "../codebase.md")

markdown_content = consolidate(input_path, extensions)
output_file = os.path.join(output_path, "alloy.md")

os.makedirs(output_path, exist_ok=True)

with open(output_file, "w", encoding="utf-8") as f:
f.write(markdown_content)

_logger.info("Markdown file generated at %s", output_file)
_logger.info("CODEBASE CONSOLIDATED AT %s", output_file)


if __name__ == "__main__":
generate_markdown() # pylint: disable=no-value-for-parameter
generate_markdown.main()

0 comments on commit 0119f5d

Please sign in to comment.