From 396b37e33563b16b7be40c55e2664f813dbda6f2 Mon Sep 17 00:00:00 2001 From: Tom Aldcroft Date: Fri, 25 Aug 2023 16:34:17 -0400 Subject: [PATCH 1/6] Initial commit of module to convert docstrings to numpydoc format --- skare3_tools/scripts/convert_numpydoc.py | 311 +++++++++++++++++++++++ 1 file changed, 311 insertions(+) create mode 100644 skare3_tools/scripts/convert_numpydoc.py diff --git a/skare3_tools/scripts/convert_numpydoc.py b/skare3_tools/scripts/convert_numpydoc.py new file mode 100644 index 0000000..401bab8 --- /dev/null +++ b/skare3_tools/scripts/convert_numpydoc.py @@ -0,0 +1,311 @@ +# Licensed under a 3-clause BSD style license - see LICENSE.rst +"""Convert docstrings from reST to numpydoc format.""" + +import ast +import re +from pathlib import Path + +REST_MARKERS_RETURNS = [":returns:", ":return:", ":rtype:"] +REST_MARKERS_PARAMS = [":param "] + + +def get_function_docstrings(module_file: str) -> dict[dict]: + """ + Get the docstring for each function in the given module file. + + Parameters + ---------- + module_file : str + The path to the module file. + + Returns + ------- + dict + A dictionary of function names and docstring information. + """ + with open(module_file, "r") as f: + module_source = f.read() + + module_ast = ast.parse(module_source) + + function_docstrings = [] + + function_nodes = [] + for node in module_ast.body: + if isinstance(node, ast.FunctionDef): + function_nodes.append(node) + elif isinstance(node, ast.ClassDef): + for method_node in node.body: + if isinstance(method_node, ast.FunctionDef): + function_nodes.append(method_node) + + for node in function_nodes: + function_name = node.name + function_docstring = ast.get_docstring(node, clean=False) + if function_docstring: + function_docstrings.append( + { + "name": function_name, + "text": function_docstring, + "idx_func_start": node.lineno - 1, + "idx_func_stop": node.end_lineno - 1, + } + ) + + return function_docstrings + + +def find_quote_style(lines: list): + """Find the quote style used for a docstring. + + This assumes that the lines are part of a function definition and that + the docstring is the first thing in the function definition. + """ + for line in lines: + for quotes in ['"""', "'''"]: + if quotes in line: + return quotes + + +def get_docstring_blocks(module_file) -> list[dict]: + """Get all the docstrings that look like reST format in the list of lines. + + Returns a list of dict with keys: + - ``idx0``: (int) Index of start of docstring text + - ``idx1``: (int) Index of end of docstring text + - ``lines``: (list) Lines of docstring + + :param lines: list + List of Python code lines. + :returns: list of dict + """ + docstrings_ast = get_function_docstrings(module_file) + lines = Path(module_file).read_text().splitlines() + + docstring_blocks = [] + + # for function_name, docstring_ast in docstrings_ast.items(): + for docstring_ast in docstrings_ast: + # Skip functions without any reST markers in the docstring + if not any( + marker in docstring_ast["text"] + for marker in REST_MARKERS_RETURNS + REST_MARKERS_PARAMS + ): + continue + + idx0 = None + idx1 = None + + # Line range for this function including docstring + idx0_func = docstring_ast["idx_func_start"] + idx1_func = docstring_ast["idx_func_stop"] + + quotes = find_quote_style(lines[idx0_func:idx1_func]) + + for idx in range(idx0_func, idx1_func): + line = lines[idx].strip() + if re.match(f"^{quotes}.*{quotes}$", line): + idx0 = idx + idx1 = idx + 1 + break + if idx0 is None and line.startswith(quotes): + idx0 = idx + elif idx0 is not None and line.endswith(quotes): + if line.strip() != quotes: + raise ValueError( + f"docstring {quotes} must be on separate line\n" + f"line: {line}\n" + f"line number: {idx + 1}\n" + ) + # Don't include final quotes in this processing + idx1 = idx + break + + if idx0 is not None and idx1 is not None: + lines_out = lines[idx0:idx1] + indent = len(lines_out[0]) - len(lines_out[0].lstrip()) + lines_out = [line[indent:] for line in lines_out] + + docstring_block = { + # "function_name": function_name, + "idx0": idx0, + "idx1": idx1, + "indent": indent, + "lines": lines_out, + } + docstring_blocks.append(docstring_block) + + return docstring_blocks + + +def get_first_marker_index(lines, markers): + for idx, line in enumerate(lines): + if any(line.startswith(marker) for marker in markers): + return idx + else: + return len(lines) + + +def get_marker_idxs(lines: list[str], markers_rest: list[str]): + # Get line indexes in lines where a reST marker is found + idxs = [] + markers = [] + for idx, line in enumerate(lines): + for marker in markers_rest: + if line.startswith(marker): + idxs.append(idx) + markers.append(marker) + break + idxs.append(len(lines)) + return idxs, markers + + +def params_to_numpydoc(lines): + if not lines: + return [] + + idxs, _ = get_marker_idxs(lines, [":param "]) + lines_out = [ + "Parameters", + "----------", + ] + + for idx0, idx1 in zip(idxs[:-1], idxs[1:]): + lines_param = lines[idx0:idx1] + line_param = lines_param[0] + match = re.match(r":param \s+ (\w+) \s* : \s* (.*)", line_param, re.VERBOSE) + if match: + name = match.group(1) + desc = match.group(2) + else: + raise ValueError(f"Could not parse line: {line_param}") + + if idx1 - idx0 == 1: + # Single line param, no type(s) given + lines_out.append(name) + lines_out.append(" " + desc.strip()) + else: + # Multiline, so assume the first line is the type(s) + lines_out.append(f"{name} : {desc}") + for line in lines_param[1:]: + lines_out.append(" " + line.strip()) + + return lines_out + + +def returns_to_numpydoc(lines): + if not lines: + return [] + + idxs, markers = get_marker_idxs(lines, REST_MARKERS_RETURNS) + + return_type = None + return_desc_lines = [] + + for idx0, idx1, marker in zip(idxs[:-1], idxs[1:], markers): + if marker == ":rtype:": + return_type = " ".join(lines[idx0:idx1]) + return_type = return_type[len(marker) :].strip() + + elif marker in [":return:", ":returns:"]: + return_desc_lines = [lines[idx0][len(marker) :]] + lines[idx0 + 1 : idx1] + + lines_out = [ + "Returns", + "-------", + ] + + if return_type is None: + # No explicit return type so just use the description. + prefix = "" + else: + lines_out.append(return_type) + prefix = " " + + for line in return_desc_lines: + lines_out.append(prefix + line.strip()) + + return lines_out + + +def convert_lines_to_numpydoc(lines): + """Convert docstring lines to numpydoc format. + + :param lines: list + List of lines of docstring text. + :returns: list + List of lines of docstring text in numpydoc format. + """ + lines_out = None + + idx_any = get_first_marker_index( + lines, [":param ", ":returns:", ":rtype:", ":return:"] + ) + idx_params = get_first_marker_index(lines, [":param "]) + idx_returns = get_first_marker_index(lines, [":returns:", ":rtype:", ":return:"]) + + lines_out = lines[:idx_any] + # Cut lines_out at the end if they are blank + while lines_out[-1].strip() == "": + lines_out = lines_out[:-1] + + lines_params = [line for line in lines[idx_params:idx_returns] if line.strip()] + lines_returns = [line for line in lines[idx_returns:] if line.strip()] + + lines_params_out = params_to_numpydoc(lines_params) + lines_returns_out = returns_to_numpydoc(lines_returns) + + if lines_params_out: + lines_out.append("") + lines_out.extend(lines_params_out) + + if lines_returns_out: + lines_out.append("") + lines_out.extend(lines_returns_out) + + return lines_out, lines_params, lines_returns + + +def indent_lines(lines, indent): + out_lines = [] + for line in lines: + if line: + out_lines.append(indent + line) + else: + out_lines.append(line) + return out_lines + + +def convert_module_to_numpydoc(module_file_in, module_file_out): + """Convert module docstrings to numpydoc format. + + :param module_file: str + Path to module file. + :returns: list + List of lines of docstring text in numpydoc format. + """ + lines = Path(module_file_in).read_text().splitlines() + lines_orig = lines.copy() + + docstring_blocks = get_docstring_blocks(module_file_in) + + for docstring_block in reversed(docstring_blocks): + idx0 = docstring_block["idx0"] + idx1 = docstring_block["idx1"] + lines_out, _, _ = convert_lines_to_numpydoc(docstring_block["lines"]) + lines_out = indent_lines(lines_out, " " * docstring_block["indent"]) + lines = lines[:idx0] + lines_out + lines[idx1:] + + if module_file_in == module_file_out and lines == lines_orig: + # Don't bother rewriting unchanged file + return + + print(f"Writing {module_file_out}") + file_end = "\n" if lines else "" + Path(module_file_out).write_text("\n".join(lines) + file_end) + + +def convert_directory_to_numpydoc(dir_file): + """Walk through a directory and convert all docstrings to numpydoc format.""" + for path in Path(dir_file).glob("**/*.py"): + convert_module_to_numpydoc(path, path) From f5c42f4d87a631a889ff3d69e8ffd2fcdd126f39 Mon Sep 17 00:00:00 2001 From: Tom Aldcroft Date: Fri, 25 Aug 2023 17:37:18 -0400 Subject: [PATCH 2/6] Docstrings, comments, code tidy --- skare3_tools/scripts/convert_numpydoc.py | 213 +++++++++++++++++++---- 1 file changed, 175 insertions(+), 38 deletions(-) diff --git a/skare3_tools/scripts/convert_numpydoc.py b/skare3_tools/scripts/convert_numpydoc.py index 401bab8..a1bef3c 100644 --- a/skare3_tools/scripts/convert_numpydoc.py +++ b/skare3_tools/scripts/convert_numpydoc.py @@ -9,7 +9,7 @@ REST_MARKERS_PARAMS = [":param "] -def get_function_docstrings(module_file: str) -> dict[dict]: +def get_function_docstrings(module_file: str) -> list[dict]: """ Get the docstring for each function in the given module file. @@ -20,8 +20,8 @@ def get_function_docstrings(module_file: str) -> dict[dict]: Returns ------- - dict - A dictionary of function names and docstring information. + list of dict + A list of dicts of function names and docstring information. """ with open(module_file, "r") as f: module_source = f.read() @@ -58,8 +58,21 @@ def get_function_docstrings(module_file: str) -> dict[dict]: def find_quote_style(lines: list): """Find the quote style used for a docstring. - This assumes that the lines are part of a function definition and that - the docstring is the first thing in the function definition. + Parameters + ---------- + lines : list + A list of strings representing the lines of code. + + Returns + ------- + str + The quote style used for the docstring (either triple single or triple double + quotes). + + Notes + ----- + This function assumes that the lines are part of a function definition and that the + docstring is the first thing in the function definition. """ for line in lines: for quotes in ['"""', "'''"]: @@ -67,24 +80,34 @@ def find_quote_style(lines: list): return quotes -def get_docstring_blocks(module_file) -> list[dict]: +def get_docstring_blocks(module_file: str) -> list[dict]: """Get all the docstrings that look like reST format in the list of lines. Returns a list of dict with keys: - ``idx0``: (int) Index of start of docstring text - ``idx1``: (int) Index of end of docstring text + - ``indent``: (int) Number of spaces to indent docstring text - ``lines``: (list) Lines of docstring - :param lines: list - List of Python code lines. - :returns: list of dict + Parameters + ---------- + module_file : str + Path to module file. + + Returns + ------- + list of dict """ + # Use ast to get information about all functions/methods that have a docstring. + # This conventiently gives us the line numbers for the start and end of the + # function definition. We can then use this to find the docstring in the list of + # lines for the module and extract it. docstrings_ast = get_function_docstrings(module_file) + lines = Path(module_file).read_text().splitlines() docstring_blocks = [] - # for function_name, docstring_ast in docstrings_ast.items(): for docstring_ast in docstrings_ast: # Skip functions without any reST markers in the docstring if not any( @@ -105,19 +128,22 @@ def get_docstring_blocks(module_file) -> list[dict]: for idx in range(idx0_func, idx1_func): line = lines[idx].strip() if re.match(f"^{quotes}.*{quotes}$", line): - idx0 = idx - idx1 = idx + 1 + # Single-line docstring, ignore it since it can't have reST markers. + # And we shouldn't be here anyway since we already checked for reST + # markers. break if idx0 is None and line.startswith(quotes): idx0 = idx elif idx0 is not None and line.endswith(quotes): if line.strip() != quotes: + # Docstring with text and final """ on same line caused some trouble + # so just tell the user to fix it by hand. raise ValueError( - f"docstring {quotes} must be on separate line\n" + f"docstring {quotes} must be on separate line (fix by hand)\n" f"line: {line}\n" f"line number: {idx + 1}\n" ) - # Don't include final quotes in this processing + # Don't include final quotes in this processing, it makes things easier. idx1 = idx break @@ -127,7 +153,6 @@ def get_docstring_blocks(module_file) -> list[dict]: lines_out = [line[indent:] for line in lines_out] docstring_block = { - # "function_name": function_name, "idx0": idx0, "idx1": idx1, "indent": indent, @@ -138,7 +163,23 @@ def get_docstring_blocks(module_file) -> list[dict]: return docstring_blocks -def get_first_marker_index(lines, markers): +def get_first_marker_index(lines: list, markers: list): + """ + Get the index of the first line that starts with a given marker. + + Parameters + ---------- + lines : list + A list of strings representing the lines of text to search. + markers : list + A list of strings representing the markers to search for. + + Returns + ------- + int + The index of the first line that starts with one of the given markers. + If no such line is found, returns the length of the ``lines`` list. + """ for idx, line in enumerate(lines): if any(line.startswith(marker) for marker in markers): return idx @@ -147,7 +188,24 @@ def get_first_marker_index(lines, markers): def get_marker_idxs(lines: list[str], markers_rest: list[str]): - # Get line indexes in lines where a reST marker is found + """ + Get the indices of all lines that start with a given marker. + + Parameters + ---------- + lines : list + A list of strings representing the lines of text to search. + markers : list + A list of strings representing the markers to search for. + + Returns + ------- + idxs : list + A list of integers representing the indices of all lines that start with + one of the given markers. If no such lines are found, returns an empty list. + markers : list + A list of strings representing the markers that were found. + """ idxs = [] markers = [] for idx, line in enumerate(lines): @@ -160,7 +218,25 @@ def get_marker_idxs(lines: list[str], markers_rest: list[str]): return idxs, markers -def params_to_numpydoc(lines): +def params_to_numpydoc(lines: list) -> list: + """ + Convert lines of reST parameters to numpydoc format. + + Parameters + ---------- + lines : list + List of lines of reST parameters. + + Returns + ------- + list + List of lines of numpydoc parameters. + + Raises + ------ + ValueError + If the lines cannot be parsed. + """ if not lines: return [] @@ -193,7 +269,20 @@ def params_to_numpydoc(lines): return lines_out -def returns_to_numpydoc(lines): +def returns_to_numpydoc(lines: list) -> list: + """ + Convert lines of reST returns section to numpydoc format. + + Parameters + ---------- + lines : list + List of lines of reST returns. + + Returns + ------- + list + List of lines of numpydoc returns. + """ if not lines: return [] @@ -216,14 +305,18 @@ def returns_to_numpydoc(lines): ] if return_type is None: - # No explicit return type so just use the description. - prefix = "" - else: - lines_out.append(return_type) - prefix = " " + # No explicit return type. + if len(return_desc_lines) == 1: + # Single line return description, so assume it is the return type. + return_type = return_desc_lines[0].strip() + return_desc_lines = [] + else: + # Multiline return description, so use "out" as the thing being returned. + return_type = "out" + lines_out.append(return_type) for line in return_desc_lines: - lines_out.append(prefix + line.strip()) + lines_out.append(" " + line.strip()) return lines_out @@ -231,24 +324,33 @@ def returns_to_numpydoc(lines): def convert_lines_to_numpydoc(lines): """Convert docstring lines to numpydoc format. - :param lines: list + Parameters + ---------- + lines : list List of lines of docstring text. - :returns: list + + Returns + ------- + list List of lines of docstring text in numpydoc format. """ lines_out = None idx_any = get_first_marker_index( - lines, [":param ", ":returns:", ":rtype:", ":return:"] + lines, REST_MARKERS_RETURNS + REST_MARKERS_PARAMS ) - idx_params = get_first_marker_index(lines, [":param "]) - idx_returns = get_first_marker_index(lines, [":returns:", ":rtype:", ":return:"]) + idx_params = get_first_marker_index(lines, REST_MARKERS_PARAMS) + idx_returns = get_first_marker_index(lines, REST_MARKERS_RETURNS) + # Start out with the original lines up to the first marker (i.e. the start of + # existing parameters or returns sections). lines_out = lines[:idx_any] + # Cut lines_out at the end if they are blank while lines_out[-1].strip() == "": lines_out = lines_out[:-1] + # This assumes that params are before returns. We always adhere to this convention. lines_params = [line for line in lines[idx_params:idx_returns] if line.strip()] lines_returns = [line for line in lines[idx_returns:] if line.strip()] @@ -263,10 +365,24 @@ def convert_lines_to_numpydoc(lines): lines_out.append("") lines_out.extend(lines_returns_out) - return lines_out, lines_params, lines_returns + return lines_out + +def indent_lines(lines: list, indent: str) -> list: + """Indent lines of text. -def indent_lines(lines, indent): + Parameters + ---------- + lines : list + List of lines of text. + indent : str + String to use for indentation. + + Returns + ------- + list + List of lines of text with indentation added. + """ out_lines = [] for line in lines: if line: @@ -276,23 +392,35 @@ def indent_lines(lines, indent): return out_lines -def convert_module_to_numpydoc(module_file_in, module_file_out): +def convert_module_to_numpydoc(module_file_in, module_file_out=None): """Convert module docstrings to numpydoc format. - :param module_file: str + Parameters + ---------- + module_file_in : str Path to module file. - :returns: list - List of lines of docstring text in numpydoc format. + module_file_out : str + Path to output module file. If None, overwrite module_file_in. + + Returns + ------- + list + List of lines of docstring text in numpydoc format. """ + if module_file_out is None: + module_file_out = module_file_in + lines = Path(module_file_in).read_text().splitlines() lines_orig = lines.copy() docstring_blocks = get_docstring_blocks(module_file_in) + # Go through existing docstrings in reverse order so that we can modify the lines + # list in-place without messing up the line numbers. for docstring_block in reversed(docstring_blocks): idx0 = docstring_block["idx0"] idx1 = docstring_block["idx1"] - lines_out, _, _ = convert_lines_to_numpydoc(docstring_block["lines"]) + lines_out = convert_lines_to_numpydoc(docstring_block["lines"]) lines_out = indent_lines(lines_out, " " * docstring_block["indent"]) lines = lines[:idx0] + lines_out + lines[idx1:] @@ -306,6 +434,15 @@ def convert_module_to_numpydoc(module_file_in, module_file_out): def convert_directory_to_numpydoc(dir_file): - """Walk through a directory and convert all docstrings to numpydoc format.""" + """Walk through a directory and convert all docstrings to numpydoc format. + + This function will overwrite the original files so be sure they are in version + control or backed up. + + Parameters + ---------- + dir_file : str + Path to directory. + """ for path in Path(dir_file).glob("**/*.py"): convert_module_to_numpydoc(path, path) From c77d78fbb48324b2ce92525b9e4b689d67a7257e Mon Sep 17 00:00:00 2001 From: Tom Aldcroft Date: Thu, 14 Sep 2023 05:48:53 -0400 Subject: [PATCH 3/6] Fix flake8 and ruff --- .github/workflows/flake8.yml | 4 ++-- skare3_tools/github/scripts/add_secrets.py | 2 +- skare3_tools/github/scripts/merge_pr.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/flake8.yml b/.github/workflows/flake8.yml index 169e0e2..9f2bcb4 100644 --- a/.github/workflows/flake8.yml +++ b/.github/workflows/flake8.yml @@ -9,10 +9,10 @@ jobs: steps: - uses: actions/checkout@v2 - - name: Set up Python 3.8 + - name: Set up Python 3.10 uses: actions/setup-python@v1 with: - python-version: 3.8 + python-version: 3.10 - name: Lint with flake8 run: | pip install flake8 diff --git a/skare3_tools/github/scripts/add_secrets.py b/skare3_tools/github/scripts/add_secrets.py index ecb6d05..ebfdcc4 100755 --- a/skare3_tools/github/scripts/add_secrets.py +++ b/skare3_tools/github/scripts/add_secrets.py @@ -153,7 +153,7 @@ def add_secrets(repository, secrets): _driver_.find_element_by_id("secret_name").send_keys(secret) value = secrets[secret] - if type(value) is dict: + if isinstance(value, dict): value = json.dumps(value) _driver_.find_element_by_id("secret_value").send_keys(value) diff --git a/skare3_tools/github/scripts/merge_pr.py b/skare3_tools/github/scripts/merge_pr.py index 1d7f1c5..8ef5683 100755 --- a/skare3_tools/github/scripts/merge_pr.py +++ b/skare3_tools/github/scripts/merge_pr.py @@ -46,7 +46,7 @@ def main(): kwargs["state"] = "open" prs = repository.pull_requests(**kwargs) - if type(prs) is dict and not prs["response"]["ok"]: + if isinstance(prs, dict) and not prs["response"]["ok"]: print(f'Failed getting requested PR: {prs["response"]["reason"]}') sys.exit(1) @@ -57,7 +57,7 @@ def main(): # sanity checks sha = prs[0]["head"]["sha"] if args.sha and sha != args.sha: - print(f"Requested sha does not match that of the PR") + print("Requested sha does not match that of the PR") sys.exit(1) # do the merge From ac0296bb2535deebee7905507d8516547acdf358 Mon Sep 17 00:00:00 2001 From: Tom Aldcroft Date: Thu, 14 Sep 2023 05:58:12 -0400 Subject: [PATCH 4/6] Remove redundant CI check on pull_request --- .github/workflows/black.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml index b04fb15..baa98cb 100644 --- a/.github/workflows/black.yml +++ b/.github/workflows/black.yml @@ -1,6 +1,6 @@ name: Lint -on: [push, pull_request] +on: [push] jobs: lint: From 0d001236c555e276929a1eafa14ce5443d3f5d3c Mon Sep 17 00:00:00 2001 From: Tom Aldcroft Date: Thu, 14 Sep 2023 05:58:40 -0400 Subject: [PATCH 5/6] Fix formatting for black --- skare3_tools/scripts/convert_numpydoc.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/skare3_tools/scripts/convert_numpydoc.py b/skare3_tools/scripts/convert_numpydoc.py index a1bef3c..e2257e9 100644 --- a/skare3_tools/scripts/convert_numpydoc.py +++ b/skare3_tools/scripts/convert_numpydoc.py @@ -336,9 +336,7 @@ def convert_lines_to_numpydoc(lines): """ lines_out = None - idx_any = get_first_marker_index( - lines, REST_MARKERS_RETURNS + REST_MARKERS_PARAMS - ) + idx_any = get_first_marker_index(lines, REST_MARKERS_RETURNS + REST_MARKERS_PARAMS) idx_params = get_first_marker_index(lines, REST_MARKERS_PARAMS) idx_returns = get_first_marker_index(lines, REST_MARKERS_RETURNS) From aab8a603497862da9f9ef3d954f695c4c38c940d Mon Sep 17 00:00:00 2001 From: Tom Aldcroft Date: Thu, 14 Sep 2023 05:59:57 -0400 Subject: [PATCH 6/6] Fix flake8 version again --- .github/workflows/flake8.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/flake8.yml b/.github/workflows/flake8.yml index 9f2bcb4..2dd5c11 100644 --- a/.github/workflows/flake8.yml +++ b/.github/workflows/flake8.yml @@ -12,7 +12,7 @@ jobs: - name: Set up Python 3.10 uses: actions/setup-python@v1 with: - python-version: 3.10 + python-version: "3.10" - name: Lint with flake8 run: | pip install flake8