From 10b89bebae6b8bf0faeac61bd0dbd02bd7416df8 Mon Sep 17 00:00:00 2001 From: Benjamin Woodruff Date: Sat, 23 Jan 2021 18:16:09 -0800 Subject: [PATCH] [native] Add a rust implementation of whitespace_parser This adds an experimental new native extension, with the goal of improving performance. For now it only reimplements `whitespace_parser`. This appears to make parsing marginally faster overall, but the performance gains are pretty limited because whitespace parsing is a small portion of the overall parsing process, and there's some added overhead from converting types (mostly Unicode strings) between Rust and Python. The main value of this change is instead to show what's needed to port part of the codebase over to Rust, using an example that's small enough to be digestible, but big enough to be more than a trivial toy. I originally started by trying to port the tokenizer to Rust, since it takes a larger portion of the time and it's infrequently modified (making it a good candidate), but found issues with the implementation direction I was taking, so I scrapped that for now. 
--- .editorconfig | 6 +- .gitignore | 1 + libcst/_parser/py_whitespace_parser.py | 261 ++++++ libcst/_parser/tests/test_detect_config.py | 11 +- .../_parser/tests/test_whitespace_parser.py | 11 +- libcst/_parser/types/config.py | 40 +- libcst/_parser/types/py_config.py | 54 ++ libcst/_parser/types/py_whitespace_state.py | 36 + libcst/_parser/types/whitespace_state.py | 35 +- libcst/_parser/whitespace_parser.py | 274 +------ libcst_native/Cargo.lock | 312 ++++++++ libcst_native/Cargo.toml | 31 + libcst_native/README.md | 66 ++ libcst_native/src/lib.rs | 36 + libcst_native/src/macros.rs | 33 + libcst_native/src/parser_config.rs | 137 ++++ libcst_native/src/py_cached.rs | 76 ++ libcst_native/src/test_utils.rs | 42 + libcst_native/src/whitespace_parser.rs | 745 ++++++++++++++++++ libcst_native/src/whitespace_state.rs | 80 ++ requirements-dev.txt | 1 + setup.py | 1 + stubs/libcst_native/parser_config.pyi | 45 ++ stubs/libcst_native/whitespace_parser.pyi | 29 + stubs/libcst_native/whitespace_state.pyi | 15 + 25 files changed, 2050 insertions(+), 328 deletions(-) create mode 100644 libcst/_parser/py_whitespace_parser.py create mode 100644 libcst/_parser/types/py_config.py create mode 100644 libcst/_parser/types/py_whitespace_state.py create mode 100644 libcst_native/Cargo.lock create mode 100644 libcst_native/Cargo.toml create mode 100644 libcst_native/README.md create mode 100644 libcst_native/src/lib.rs create mode 100644 libcst_native/src/macros.rs create mode 100644 libcst_native/src/parser_config.rs create mode 100644 libcst_native/src/py_cached.rs create mode 100644 libcst_native/src/test_utils.rs create mode 100644 libcst_native/src/whitespace_parser.rs create mode 100644 libcst_native/src/whitespace_state.rs create mode 100644 stubs/libcst_native/parser_config.pyi create mode 100644 stubs/libcst_native/whitespace_parser.pyi create mode 100644 stubs/libcst_native/whitespace_state.pyi diff --git a/.editorconfig b/.editorconfig index 0824f6693..c4f3c65d8 100644 --- 
a/.editorconfig +++ b/.editorconfig @@ -1,6 +1,6 @@ root = true -[*.{py,pyi,toml,md}] +[*.{py,pyi,rs,toml,md}] charset = "utf-8" end_of_line = lf indent_size = 4 @@ -8,3 +8,7 @@ indent_style = space insert_final_newline = true trim_trailing_whitespace = true max_line_length = 88 + +[*.rs] +# https://github.com/rust-dev-tools/fmt-rfcs/blob/master/guide/guide.md +max_line_length = 100 diff --git a/.gitignore b/.gitignore index 85fb5573a..e302b1db3 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ build/ .coverage .hypothesis/ .pyre_configuration +libcst_native/target/ diff --git a/libcst/_parser/py_whitespace_parser.py b/libcst/_parser/py_whitespace_parser.py new file mode 100644 index 000000000..d1421f1da --- /dev/null +++ b/libcst/_parser/py_whitespace_parser.py @@ -0,0 +1,261 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from typing import List, Optional, Sequence, Tuple, Union + +from libcst._nodes.whitespace import ( + COMMENT_RE, + NEWLINE_RE, + SIMPLE_WHITESPACE_RE, + Comment, + EmptyLine, + Newline, + ParenthesizedWhitespace, + SimpleWhitespace, + TrailingWhitespace, +) +from libcst._parser.types.config import BaseWhitespaceParserConfig +from libcst._parser.types.whitespace_state import WhitespaceState as State + + +# BEGIN PARSER ENTRYPOINTS + + +def parse_simple_whitespace( + config: BaseWhitespaceParserConfig, state: State +) -> SimpleWhitespace: + # The match never fails because the pattern can match an empty string + lines = config.lines + # pyre-fixme[16]: Optional type has no attribute `group`. 
+ ws_line = SIMPLE_WHITESPACE_RE.match(lines[state.line - 1], state.column).group(0) + ws_line_list = [ws_line] + while "\\" in ws_line: + # continuation character + state.line += 1 + state.column = 0 + ws_line = SIMPLE_WHITESPACE_RE.match(lines[state.line - 1], state.column).group( + 0 + ) + ws_line_list.append(ws_line) + + # TODO: we could special-case the common case where there's no continuation + # character to avoid list construction and joining. + + # once we've finished collecting continuation characters + state.column += len(ws_line) + return SimpleWhitespace("".join(ws_line_list)) + + +def parse_empty_lines( + config: BaseWhitespaceParserConfig, + state: State, + *, + override_absolute_indent: Optional[str] = None, +) -> Sequence[EmptyLine]: + # If override_absolute_indent is true, then we need to parse all lines up + # to and including the last line that is indented at our level. These all + # belong to the footer and not to the next line's leading_lines. All lines + # that have indent=False and come after the last line where indent=True + # do not belong to this node. + state_for_line = State( + state.line, state.column, state.absolute_indent, state.is_parenthesized + ) + lines: List[Tuple[State, EmptyLine]] = [] + while True: + el = _parse_empty_line( + config, state_for_line, override_absolute_indent=override_absolute_indent + ) + if el is None: + break + + # Store the updated state with the element we parsed. Then make a new state + # clone for the next element. + lines.append((state_for_line, el)) + state_for_line = State( + state_for_line.line, + state_for_line.column, + state.absolute_indent, + state.is_parenthesized, + ) + + if override_absolute_indent is not None: + # We need to find the last element that is indented, and then split the list + # at that point. 
+ for i in range(len(lines) - 1, -1, -1): + if lines[i][1].indent: + lines = lines[: (i + 1)] + break + else: + # We didn't find any lines, throw them all away + lines = [] + + if lines: + # Update the state line and column to match the last line actually parsed. + final_state: State = lines[-1][0] + state.line = final_state.line + state.column = final_state.column + return [r[1] for r in lines] + + +def parse_trailing_whitespace( + config: BaseWhitespaceParserConfig, state: State +) -> TrailingWhitespace: + trailing_whitespace = _parse_trailing_whitespace(config, state) + if trailing_whitespace is None: + raise Exception( + "Internal Error: Failed to parse TrailingWhitespace. This should never " + + "happen because a TrailingWhitespace is never optional in the grammar, " + + "so this error should've been caught by parso first." + ) + return trailing_whitespace + + +def parse_parenthesizable_whitespace( + config: BaseWhitespaceParserConfig, state: State +) -> Union[SimpleWhitespace, ParenthesizedWhitespace]: + if state.is_parenthesized: + # First, try parenthesized (don't need speculation because it either + # parses or doesn't modify state). 
+ parenthesized_whitespace = _parse_parenthesized_whitespace(config, state) + if parenthesized_whitespace is not None: + return parenthesized_whitespace + # Now, just parse and return a simple whitespace + return parse_simple_whitespace(config, state) + + +# END PARSER ENTRYPOINTS +# BEGIN PARSER INTERNAL PRODUCTIONS + + +def _parse_empty_line( + config: BaseWhitespaceParserConfig, + state: State, + *, + override_absolute_indent: Optional[str] = None, +) -> Optional[EmptyLine]: + # begin speculative parsing + speculative_state = State( + state.line, state.column, state.absolute_indent, state.is_parenthesized + ) + try: + indent = _parse_indent( + config, speculative_state, override_absolute_indent=override_absolute_indent + ) + except Exception: + # We aren't on a new line, speculative parsing failed + return None + whitespace = parse_simple_whitespace(config, speculative_state) + comment = _parse_comment(config, speculative_state) + newline = _parse_newline(config, speculative_state) + if newline is None: + # speculative parsing failed + return None + # speculative parsing succeeded + state.line = speculative_state.line + state.column = speculative_state.column + # don't need to copy absolute_indent/is_parenthesized because they don't change. + return EmptyLine(indent, whitespace, comment, newline) + + +def _parse_indent( + config: BaseWhitespaceParserConfig, + state: State, + *, + override_absolute_indent: Optional[str] = None, +) -> bool: + """ + Returns True if indentation was found, otherwise False. 
+ """ + absolute_indent = ( + override_absolute_indent + if override_absolute_indent is not None + else state.absolute_indent + ) + line_str = config.lines[state.line - 1] + if state.column != 0: + if state.column == len(line_str) and state.line == len(config.lines): + # We're at EOF, treat this as a failed speculative parse + return False + raise Exception("Internal Error: Column should be 0 when parsing an indent.") + if line_str.startswith(absolute_indent, state.column): + state.column += len(absolute_indent) + return True + return False + + +def _parse_comment( + config: BaseWhitespaceParserConfig, state: State +) -> Optional[Comment]: + comment_match = COMMENT_RE.match(config.lines[state.line - 1], state.column) + if comment_match is None: + return None + comment = comment_match.group(0) + state.column += len(comment) + return Comment(comment) + + +def _parse_newline( + config: BaseWhitespaceParserConfig, state: State +) -> Optional[Newline]: + # begin speculative parsing + line_str = config.lines[state.line - 1] + newline_match = NEWLINE_RE.match(line_str, state.column) + if newline_match is not None: + # speculative parsing succeeded + newline_str = newline_match.group(0) + state.column += len(newline_str) + if state.column != len(line_str): + raise Exception("Internal Error: Found a newline, but it wasn't the EOL.") + if state.line < len(config.lines): + # this newline was the end of a line, and there's another line, + # therefore we should move to the next line + state.line += 1 + state.column = 0 + if newline_str == config.default_newline: + # Just inherit it from the Module instead of explicitly setting it. 
+ return Newline() + else: + return Newline(newline_str) + else: # no newline was found, speculative parsing failed + return None + + +def _parse_trailing_whitespace( + config: BaseWhitespaceParserConfig, state: State +) -> Optional[TrailingWhitespace]: + # Begin speculative parsing + speculative_state = State( + state.line, state.column, state.absolute_indent, state.is_parenthesized + ) + whitespace = parse_simple_whitespace(config, speculative_state) + comment = _parse_comment(config, speculative_state) + newline = _parse_newline(config, speculative_state) + if newline is None: + # Speculative parsing failed + return None + # Speculative parsing succeeded + state.line = speculative_state.line + state.column = speculative_state.column + # don't need to copy absolute_indent/is_parenthesized because they don't change. + return TrailingWhitespace(whitespace, comment, newline) + + +def _parse_parenthesized_whitespace( + config: BaseWhitespaceParserConfig, state: State +) -> Optional[ParenthesizedWhitespace]: + first_line = _parse_trailing_whitespace(config, state) + if first_line is None: + # Speculative parsing failed + return None + empty_lines = () + while True: + empty_line = _parse_empty_line(config, state) + if empty_line is None: + # This isn't an empty line, so parse it below + break + empty_lines = empty_lines + (empty_line,) + indent = _parse_indent(config, state) + last_line = parse_simple_whitespace(config, state) + return ParenthesizedWhitespace(first_line, empty_lines, indent, last_line) diff --git a/libcst/_parser/tests/test_detect_config.py b/libcst/_parser/tests/test_detect_config.py index b17c9fe58..fdda965b8 100644 --- a/libcst/_parser/tests/test_detect_config.py +++ b/libcst/_parser/tests/test_detect_config.py @@ -3,12 +3,15 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. 
-import dataclasses from typing import Union from libcst._parser.detect_config import detect_config from libcst._parser.parso.utils import PythonVersionInfo -from libcst._parser.types.config import ParserConfig, PartialParserConfig +from libcst._parser.types.config import ( + ParserConfig, + PartialParserConfig, + parser_config_asdict, +) from libcst.testing.utils import UnitTest, data_provider @@ -316,7 +319,7 @@ def test_detect_module_config( expected_config: ParserConfig, ) -> None: self.assertEqual( - dataclasses.asdict( + parser_config_asdict( detect_config( source, partial=partial, @@ -324,5 +327,5 @@ def test_detect_module_config( detect_default_newline=detect_default_newline, ).config ), - dataclasses.asdict(expected_config), + parser_config_asdict(expected_config), ) diff --git a/libcst/_parser/tests/test_whitespace_parser.py b/libcst/_parser/tests/test_whitespace_parser.py index dcbafa7e0..f5bd338b1 100644 --- a/libcst/_parser/tests/test_whitespace_parser.py +++ b/libcst/_parser/tests/test_whitespace_parser.py @@ -3,12 +3,11 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. 
-from dataclasses import dataclass -from typing import Callable, Sequence, TypeVar +from typing import Callable, TypeVar import libcst as cst from libcst._nodes.deep_equals import deep_equals -from libcst._parser.types.config import BaseWhitespaceParserConfig +from libcst._parser.types.config import MockWhitespaceParserConfig as Config from libcst._parser.types.whitespace_state import WhitespaceState as State from libcst._parser.whitespace_parser import ( parse_empty_lines, @@ -21,12 +20,6 @@ _T = TypeVar("_T") -@dataclass(frozen=True) -class Config(BaseWhitespaceParserConfig): - lines: Sequence[str] - default_newline: str - - class WhitespaceParserTest(UnitTest): @data_provider( { diff --git a/libcst/_parser/types/config.py b/libcst/_parser/types/config.py index 7c76e4c71..2d6371292 100644 --- a/libcst/_parser/types/config.py +++ b/libcst/_parser/types/config.py @@ -3,14 +3,12 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. - -import abc import codecs import re import sys from dataclasses import dataclass, field, fields from enum import Enum -from typing import FrozenSet, List, Optional, Pattern, Sequence, Union +from typing import Any, Callable, FrozenSet, List, Mapping, Optional, Pattern, Union from libcst._add_slots import add_slots from libcst._nodes.whitespace import NEWLINE_RE @@ -19,33 +17,21 @@ _INDENT_RE: Pattern[str] = re.compile(r"[ \t]+") +try: + from libcst_native import parser_config as config_mod -class BaseWhitespaceParserConfig(abc.ABC): - """ - Represents the subset of ParserConfig that the whitespace parser requires. This - makes calling the whitespace parser in tests with a mocked configuration easier. 
- """ - - lines: Sequence[str] - default_newline: str - + MockWhitespaceParserConfig = config_mod.BaseWhitespaceParserConfig +except ImportError: + from libcst._parser.types import py_config as config_mod -@add_slots # We'll access these properties frequently, so use slots -@dataclass(frozen=True) -class ParserConfig(BaseWhitespaceParserConfig): - """ - An internal configuration object that the python parser passes around. These values - are global to the parsed code and should not change during the lifetime of the - parser object. - """ + # pyre-fixme[9]: This is a small implementation difference between native and python + MockWhitespaceParserConfig = config_mod.MockWhitespaceParserConfig - lines: Sequence[str] - encoding: str - default_indent: str - default_newline: str - has_trailing_newline: bool - version: PythonVersionInfo - future_imports: FrozenSet[str] +BaseWhitespaceParserConfig = config_mod.BaseWhitespaceParserConfig +ParserConfig = config_mod.ParserConfig +parser_config_asdict: Callable[ + [ParserConfig], Mapping[str, Any] +] = config_mod.parser_config_asdict class AutoConfig(Enum): diff --git a/libcst/_parser/types/py_config.py b/libcst/_parser/types/py_config.py new file mode 100644 index 000000000..6722a9eae --- /dev/null +++ b/libcst/_parser/types/py_config.py @@ -0,0 +1,54 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import abc +from dataclasses import asdict, dataclass +from typing import Any, FrozenSet, Mapping, Sequence + +from libcst._parser.parso.utils import PythonVersionInfo + + +class BaseWhitespaceParserConfig(abc.ABC): + """ + Represents the subset of ParserConfig that the whitespace parser requires. This + makes calling the whitespace parser in tests with a mocked configuration easier. 
+ """ + + lines: Sequence[str] + default_newline: str + + +@dataclass(frozen=True) +class MockWhitespaceParserConfig(BaseWhitespaceParserConfig): + """ + An internal type used by unit tests. + """ + + lines: Sequence[str] + default_newline: str + + +@dataclass(frozen=True) +class ParserConfig(BaseWhitespaceParserConfig): + """ + An internal configuration object that the python parser passes around. These + values are global to the parsed code and should not change during the lifetime + of the parser object. + """ + + lines: Sequence[str] + encoding: str + default_indent: str + default_newline: str + has_trailing_newline: bool + version: PythonVersionInfo + future_imports: FrozenSet[str] + + +def parser_config_asdict(config: ParserConfig) -> Mapping[str, Any]: + """ + An internal helper function used by unit tests to compare configs. + """ + return asdict(config) diff --git a/libcst/_parser/types/py_whitespace_state.py b/libcst/_parser/types/py_whitespace_state.py new file mode 100644 index 000000000..41244b98a --- /dev/null +++ b/libcst/_parser/types/py_whitespace_state.py @@ -0,0 +1,36 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from dataclasses import dataclass + +from libcst._add_slots import add_slots + + +@add_slots +@dataclass(frozen=False) +class WhitespaceState: + """ + A frequently mutated store of the whitespace parser's current state. This object + must be cloned prior to speculative parsing. + + This is in contrast to the `config` object each whitespace parser function takes, + which is frozen and never mutated. + + Whitespace parsing works by mutating this state object. By encapsulating saving, and + re-using state objects inside the top-level python parser, the whitespace parser is + able to be reentrant. 
One 'convert' function can consume part of the whitespace, and + another 'convert' function can consume the rest, depending on who owns what + whitespace. + + This is similar to the approach you might take to parse nested languages (e.g. + JavaScript inside of HTML). We're treating whitespace as a separate language and + grammar from the rest of Python's grammar. + """ + + line: int # one-indexed (to match parso's behavior) + column: int # zero-indexed (to match parso's behavior) + # What to look for when executing `_parse_indent`. + absolute_indent: str + is_parenthesized: bool diff --git a/libcst/_parser/types/whitespace_state.py b/libcst/_parser/types/whitespace_state.py index b5554a2bc..a9798054c 100644 --- a/libcst/_parser/types/whitespace_state.py +++ b/libcst/_parser/types/whitespace_state.py @@ -7,34 +7,9 @@ Defines the state object used by the whitespace parser. """ -from dataclasses import dataclass +try: + from libcst_native import whitespace_state as mod +except ImportError: + from libcst._parser.types import py_whitespace_state as mod -from libcst._add_slots import add_slots - - -@add_slots -@dataclass(frozen=False) -class WhitespaceState: - """ - A frequently mutated store of the whitespace parser's current state. This object - must be cloned prior to speculative parsing. - - This is in contrast to the `config` object each whitespace parser function takes, - which is frozen and never mutated. - - Whitespace parsing works by mutating this state object. By encapsulating saving, and - re-using state objects inside the top-level python parser, the whitespace parser is - able to be reentrant. One 'convert' function can consume part of the whitespace, and - another 'convert' function can consume the rest, depending on who owns what - whitespace. - - This is similar to the approach you might take to parse nested languages (e.g. - JavaScript inside of HTML). We're treating whitespace as a separate language and - grammar from the rest of Python's grammar. 
- """ - - line: int # one-indexed (to match parso's behavior) - column: int # zero-indexed (to match parso's behavior) - # What to look for when executing `_parse_indent`. - absolute_indent: str - is_parenthesized: bool +WhitespaceState = mod.WhitespaceState diff --git a/libcst/_parser/whitespace_parser.py b/libcst/_parser/whitespace_parser.py index b9df6c7e1..9ffb6a7dc 100644 --- a/libcst/_parser/whitespace_parser.py +++ b/libcst/_parser/whitespace_parser.py @@ -5,7 +5,7 @@ """ Parso doesn't attempt to parse (or even emit tokens for) whitespace or comments that -isn't syntatically important. Instead, we're just given the whitespace as a "prefix" of +aren't syntatically important. Instead, we're just given the whitespace as a "prefix" of the token. However, in our CST, whitespace is gathered into far more detailed objects than a simple @@ -15,259 +15,19 @@ hand-rolled recursive descent parser. """ -from typing import List, Optional, Sequence, Tuple, Union - -from libcst._nodes.whitespace import ( - COMMENT_RE, - NEWLINE_RE, - SIMPLE_WHITESPACE_RE, - Comment, - EmptyLine, - Newline, - ParenthesizedWhitespace, - SimpleWhitespace, - TrailingWhitespace, -) -from libcst._parser.types.config import BaseWhitespaceParserConfig -from libcst._parser.types.whitespace_state import WhitespaceState as State - - -# BEGIN PARSER ENTRYPOINTS - - -def parse_simple_whitespace( - config: BaseWhitespaceParserConfig, state: State -) -> SimpleWhitespace: - # The match never fails because the pattern can match an empty string - lines = config.lines - # pyre-fixme[16]: Optional type has no attribute `group`. 
- ws_line = SIMPLE_WHITESPACE_RE.match(lines[state.line - 1], state.column).group(0) - ws_line_list = [ws_line] - while "\\" in ws_line: - # continuation character - state.line += 1 - state.column = 0 - ws_line = SIMPLE_WHITESPACE_RE.match(lines[state.line - 1], state.column).group( - 0 - ) - ws_line_list.append(ws_line) - - # TODO: we could special-case the common case where there's no continuation - # character to avoid list construction and joining. - - # once we've finished collecting continuation characters - state.column += len(ws_line) - return SimpleWhitespace("".join(ws_line_list)) - - -def parse_empty_lines( - config: BaseWhitespaceParserConfig, - state: State, - *, - override_absolute_indent: Optional[str] = None, -) -> Sequence[EmptyLine]: - # If override_absolute_indent is true, then we need to parse all lines up - # to and including the last line that is indented at our level. These all - # belong to the footer and not to the next line's leading_lines. All lines - # that have indent=False and come after the last line where indent=True - # do not belong to this node. - state_for_line = State( - state.line, state.column, state.absolute_indent, state.is_parenthesized - ) - lines: List[Tuple[State, EmptyLine]] = [] - while True: - el = _parse_empty_line( - config, state_for_line, override_absolute_indent=override_absolute_indent - ) - if el is None: - break - - # Store the updated state with the element we parsed. Then make a new state - # clone for the next element. - lines.append((state_for_line, el)) - state_for_line = State( - state_for_line.line, - state_for_line.column, - state.absolute_indent, - state.is_parenthesized, - ) - - if override_absolute_indent is not None: - # We need to find the last element that is indented, and then split the list - # at that point. 
- for i in range(len(lines) - 1, -1, -1): - if lines[i][1].indent: - lines = lines[: (i + 1)] - break - else: - # We didn't find any lines, throw them all away - lines = [] - - if lines: - # Update the state line and column to match the last line actually parsed. - final_state: State = lines[-1][0] - state.line = final_state.line - state.column = final_state.column - return [r[1] for r in lines] - - -def parse_trailing_whitespace( - config: BaseWhitespaceParserConfig, state: State -) -> TrailingWhitespace: - trailing_whitespace = _parse_trailing_whitespace(config, state) - if trailing_whitespace is None: - raise Exception( - "Internal Error: Failed to parse TrailingWhitespace. This should never " - + "happen because a TrailingWhitespace is never optional in the grammar, " - + "so this error should've been caught by parso first." - ) - return trailing_whitespace - - -def parse_parenthesizable_whitespace( - config: BaseWhitespaceParserConfig, state: State -) -> Union[SimpleWhitespace, ParenthesizedWhitespace]: - if state.is_parenthesized: - # First, try parenthesized (don't need speculation because it either - # parses or doesn't modify state). 
- parenthesized_whitespace = _parse_parenthesized_whitespace(config, state) - if parenthesized_whitespace is not None: - return parenthesized_whitespace - # Now, just parse and return a simple whitespace - return parse_simple_whitespace(config, state) - - -# END PARSER ENTRYPOINTS -# BEGIN PARSER INTERNAL PRODUCTIONS - - -def _parse_empty_line( - config: BaseWhitespaceParserConfig, - state: State, - *, - override_absolute_indent: Optional[str] = None, -) -> Optional[EmptyLine]: - # begin speculative parsing - speculative_state = State( - state.line, state.column, state.absolute_indent, state.is_parenthesized - ) - try: - indent = _parse_indent( - config, speculative_state, override_absolute_indent=override_absolute_indent - ) - except Exception: - # We aren't on a new line, speculative parsing failed - return None - whitespace = parse_simple_whitespace(config, speculative_state) - comment = _parse_comment(config, speculative_state) - newline = _parse_newline(config, speculative_state) - if newline is None: - # speculative parsing failed - return None - # speculative parsing succeeded - state.line = speculative_state.line - state.column = speculative_state.column - # don't need to copy absolute_indent/is_parenthesized because they don't change. - return EmptyLine(indent, whitespace, comment, newline) - - -def _parse_indent( - config: BaseWhitespaceParserConfig, - state: State, - *, - override_absolute_indent: Optional[str] = None, -) -> bool: - """ - Returns True if indentation was found, otherwise False. 
- """ - absolute_indent = ( - override_absolute_indent - if override_absolute_indent is not None - else state.absolute_indent - ) - line_str = config.lines[state.line - 1] - if state.column != 0: - if state.column == len(line_str) and state.line == len(config.lines): - # We're at EOF, treat this as a failed speculative parse - return False - raise Exception("Internal Error: Column should be 0 when parsing an indent.") - if line_str.startswith(absolute_indent, state.column): - state.column += len(absolute_indent) - return True - return False - - -def _parse_comment( - config: BaseWhitespaceParserConfig, state: State -) -> Optional[Comment]: - comment_match = COMMENT_RE.match(config.lines[state.line - 1], state.column) - if comment_match is None: - return None - comment = comment_match.group(0) - state.column += len(comment) - return Comment(comment) - - -def _parse_newline( - config: BaseWhitespaceParserConfig, state: State -) -> Optional[Newline]: - # begin speculative parsing - line_str = config.lines[state.line - 1] - newline_match = NEWLINE_RE.match(line_str, state.column) - if newline_match is not None: - # speculative parsing succeeded - newline_str = newline_match.group(0) - state.column += len(newline_str) - if state.column != len(line_str): - raise Exception("Internal Error: Found a newline, but it wasn't the EOL.") - if state.line < len(config.lines): - # this newline was the end of a line, and there's another line, - # therefore we should move to the next line - state.line += 1 - state.column = 0 - if newline_str == config.default_newline: - # Just inherit it from the Module instead of explicitly setting it. 
- return Newline() - else: - return Newline(newline_str) - else: # no newline was found, speculative parsing failed - return None - - -def _parse_trailing_whitespace( - config: BaseWhitespaceParserConfig, state: State -) -> Optional[TrailingWhitespace]: - # Begin speculative parsing - speculative_state = State( - state.line, state.column, state.absolute_indent, state.is_parenthesized - ) - whitespace = parse_simple_whitespace(config, speculative_state) - comment = _parse_comment(config, speculative_state) - newline = _parse_newline(config, speculative_state) - if newline is None: - # Speculative parsing failed - return None - # Speculative parsing succeeded - state.line = speculative_state.line - state.column = speculative_state.column - # don't need to copy absolute_indent/is_parenthesized because they don't change. - return TrailingWhitespace(whitespace, comment, newline) - - -def _parse_parenthesized_whitespace( - config: BaseWhitespaceParserConfig, state: State -) -> Optional[ParenthesizedWhitespace]: - first_line = _parse_trailing_whitespace(config, state) - if first_line is None: - # Speculative parsing failed - return None - empty_lines = () - while True: - empty_line = _parse_empty_line(config, state) - if empty_line is None: - # This isn't an empty line, so parse it below - break - empty_lines = empty_lines + (empty_line,) - indent = _parse_indent(config, state) - last_line = parse_simple_whitespace(config, state) - return ParenthesizedWhitespace(first_line, empty_lines, indent, last_line) +try: + # It'd be better to do `from libcst_native.whitespace_parser import *`, but we're + # blocked on https://github.com/PyO3/pyo3/issues/759 + # (which ultimately seems to be a limitation of how importlib works) + from libcst_native import whitespace_parser as mod +except ImportError: + from libcst._parser import py_whitespace_parser as mod + +# pyre-fixme[5]: There's no sane way to type these re-exports +parse_simple_whitespace = mod.parse_simple_whitespace +# 
pyre-fixme[5]: There's no sane way to type these re-exports +parse_empty_lines = mod.parse_empty_lines +# pyre-fixme[5]: There's no sane way to type these re-exports +parse_trailing_whitespace = mod.parse_trailing_whitespace +# pyre-fixme[5]: There's no sane way to type these re-exports +parse_parenthesizable_whitespace = mod.parse_parenthesizable_whitespace diff --git a/libcst_native/Cargo.lock b/libcst_native/Cargo.lock new file mode 100644 index 000000000..ad7168ac9 --- /dev/null +++ b/libcst_native/Cargo.lock @@ -0,0 +1,312 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "aho-corasick" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5" +dependencies = [ + "memchr", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "ctor" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10bcb9d7dcbf7002aaffbb53eac22906b64cdcc127971dcc387d8eb7c95d5560" +dependencies = [ + "quote", + "syn", +] + +[[package]] +name = "ghost" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a5bcf1bbeab73aa4cf2fde60a846858dc036163c7c33bec309f8d17de785479" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "indoc" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a75aeaaef0ce18b58056d306c27b07436fbb34b8816c53094b76dd81803136" +dependencies = [ + "unindent", +] + +[[package]] +name = "instant" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61124eeebbd69b8190558df225adf7e4caafce0d743919e5d6b19652314ec5ec" +dependencies = [ + 
"cfg-if", +] + +[[package]] +name = "inventory" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f0f7efb804ec95e33db9ad49e4252f049e37e8b0a4652e3cd61f7999f2eff7f" +dependencies = [ + "ctor", + "ghost", + "inventory-impl", +] + +[[package]] +name = "inventory-impl" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75c094e94816723ab936484666968f5b58060492e880f3c8d00489a1e244fa51" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "libc" +version = "0.2.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89203f3fba0a3795506acaad8ebce3c80c0af93f994d5a1d7a0b1eeb23271929" + +[[package]] +name = "libcst_native" +version = "0.1.0" +dependencies = [ + "once_cell", + "paste", + "pyo3", + "regex", + "test-case", +] + +[[package]] +name = "lock_api" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd96ffd135b2fd7b973ac026d28085defbe8983df057ced3eb4f2130b0831312" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "memchr" +version = "2.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" + +[[package]] +name = "once_cell" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13bd41f508810a131401606d54ac32a467c97172d74ba7662562ebba5ad07fa0" + +[[package]] +name = "parking_lot" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d7744ac029df22dca6284efe4e898991d28e3085c706c972bcd7da4a27a15eb" +dependencies = [ + "instant", + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ccb628cad4f84851442432c60ad8e1f607e29752d0bf072cbd0baf28aa34272" +dependencies = [ 
+ "cfg-if", + "instant", + "libc", + "redox_syscall", + "smallvec", + "winapi", +] + +[[package]] +name = "paste" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5d65c4d95931acda4498f675e332fcbdc9a06705cd07086c510e9b6009cd1c1" + +[[package]] +name = "proc-macro2" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "pyo3" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00ca634cf3acd58a599b535ed6cb188223298977d471d146121792bfa23b754c" +dependencies = [ + "cfg-if", + "ctor", + "indoc", + "inventory", + "libc", + "parking_lot", + "paste", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-macros" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "483ac516dbda6789a5b4be0271e7a31b9ad4ec8c0a5955050e8076f72bdbef8f" +dependencies = [ + "pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15230cabcda008f03565ed8bac40f094cbb5ee1b46e6551f1ec3a0e922cf7df9" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "quote" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "991431c3519a3f36861882da93630ce66b52918dcf1b8e2fd66b397fc96f28df" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.1.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" + +[[package]] +name = "regex" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d9251239e129e16308e70d853559389de218ac275b515068abc96829d05b948a" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", + "thread_local", +] + +[[package]] +name = "regex-syntax" +version = "0.6.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5eb417147ba9860a96cfe72a0b93bf88fee1744b5636ec99ab20c1aa9376581" + +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "smallvec" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" + +[[package]] +name = "syn" +version = "1.0.59" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07cb8b1b4ebf86a89ee88cbd201b022b94138c623644d035185c84d3f41b7e66" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "test-case" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "956044ef122917dde830c19dec5f76d0670329fde4104836d62ebcb14f4865f1" +dependencies = [ + "cfg-if", + "proc-macro2", + "quote", + "syn", + "version_check", +] + +[[package]] +name = "thread_local" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "301bdd13d23c49672926be451130892d274d3ba0b410c18e00daa7990ff38d99" +dependencies = [ + "once_cell", +] + +[[package]] +name = "unicode-xid" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" + +[[package]] +name = "unindent" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f14ee04d9415b52b3aeab06258a3f07093182b88ba0f9b8d203f211a7a7d41c7" + +[[package]] +name = "version_check" +version = "0.9.2" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/libcst_native/Cargo.toml b/libcst_native/Cargo.toml new file mode 100644 index 000000000..ae517537d --- /dev/null +++ b/libcst_native/Cargo.toml @@ -0,0 +1,31 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +[package] +name = "libcst_native" +version = "0.1.0" +authors = ["LibCST Developers"] +edition = "2018" + +[lib] +name = "libcst_native" +crate-type = ["cdylib", "rlib"] + +[features] +# This is a bit of a hack, since `cargo test` doesn't work with `extension-module`. +# To run tests, use `cargo test --no-default-features`. +# +# Once https://github.com/PyO3/pyo3/pull/1123 lands, it may be better to use +# `-Zextra-link-arg` for this instead. 
+default = ["pyo3/extension-module"]
+
+[dependencies]
+once_cell = "1.5.2"
+paste = "1.0.4"
+pyo3 = "0.13.0"
+regex = "1.4.3"
+
+[dev-dependencies]
+test-case = "1.1.0"
diff --git a/libcst_native/README.md b/libcst_native/README.md
new file mode 100644
index 000000000..f33563b2e
--- /dev/null
+++ b/libcst_native/README.md
@@ -0,0 +1,66 @@
+# libcst_native
+
+A very experimental native extension to speed up LibCST. This does not currently provide
+much performance benefit and is therefore not recommended for general use.
+
+The extension is written in Rust using [PyO3](https://pyo3.rs/).
+
+This installs as a separate python package that LibCST looks for and will import if it's
+available.
+
+
+## Using with LibCST
+
+[Set up a rust development environment](https://www.rust-lang.org/tools/install). Using
+`rustup` is recommended, but not necessary. Rust 1.45.0+ should work.
+
+Follow the instructions for setting up a virtualenv in the top-level README, then:
+
+```
+cd libcst_native
+maturin develop # install libcst_native to the virtualenv
+cd .. # cd back into the main project
+python -m unittest
+```
+
+This will run the python test suite. Nothing special is required to use `libcst_native`,
+since `libcst` will automatically use the native extension when it's installed.
+
+When benchmarking this code, make sure to run `maturin develop` with the `--release`
+flag to enable compiler optimizations.
+
+You can disable the native extension by uninstalling the package from your virtualenv:
+
+```
+pip uninstall libcst_native
+```
+
+
+## Rust Tests
+
+In addition to running the python test suite, you can run some tests written in rust
+with
+
+```
+cargo test --no-default-features
+```
+
+The `--no-default-features` flag is needed to work around an incompatibility between tests
+and pyo3's `extension-module` feature.
+
+
+## Code Formatting
+
+Use `cargo fmt` to format your code.
+ + +## Release + +This isn't currently supported, so there's no releases available, but the end-goal would +be to publish this on PyPI. + +Because this is a native extension, it must be re-built for each platform/architecture. +The per-platform build could be automated using a CI system, [like github +actions][gh-actions]. + +[gh-actions]: https://github.com/PyO3/maturin/blob/master/.github/workflows/release.yml diff --git a/libcst_native/src/lib.rs b/libcst_native/src/lib.rs new file mode 100644 index 000000000..6a7a01d51 --- /dev/null +++ b/libcst_native/src/lib.rs @@ -0,0 +1,36 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// +// This source code is licensed under the MIT license found in the +// LICENSE file in the root directory of this source tree. + +#[macro_use] +mod macros; + +// Our submodules are all pub so that they're shown in `cargo doc`'s output. +// This crate isn't intended to be used from within rust, and won't be published to `crates.io`. +pub mod parser_config; +pub mod py_cached; +pub mod whitespace_parser; +pub mod whitespace_state; + +#[cfg(any(test, doc))] +pub mod test_utils; + +use pyo3::prelude::*; + +#[pymodule] +fn libcst_native(py: Python, m: &PyModule) -> PyResult<()> { + let parser_config_mod = PyModule::new(py, "parser_config")?; + parser_config::init_module(py, parser_config_mod)?; + m.add_submodule(parser_config_mod)?; + + let whitespace_state_mod = PyModule::new(py, "whitespace_state")?; + whitespace_state::init_module(py, whitespace_state_mod)?; + m.add_submodule(whitespace_state_mod)?; + + let whitespace_parser_mod = PyModule::new(py, "whitespace_parser")?; + whitespace_parser::init_module(py, whitespace_parser_mod)?; + m.add_submodule(whitespace_parser_mod)?; + + Ok(()) +} diff --git a/libcst_native/src/macros.rs b/libcst_native/src/macros.rs new file mode 100644 index 000000000..1c47e3fad --- /dev/null +++ b/libcst_native/src/macros.rs @@ -0,0 +1,33 @@ +// Copyright (c) Facebook, Inc. and its affiliates. 
+//
+// This source code is licensed under the MIT license found in the
+// LICENSE file in the root directory of this source tree.
+
+/// Generates a function that lazily imports and caches a module's member. This will hold a
+/// permanent reference to the imported member. Python's module cache is rarely purged though, so
+/// it typically won't matter.
+///
+/// This cache is cheaper than looking up the module in python's module cache and inspecting the
+/// module's `__dict__` each time you want access to the member.
+///
+/// If you have multiple imports from the same module, we'll call `py.import` once for each member
+/// of the module.
+#[macro_export]
+macro_rules! py_import {
+    ( $module_name:expr, $member_name:expr, $getter_fn:ident ) => {
+        paste::paste! {
+            static [<IMPORT_CELL_ $getter_fn:upper>]
+                : pyo3::once_cell::GILOnceCell<pyo3::PyResult<pyo3::PyObject>>
+                = pyo3::once_cell::GILOnceCell::new();
+
+            fn $getter_fn<'py>(py: pyo3::Python<'py>) -> pyo3::PyResult<&'py pyo3::PyAny> {
+                Ok([<IMPORT_CELL_ $getter_fn:upper>].get_or_init(py, || {
+                        Ok(py.import($module_name)?.get($member_name)?.to_object(py))
+                    })
+                    .as_ref()
+                    .map_err(|err| err.clone_ref(py))?
+                    .as_ref(py))
+            }
+        }
+    };
+}
diff --git a/libcst_native/src/parser_config.rs b/libcst_native/src/parser_config.rs
new file mode 100644
index 000000000..93fe24742
--- /dev/null
+++ b/libcst_native/src/parser_config.rs
@@ -0,0 +1,137 @@
+// Copyright (c) Facebook, Inc. and its affiliates.
+//
+// This source code is licensed under the MIT license found in the
+// LICENSE file in the root directory of this source tree.
+
+use pyo3::exceptions::PyIndexError;
+use pyo3::prelude::*;
+use pyo3::types::{IntoPyDict, PyDict, PySequence, PyString};
+use pyo3::wrap_pyfunction;
+
+use crate::py_cached::PyCached;
+
+#[pyclass(subclass, module = "libcst_native.parser_config")]
+#[text_signature = "(*, lines, default_newline)"]
+pub struct BaseWhitespaceParserConfig {
+    pub lines: PyCached<Vec<String>>,
+    pub default_newline: PyCached<String>,
+}
+
+#[pymethods]
+impl BaseWhitespaceParserConfig {
+    #[new]
+    fn new(lines: &PySequence, default_newline: &PyString) -> PyResult<Self> {
+        // Extract native copies up front; PyCached also keeps the original Python objects.
+        Ok(Self {
+            lines: lines.extract()?,
+            default_newline: default_newline.extract()?,
+        })
+    }
+
+    #[getter]
+    fn get_lines(&self, py: Python) -> PyObject {
+        self.lines.to_object(py)
+    }
+
+    #[getter]
+    fn get_default_newline(&self, py: Python) -> PyObject {
+        self.default_newline.to_object(py)
+    }
+}
+
+impl BaseWhitespaceParserConfig {
+    /// Equivalent to `config.lines[line_number - 1]`, but it returns a PyErr when we get
+    /// an index that's out of range, instead of panicking.
+    pub fn get_line(&self, line_number: usize) -> PyResult<&str> {
+        let err_fn =
+            || PyIndexError::new_err(format!("line number of {} is out of range", line_number));
+        self.lines
+            .get(line_number.checked_sub(1).ok_or_else(err_fn)?)
+            .map(|l| &l[..])
+            .ok_or_else(err_fn)
+    }
+
+    /// Equivalent to `config.get_line(line_number)[column_index..]`, but it returns a PyErr when
+    /// we get a column index that's out of range, instead of panicking.
+    pub fn get_line_after_column(&self, line_number: usize, column_index: usize) -> PyResult<&str> {
+        self.get_line(line_number)?
+            .get(column_index..)
+            .ok_or_else(|| {
+                PyIndexError::new_err(format!("column index of {} is out of range", column_index))
+            })
+    }
+}
+
+// These fields are private and PyObject, since we don't currently care about using them from
+// within rust.
+#[pyclass(extends=BaseWhitespaceParserConfig, module="libcst_native.parser_config")] +#[text_signature = "(*, lines, encoding, default_indent, default_newline, has_trailing_newline, version, future_imports)"] +pub struct ParserConfig { + // lines is inherited + #[pyo3(get)] + encoding: PyObject, + #[pyo3(get)] + default_indent: PyObject, + // default_newline is inherited + #[pyo3(get)] + has_trailing_newline: PyObject, + #[pyo3(get)] + version: PyObject, + #[pyo3(get)] + future_imports: PyObject, +} + +#[pymethods] +impl ParserConfig { + #[new] + fn new( + lines: &PySequence, + encoding: PyObject, + default_indent: PyObject, + default_newline: &PyString, + has_trailing_newline: PyObject, + version: PyObject, + future_imports: PyObject, + ) -> PyResult<(Self, BaseWhitespaceParserConfig)> { + Ok(( + Self { + encoding, + default_indent, + has_trailing_newline, + version, + future_imports, + }, + BaseWhitespaceParserConfig::new(lines, default_newline)?, + )) + } +} + +/// An internal helper function used by python unit tests to compare configs. +#[pyfunction] +fn parser_config_asdict<'py>(py: Python<'py>, config: PyRef<'py, ParserConfig>) -> &'py PyDict { + let super_config: &BaseWhitespaceParserConfig = config.as_ref(); + vec![ + ("lines", super_config.lines.to_object(py)), + ("encoding", config.encoding.clone_ref(py)), + ("default_indent", config.default_indent.clone_ref(py)), + ( + "default_newline", + super_config.default_newline.to_object(py), + ), + ( + "has_trailing_newline", + config.has_trailing_newline.clone_ref(py), + ), + ("version", config.version.clone_ref(py)), + ("future_imports", config.future_imports.clone_ref(py)), + ] + .into_py_dict(py) +} + +pub fn init_module(_py: Python, m: &PyModule) -> PyResult<()> { + m.add_class::()?; + m.add_class::()?; + m.add_function(wrap_pyfunction!(parser_config_asdict, m)?) 
+        .unwrap();
+    Ok(())
+}
diff --git a/libcst_native/src/py_cached.rs b/libcst_native/src/py_cached.rs
new file mode 100644
index 000000000..e8a4dfd4a
--- /dev/null
+++ b/libcst_native/src/py_cached.rs
@@ -0,0 +1,76 @@
+// Copyright (c) Facebook, Inc. and its affiliates.
+//
+// This source code is licensed under the MIT license found in the
+// LICENSE file in the root directory of this source tree.
+
+use pyo3::prelude::*;
+use std::convert::AsRef;
+use std::ops::Deref;
+
+/// An immutable wrapper around a rust type T and its PyObject equivalent. Caches the conversion
+/// to and from the PyObject.
+pub struct PyCached<T> {
+    native: T,
+    py_object: PyObject,
+}
+
+impl<T> PyCached<T>
+where
+    T: ToPyObject,
+{
+    pub fn new(py: Python, native: T) -> Self {
+        Self {
+            py_object: native.to_object(py),
+            native,
+        }
+    }
+}
+
+impl<'source, T> FromPyObject<'source> for PyCached<T>
+where
+    T: FromPyObject<'source>,
+{
+    fn extract(ob: &'source PyAny) -> PyResult<Self> {
+        Python::with_gil(|py| {
+            Ok(PyCached {
+                native: ob.extract()?,
+                py_object: ob.to_object(py),
+            })
+        })
+    }
+}
+
+impl<T> IntoPy<PyObject> for PyCached<T> {
+    fn into_py(self, _py: Python) -> PyObject {
+        self.py_object
+    }
+}
+
+impl<T> ToPyObject for PyCached<T> {
+    fn to_object(&self, py: Python) -> PyObject {
+        self.py_object.clone_ref(py)
+    }
+}
+
+impl<T> AsRef<T> for PyCached<T> {
+    fn as_ref(&self) -> &T {
+        &self.native
+    }
+}
+
+impl<T> Deref for PyCached<T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        &self.native
+    }
+}
+
+impl<T> From<T> for PyCached<T>
+where
+    T: ToPyObject,
+{
+    fn from(val: T) -> Self {
+        Python::with_gil(|py| Self::new(py, val))
+    }
+}
diff --git a/libcst_native/src/test_utils.rs b/libcst_native/src/test_utils.rs
new file mode 100644
index 000000000..6a462c8df
--- /dev/null
+++ b/libcst_native/src/test_utils.rs
@@ -0,0 +1,42 @@
+// Copyright (c) Facebook, Inc. and its affiliates.
+//
+// This source code is licensed under the MIT license found in the
+// LICENSE file in the root directory of this source tree.
+
+use pyo3::prelude::*;
+
+py_import!("libcst._nodes.deep_equals", "deep_equals", get_deep_equals);
+
+pub fn repr_or_panic<T>(py: Python, value: T) -> String
+where
+    T: ToPyObject,
+{
+    value
+        .to_object(py)
+        .as_ref(py)
+        .repr()
+        .expect("failed to call repr")
+        .extract()
+        .expect("repr should've returned str")
+}
+
+pub fn py_assert_deep_equals<L, R>(py: Python, left: L, right: R)
+where
+    L: ToPyObject,
+    R: ToPyObject,
+{
+    let (left, right) = (left.to_object(py), right.to_object(py));
+    let equals = get_deep_equals(py)
+        .expect("failed to import deep_equals")
+        .call1((&left, &right))
+        .expect("failed to call deep_equals")
+        .extract::<bool>()
+        .expect("deep_equals should return a bool");
+    if !equals {
+        panic!(
+            "assertion failed: {} was not deeply equal to {}",
+            repr_or_panic(py, &left),
+            repr_or_panic(py, &right),
+        );
+    }
+}
diff --git a/libcst_native/src/whitespace_parser.rs b/libcst_native/src/whitespace_parser.rs
new file mode 100644
index 000000000..22579bd3e
--- /dev/null
+++ b/libcst_native/src/whitespace_parser.rs
@@ -0,0 +1,745 @@
+// Copyright (c) Facebook, Inc. and its affiliates.
+//
+// This source code is licensed under the MIT license found in the
+// LICENSE file in the root directory of this source tree.
+
+/// Parso doesn't attempt to parse (or even emit tokens for) whitespace or comments that aren't
+/// syntactically important. Instead, we're just given the whitespace as a "prefix" of the token.
+///
+/// However, in our CST, whitespace is gathered into far more detailed objects than a simple str.
+///
+/// Fortunately this isn't hard for us to parse ourselves, so we just use our own hand-rolled
+/// recursive descent parser.
+use once_cell::sync::Lazy; +use pyo3::exceptions::PyException; +use pyo3::prelude::*; +use pyo3::types::PyTuple; +use pyo3::wrap_pyfunction; +use regex::Regex; + +use crate::parser_config::BaseWhitespaceParserConfig as Config; +use crate::whitespace_state::WhitespaceState as State; + +static SIMPLE_WHITESPACE_RE: Lazy = + Lazy::new(|| Regex::new(r"\A([ \f\t]|\\(\r\n?|\n))*").expect("regex")); +static NEWLINE_RE: Lazy = Lazy::new(|| Regex::new(r"\A(\r\n?|\n)").expect("regex")); +static COMMENT_RE: Lazy = Lazy::new(|| Regex::new(r"\A#[^\r\n]*").expect("regex")); + +py_import!( + "libcst._nodes.whitespace", + "SimpleWhitespace", + get_simple_whitespace_cls +); +py_import!("libcst._nodes.whitespace", "EmptyLine", get_empty_line_cls); +py_import!("libcst._nodes.whitespace", "Comment", get_comment_cls); +py_import!("libcst._nodes.whitespace", "Newline", get_newline_cls); +py_import!( + "libcst._nodes.whitespace", + "TrailingWhitespace", + get_trailing_whitespace_cls +); +py_import!( + "libcst._nodes.whitespace", + "ParenthesizedWhitespace", + get_parenthesized_whitespace_cls +); + +fn new_simple_whitespace<'py>(py: Python<'py>, value: &str) -> PyResult<&'py PyAny> { + Ok(get_simple_whitespace_cls(py)?.call1((value,))?) +} + +fn new_empty_line<'py>( + py: Python<'py>, + indent: bool, + whitespace: &'py PyAny, + comment: Option<&'py PyAny>, + newline: &'py PyAny, +) -> PyResult<&'py PyAny> { + Ok(get_empty_line_cls(py)?.call1((indent, whitespace, comment, newline))?) +} + +fn new_comment<'py>(py: Python<'py>, value: &str) -> PyResult<&'py PyAny> { + Ok(get_comment_cls(py)?.call1((value,))?) +} + +fn new_newline<'py>(py: Python<'py>, value: Option<&str>) -> PyResult<&'py PyAny> { + Ok(get_newline_cls(py)?.call1((value,))?) +} + +fn new_trailing_whitespace<'py>( + py: Python<'py>, + whitespace: &'py PyAny, + comment: Option<&'py PyAny>, + newline: &'py PyAny, +) -> PyResult<&'py PyAny> { + Ok(get_trailing_whitespace_cls(py)?.call1((whitespace, comment, newline))?) 
+} + +fn new_parenthesized_whitespace<'py>( + py: Python<'py>, + first_line: &'py PyAny, + empty_lines: Vec<&'py PyAny>, + indent: bool, + last_line: &'py PyAny, +) -> PyResult<&'py PyAny> { + Ok(get_parenthesized_whitespace_cls(py)?.call1(( + first_line, + PyTuple::new(py, empty_lines), + indent, + last_line, + ))?) +} + +// BEGIN PARSER ENTRYPOINTS + +#[pyfunction] +pub fn parse_simple_whitespace<'py>( + py: Python<'py>, + config: &Config, + state: &mut State, +) -> PyResult<&'py PyAny> { + let capture_ws = |line, column| -> PyResult<&str> { + Ok(SIMPLE_WHITESPACE_RE + .find(config.get_line_after_column(line, column)?) + .expect("SIMPLE_WHITESPACE_RE supports 0-length matches, so it must always match") + .as_str()) + }; + let mut prev_line = capture_ws(state.line, state.column)?; + let mut ws = prev_line.to_string(); + while prev_line.contains('\\') { + // continuation character + state.line += 1; + state.column = 0; + prev_line = capture_ws(state.line, state.column)?; + ws.push_str(prev_line); + } + state.column += prev_line.len(); + new_simple_whitespace(py, &ws[..]) +} + +#[pyfunction] +#[text_signature = "(config, state, *, override_absolute_indent)"] +pub fn parse_empty_lines<'py>( + py: Python<'py>, + config: &Config, + state: &mut State, + override_absolute_indent: Option<&str>, +) -> PyResult<&'py PyTuple> { + // If override_absolute_indent is Some, then we need to parse all lines up to and including the + // last line that is indented at our level. These all belong to the footer and not to the next + // line's leading_lines. + // + // We don't know what the last line with indent=True is, and there could be indent=False lines + // interspersed with indent=True lines, so we need to speculatively parse all possible empty + // lines, and then unwind to find the last empty line with indent=True. 
+ let mut speculative_state = state.clone(); + let mut lines = Vec::new(); + while let Some(empty_line) = + parse_empty_line(py, config, &mut speculative_state, override_absolute_indent)? + { + lines.push((speculative_state.clone(), empty_line)); + } + + if override_absolute_indent.is_some() { + // Remove elements from the end until we find an indented line. + while let Some((_, empty_line)) = lines.last() { + if empty_line.getattr("indent")?.is_true()? { + break; + } + lines.pop(); + } + } + + if let Some((final_state, _)) = lines.last() { + // update the state to match the last line that we captured + *state = final_state.clone(); + } + + Ok(PyTuple::new( + py, + lines.iter().map(|(_, empty_line)| empty_line), + )) +} + +#[pyfunction] +pub fn parse_trailing_whitespace<'py>( + py: Python<'py>, + config: &Config, + state: &mut State, +) -> PyResult<&'py PyAny> { + if let Some(trailing_whitespace) = parse_optional_trailing_whitespace(py, config, state)? { + Ok(trailing_whitespace) + } else { + Err(PyException::new_err(concat!( + "Internal Error: Failed to parse TrailingWhitespace. This should never ", + "happen because a TrailingWhitespace is never optional in the grammar, ", + "so this error should've been caught by parso first.", + ))) + } +} + +#[pyfunction] +pub fn parse_parenthesizable_whitespace<'py>( + py: Python<'py>, + config: &Config, + state: &mut State, +) -> PyResult<&'py PyAny> { + if state.is_parenthesized { + // First, try parenthesized (don't need speculation because it either parses or doesn't + // modify state). + if let Some(parenthesized_whitespace) = parse_parenthesized_whitespace(py, config, state)? { + return Ok(parenthesized_whitespace); + } + } + // It's not parenthesized, or ParenthesizedWhitespace didn't parse. Just parse and return a + // SimpleWhitespace. 
+    parse_simple_whitespace(py, config, state)
+}
+
+// END PARSER ENTRYPOINTS
+// BEGIN PARSER INTERNAL PRODUCTIONS
+
+pub fn parse_empty_line<'py>(
+    py: Python<'py>,
+    config: &Config,
+    state: &mut State,
+    override_absolute_indent: Option<&str>,
+) -> PyResult<Option<&'py PyAny>> {
+    // begin speculative parsing
+    let mut speculative_state = state.clone();
+    if let Ok(indent) = parse_indent(py, config, &mut speculative_state, override_absolute_indent) {
+        let whitespace = parse_simple_whitespace(py, config, &mut speculative_state)?;
+        let comment = parse_comment(py, config, &mut speculative_state)?;
+        if let Some(newline) = parse_newline(py, config, &mut speculative_state)? {
+            // speculative parsing succeeded
+            *state = speculative_state;
+            Ok(Some(new_empty_line(
+                py, indent, whitespace, comment, newline,
+            )?))
+        } else {
+            // no newline found, speculative parsing failed
+            Ok(None)
+        }
+    } else {
+        // we aren't on a new line, speculative parsing failed
+        // TODO: Don't rely on a python exception for this, use a rust error type
+        Ok(None)
+    }
+}
+
+/// Returns true if indentation was found, otherwise false.
+pub fn parse_indent(
+    _py: Python,
+    config: &Config,
+    state: &mut State,
+    override_absolute_indent: Option<&str>,
+) -> PyResult<bool> {
+    let absolute_indent = override_absolute_indent.unwrap_or(&state.absolute_indent[..]);
+    if state.column != 0 {
+        if state.column == config.get_line(state.line)?.len() && state.line == config.lines.len() {
+            // we're at EOF, treat this as a failed speculative parse
+            Ok(false)
+        } else {
+            Err(PyException::new_err(
+                "Internal Error: Column should be 0 when parsing an indent",
+            ))
+        }
+    } else if config
+        .get_line_after_column(state.line, state.column)?
+ .starts_with(absolute_indent) + { + state.column += absolute_indent.len(); + Ok(true) + } else { + Ok(false) + } +} + +pub fn parse_comment<'py>( + py: Python<'py>, + config: &Config, + state: &mut State, +) -> PyResult> { + if let Some(comment_match) = + COMMENT_RE.find(config.get_line_after_column(state.line, state.column)?) + { + let comment_str = comment_match.as_str(); + state.column += comment_str.len(); + Ok(Some(new_comment(py, comment_str)?)) + } else { + Ok(None) + } +} + +pub fn parse_newline<'py>( + py: Python<'py>, + config: &Config, + state: &mut State, +) -> PyResult> { + // begin speculative parsing + if let Some(newline_match) = + NEWLINE_RE.find(config.get_line_after_column(state.line, state.column)?) + { + // speculative parsing succeeded + let newline_str = newline_match.as_str(); + state.column += newline_str.len(); + if state.column != config.get_line(state.line)?.len() { + return Err(PyException::new_err( + "Internal Error: Found a newline, but it wasn't the EOL.", + )); + } + if state.line < config.lines.len() { + state.line += 1; + state.column = 0; + } + if newline_str == config.default_newline.as_ref() { + Ok(Some(new_newline(py, None)?)) + } else { + Ok(Some(new_newline(py, Some(newline_str))?)) + } + } else { + // no newline was found, speculative parsing failed + Ok(None) + } +} + +pub fn parse_optional_trailing_whitespace<'py>( + py: Python<'py>, + config: &Config, + state: &mut State, +) -> PyResult> { + // begin speculative parsing + let mut speculative_state = state.clone(); + let whitespace = parse_simple_whitespace(py, config, &mut speculative_state)?; + let comment = parse_comment(py, config, &mut speculative_state)?; + if let Some(newline) = parse_newline(py, config, &mut speculative_state)? 
{ + // speculative parsing succeeded + *state = speculative_state; + Ok(Some(new_trailing_whitespace( + py, whitespace, comment, newline, + )?)) + } else { + // speculative parsing failed + Ok(None) + } +} + +pub fn parse_parenthesized_whitespace<'py>( + py: Python<'py>, + config: &Config, + state: &mut State, +) -> PyResult> { + if let Some(first_line) = parse_optional_trailing_whitespace(py, config, state)? { + let mut empty_lines = Vec::new(); + while let Some(empty_line) = parse_empty_line(py, config, state, None)? { + empty_lines.push(empty_line); + } + let indent = parse_indent(py, config, state, None)?; + let last_line = parse_simple_whitespace(py, config, state)?; + Ok(Some(new_parenthesized_whitespace( + py, + first_line, + empty_lines, + indent, + last_line, + )?)) + } else { + Ok(None) + } +} + +pub fn init_module(_py: Python, m: &PyModule) -> PyResult<()> { + m.add_function(wrap_pyfunction!(parse_simple_whitespace, m)?) + .unwrap(); + m.add_function(wrap_pyfunction!(parse_empty_lines, m)?) + .unwrap(); + m.add_function(wrap_pyfunction!(parse_trailing_whitespace, m)?) + .unwrap(); + m.add_function(wrap_pyfunction!(parse_parenthesizable_whitespace, m)?) + .unwrap(); + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + use crate::test_utils::py_assert_deep_equals; + + type ParseFn<'py> = dyn FnOnce(Python<'py>, &Config, &mut State) -> PyResult<&'py PyAny>; + + struct TestCase<'py, 't> { + parser: Box>, + // We could accept a Config instead of lines and default_newline, but Config is a + // little awkward to construct from `&str`, so we do it in `.test()`. 
+ lines: Vec<&'t str>, + default_newline: &'t str, + start_state: State, + end_state: State, + expected_node: &'py PyAny, + } + + impl<'py, 't> TestCase<'py, 't> { + fn test(mut self, py: Python<'py>) { + let config = Config { + lines: self + .lines + .iter() + .map(|l| l.to_string()) + .collect::>() + .into(), + default_newline: self.default_newline.to_string().into(), + }; + let parsed_node = (self.parser)(py, &config, &mut self.start_state) + .unwrap() + .into_py(py); + py_assert_deep_equals(py, &parsed_node, &self.expected_node); + assert_eq!(&self.start_state, &self.end_state); + } + } + + mod simple_whitespace { + use super::*; + + #[test] + fn test_empty() { + Python::with_gil(|py| { + TestCase { + parser: Box::new(parse_simple_whitespace), + lines: vec!["not whitespace\n", " another line\n"], + default_newline: "\n", + start_state: State { + line: 1, + column: 0, + absolute_indent: "".to_string(), + is_parenthesized: false, + }, + end_state: State { + line: 1, + column: 0, + absolute_indent: "".to_string(), + is_parenthesized: false, + }, + expected_node: new_simple_whitespace(py, "").unwrap(), + } + .test(py) + }) + } + + #[test] + fn test_start_of_line() { + Python::with_gil(|py| { + TestCase { + parser: Box::new(parse_simple_whitespace), + lines: vec!["\t <-- There's some whitespace there\n"], + default_newline: "\n", + start_state: State { + line: 1, + column: 0, + absolute_indent: "".to_string(), + is_parenthesized: false, + }, + end_state: State { + line: 1, + column: 3, + absolute_indent: "".to_string(), + is_parenthesized: false, + }, + expected_node: new_simple_whitespace(py, "\t ").unwrap(), + } + .test(py) + }) + } + + #[test] + fn test_end_of_line() { + Python::with_gil(|py| { + TestCase { + parser: Box::new(parse_simple_whitespace), + lines: vec!["prefix "], + default_newline: "\n", + start_state: State { + line: 1, + column: 6, + absolute_indent: "".to_string(), + is_parenthesized: false, + }, + end_state: State { + line: 1, + column: 9, + 
absolute_indent: "".to_string(), + is_parenthesized: false, + }, + expected_node: new_simple_whitespace(py, " ").unwrap(), + } + .test(py) + }) + } + + #[test] + fn test_line_continuation() { + Python::with_gil(|py| { + TestCase { + parser: Box::new(parse_simple_whitespace), + lines: vec!["prefix \\\n", " \\\n", " # suffix\n"], + default_newline: "\n", + start_state: State { + line: 1, + column: 6, + absolute_indent: "".to_string(), + is_parenthesized: false, + }, + end_state: State { + line: 3, + column: 4, + absolute_indent: "".to_string(), + is_parenthesized: false, + }, + expected_node: new_simple_whitespace(py, " \\\n \\\n ").unwrap(), + } + .test(py) + }) + } + } + + mod empty_lines { + use super::*; + + fn parse_empty_lines_no_override<'py>( + py: Python<'py>, + config: &Config, + state: &mut State, + ) -> PyResult<&'py PyAny> { + parse_empty_lines(py, config, state, None).map(|lines| lines.into()) + } + + #[test] + fn test_empty_list() { + Python::with_gil(|py| { + TestCase { + parser: Box::new(parse_empty_lines_no_override), + lines: vec!["this is not an empty line"], + default_newline: "\n", + start_state: State { + line: 1, + column: 0, + absolute_indent: "".to_string(), + is_parenthesized: false, + }, + end_state: State { + line: 1, + column: 0, + absolute_indent: "".to_string(), + is_parenthesized: false, + }, + expected_node: PyTuple::new(py, Vec::<&PyAny>::new()), + } + .test(py) + }) + } + + #[test] + fn test_single_line() { + Python::with_gil(|py| { + TestCase { + parser: Box::new(parse_empty_lines_no_override), + lines: vec![" # comment\n", "this is not an empty line\n"], + default_newline: "\n", + start_state: State { + line: 1, + column: 0, + absolute_indent: " ".to_string(), + is_parenthesized: false, + }, + end_state: State { + line: 2, + column: 0, + absolute_indent: " ".to_string(), + is_parenthesized: false, + }, + expected_node: PyTuple::new( + py, + vec![new_empty_line( + py, + /* indent */ true, + /* whitespace */ 
new_simple_whitespace(py, "").unwrap(), + /* comment */ Some(new_comment(py, "# comment").unwrap()), + /* newline */ new_newline(py, None).unwrap(), + ) + .unwrap()], + ), + } + .test(py) + }) + } + + #[test] + fn test_multiple_lines() { + Python::with_gil(|py| { + TestCase { + parser: Box::new(parse_empty_lines_no_override), + lines: vec![ + "\n", + " \n", + " # comment with indent and whitespace\n", + "# comment without indent\n", + " # comment with no indent but some whitespace\n", + ], + default_newline: "\n", + start_state: State { + line: 1, + column: 0, + absolute_indent: " ".to_string(), + is_parenthesized: false, + }, + end_state: State { + line: 5, + column: 47, + absolute_indent: " ".to_string(), + is_parenthesized: false, + }, + expected_node: PyTuple::new( + py, + vec![ + new_empty_line( + py, + /* indent */ false, + new_simple_whitespace(py, "").unwrap(), + /* comment */ None, + new_newline(py, None).unwrap(), + ) + .unwrap(), + new_empty_line( + py, + /* indent */ true, + new_simple_whitespace(py, "").unwrap(), + /* comment */ None, + new_newline(py, None).unwrap(), + ) + .unwrap(), + new_empty_line( + py, + /* indent */ true, + new_simple_whitespace(py, " ").unwrap(), + Some( + new_comment(py, "# comment with indent and whitespace") + .unwrap(), + ), + new_newline(py, None).unwrap(), + ) + .unwrap(), + new_empty_line( + py, + /* indent */ false, + new_simple_whitespace(py, "").unwrap(), + Some(new_comment(py, "# comment without indent").unwrap()), + new_newline(py, None).unwrap(), + ) + .unwrap(), + new_empty_line( + py, + /* indent */ false, + new_simple_whitespace(py, " ").unwrap(), + Some( + new_comment(py, "# comment with no indent but some whitespace") + .unwrap(), + ), + new_newline(py, None).unwrap(), + ) + .unwrap(), + ], + ), + } + .test(py) + }) + } + + #[test] + fn test_non_default_newline() { + Python::with_gil(|py| { + TestCase { + parser: Box::new(parse_empty_lines_no_override), + lines: vec!["\n", "\r\n", "\r"], + default_newline: 
"\n", + start_state: State { + line: 1, + column: 0, + absolute_indent: "".to_string(), + is_parenthesized: false, + }, + end_state: State { + line: 3, + column: 1, + absolute_indent: "".to_string(), + is_parenthesized: false, + }, + expected_node: PyTuple::new( + py, + vec![ + new_empty_line( + py, + /* indent */ true, + new_simple_whitespace(py, "").unwrap(), + /* comment */ None, + new_newline(py, None).unwrap(), // default newline + ) + .unwrap(), + new_empty_line( + py, + /* indent */ true, + new_simple_whitespace(py, "").unwrap(), + /* comment */ None, + new_newline(py, Some("\r\n")).unwrap(), + ) + .unwrap(), + new_empty_line( + py, + /* indent */ true, + new_simple_whitespace(py, "").unwrap(), + /* comment */ None, + new_newline(py, Some("\r")).unwrap(), + ) + .unwrap(), + ], + ), + } + .test(py) + }) + } + } + + mod trailing_whitespace { + use super::*; + + #[test] + fn test_with_whitespace_and_comment() { + Python::with_gil(|py| { + TestCase { + parser: Box::new(parse_trailing_whitespace), + lines: vec!["some code # comment\n"], + default_newline: "\n", + start_state: State { + line: 1, + column: 9, + absolute_indent: "".to_string(), + is_parenthesized: false, + }, + end_state: State { + line: 1, + column: 21, + absolute_indent: "".to_string(), + is_parenthesized: false, + }, + expected_node: new_trailing_whitespace( + py, + /* whitespace */ new_simple_whitespace(py, " ").unwrap(), + /* comment */ Some(new_comment(py, "# comment").unwrap()), + /* newline */ new_newline(py, None).unwrap(), + ) + .unwrap(), + } + .test(py) + }) + } + } +} diff --git a/libcst_native/src/whitespace_state.rs b/libcst_native/src/whitespace_state.rs new file mode 100644 index 000000000..68d2f0233 --- /dev/null +++ b/libcst_native/src/whitespace_state.rs @@ -0,0 +1,80 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// +// This source code is licensed under the MIT license found in the +// LICENSE file in the root directory of this source tree. 
+
+use pyo3::class::basic::CompareOp;
+use pyo3::prelude::*;
+use pyo3::PyObjectProtocol;
+
+/// Parser position plus indentation/parenthesization context, exposed to Python as a class.
+#[pyclass(module = "libcst_native.whitespace_state")]
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct WhitespaceState {
+    #[pyo3(get, set)]
+    pub line: usize, // one-indexed (to match parso's behavior)
+    #[pyo3(get, set)]
+    pub column: usize, // zero-indexed (to match parso's behavior)
+    #[pyo3(get, set)]
+    pub absolute_indent: String,
+    #[pyo3(get, set)]
+    pub is_parenthesized: bool,
+}
+
+impl Default for WhitespaceState {
+    fn default() -> Self {
+        Self {
+            line: 1,
+            column: 0,
+            absolute_indent: "".to_string(),
+            is_parenthesized: false,
+        }
+    }
+}
+
+#[pymethods]
+impl WhitespaceState {
+    #[new]
+    fn new(line: usize, column: usize, absolute_indent: String, is_parenthesized: bool) -> Self {
+        WhitespaceState {
+            line,
+            column,
+            absolute_indent,
+            is_parenthesized,
+        }
+    }
+}
+
+#[pyproto]
+impl PyObjectProtocol for WhitespaceState {
+    fn __repr__(&self) -> PyResult<String> {
+        Python::with_gil(|py| {
+            // Format the indent with Python's own `repr()` so it is quoted like a `str`.
+            let indent = self.absolute_indent.to_object(py);
+            Ok(format!(
+                "WhitespaceState({}, {}, {}, {})",
+                self.line,
+                self.column,
+                indent.as_ref(py).repr()?.to_str()?,
+                self.is_parenthesized
+            ))
+        })
+    }
+
+    // The python unit tests need to be able to compare WhitespaceState
+    fn __richcmp__(&self, other: PyRef<Self>, op: CompareOp) -> PyResult<PyObject> {
+        Python::with_gil(|py| {
+            Ok(match op {
+                CompareOp::Eq => (self == &*other).into_py(py),
+                CompareOp::Ne => (self != &*other).into_py(py),
+                _ => Python::NotImplemented(py),
+            })
+        })
+    }
+}
+
+/// Adds the `WhitespaceState` class to module `m`.
+pub fn init_module(_py: Python, m: &PyModule) -> PyResult<()> {
+    m.add_class::<WhitespaceState>()?;
+    Ok(())
+}
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 2f86e9b3d..d44effd6f 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -8,6 +8,7 @@ hypothesmith>=0.0.4
 git+https://github.com/jimmylai/sphinx.git@slots_type_annotation
 isort==5.5.3
 jupyter>=1.0.0
+maturin>=0.8.3,<0.9
 nbsphinx>=0.4.2
 pyre-check==0.0.41
 sphinx-rtd-theme>=0.4.3
diff --git a/setup.py b/setup.py
index 4dd024dd5..39a4b587b 100644
--- a/setup.py
+++ b/setup.py
@@ -49,6 +49,7 @@
     install_requires=[dep.strip() for dep in open("requirements.txt").readlines()],
     extras_require={
         "dev": [dep.strip() for dep in open("requirements-dev.txt").readlines() if "=" in dep],
+        "native": ["libcst_native==0.1.0"],
     },
     classifiers=[
         "License :: OSI Approved :: MIT License",
diff --git a/stubs/libcst_native/parser_config.pyi b/stubs/libcst_native/parser_config.pyi
new file mode 100644
index 000000000..ac1cce21c
--- /dev/null
+++ b/stubs/libcst_native/parser_config.pyi
@@ -0,0 +1,45 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Any, FrozenSet, Mapping, Sequence
+
+from libcst._parser.parso.utils import PythonVersionInfo
+
+
+class BaseWhitespaceParserConfig:
+    def __new__(
+        cls,
+        *,
+        lines: Sequence[str],
+        default_newline: str,
+    ) -> BaseWhitespaceParserConfig: ...
+
+    lines: Sequence[str]
+    default_newline: str
+
+
+class ParserConfig(BaseWhitespaceParserConfig):
+    def __new__(
+        cls,
+        *,
+        lines: Sequence[str],
+        encoding: str,
+        default_indent: str,
+        default_newline: str,
+        has_trailing_newline: bool,
+        version: PythonVersionInfo,
+        future_imports: FrozenSet[str],
+    ) -> ParserConfig: ...
+
+    # lines is inherited
+    encoding: str
+    default_indent: str
+    # default_newline is inherited
+    has_trailing_newline: bool
+    version: PythonVersionInfo
+    future_imports: FrozenSet[str]
+
+
+def parser_config_asdict(config: ParserConfig) -> Mapping[str, Any]: ...
diff --git a/stubs/libcst_native/whitespace_parser.pyi b/stubs/libcst_native/whitespace_parser.pyi
new file mode 100644
index 000000000..1e8ebb253
--- /dev/null
+++ b/stubs/libcst_native/whitespace_parser.pyi
@@ -0,0 +1,29 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Optional, Sequence, Union
+
+from libcst._nodes.whitespace import (
+    EmptyLine,
+    Newline,
+    ParenthesizedWhitespace,
+    SimpleWhitespace,
+    TrailingWhitespace,
+)
+from libcst._parser.types.config import BaseWhitespaceParserConfig as Config
+from libcst._parser.types.whitespace_state import WhitespaceState as State
+
+
+def parse_simple_whitespace(config: Config, state: State) -> SimpleWhitespace: ...
+def parse_empty_lines(
+    config: Config,
+    state: State,
+    *,
+    override_absolute_indent: Optional[str] = None,
+) -> Sequence[EmptyLine]: ...
+def parse_trailing_whitespace(config: Config, state: State) -> TrailingWhitespace: ...
+def parse_parenthesizable_whitespace(
+    config: Config, state: State
+) -> Union[SimpleWhitespace, ParenthesizedWhitespace]: ...
diff --git a/stubs/libcst_native/whitespace_state.pyi b/stubs/libcst_native/whitespace_state.pyi new file mode 100644 index 000000000..da43bd542 --- /dev/null +++ b/stubs/libcst_native/whitespace_state.pyi @@ -0,0 +1,15 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +class WhitespaceState: + def __new__( + cls, line: int, column: int, absolute_indent: str, is_parenthesized: bool + ) -> WhitespaceState: ... + + line: int # one-indexed (to match parso's behavior) + column: int # zero-indexed (to match parso's behavior) + # What to look for when executing `_parse_indent`. + absolute_indent: str + is_parenthesized: bool