diff --git a/.editorconfig b/.editorconfig index 0824f6693..c4f3c65d8 100644 --- a/.editorconfig +++ b/.editorconfig @@ -1,6 +1,6 @@ root = true -[*.{py,pyi,toml,md}] +[*.{py,pyi,rs,toml,md}] charset = "utf-8" end_of_line = lf indent_size = 4 @@ -8,3 +8,7 @@ indent_style = space insert_final_newline = true trim_trailing_whitespace = true max_line_length = 88 + +[*.rs] +# https://github.com/rust-dev-tools/fmt-rfcs/blob/master/guide/guide.md +max_line_length = 100 diff --git a/.gitignore b/.gitignore index 85fb5573a..e302b1db3 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ build/ .coverage .hypothesis/ .pyre_configuration +libcst_native/target/ diff --git a/libcst/_parser/py_whitespace_parser.py b/libcst/_parser/py_whitespace_parser.py new file mode 100644 index 000000000..d1421f1da --- /dev/null +++ b/libcst/_parser/py_whitespace_parser.py @@ -0,0 +1,261 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from typing import List, Optional, Sequence, Tuple, Union + +from libcst._nodes.whitespace import ( + COMMENT_RE, + NEWLINE_RE, + SIMPLE_WHITESPACE_RE, + Comment, + EmptyLine, + Newline, + ParenthesizedWhitespace, + SimpleWhitespace, + TrailingWhitespace, +) +from libcst._parser.types.config import BaseWhitespaceParserConfig +from libcst._parser.types.whitespace_state import WhitespaceState as State + + +# BEGIN PARSER ENTRYPOINTS + + +def parse_simple_whitespace( + config: BaseWhitespaceParserConfig, state: State +) -> SimpleWhitespace: + # The match never fails because the pattern can match an empty string + lines = config.lines + # pyre-fixme[16]: Optional type has no attribute `group`. 
+ ws_line = SIMPLE_WHITESPACE_RE.match(lines[state.line - 1], state.column).group(0) + ws_line_list = [ws_line] + while "\\" in ws_line: + # continuation character + state.line += 1 + state.column = 0 + ws_line = SIMPLE_WHITESPACE_RE.match(lines[state.line - 1], state.column).group( + 0 + ) + ws_line_list.append(ws_line) + + # TODO: we could special-case the common case where there's no continuation + # character to avoid list construction and joining. + + # once we've finished collecting continuation characters + state.column += len(ws_line) + return SimpleWhitespace("".join(ws_line_list)) + + +def parse_empty_lines( + config: BaseWhitespaceParserConfig, + state: State, + *, + override_absolute_indent: Optional[str] = None, +) -> Sequence[EmptyLine]: + # If override_absolute_indent is true, then we need to parse all lines up + # to and including the last line that is indented at our level. These all + # belong to the footer and not to the next line's leading_lines. All lines + # that have indent=False and come after the last line where indent=True + # do not belong to this node. + state_for_line = State( + state.line, state.column, state.absolute_indent, state.is_parenthesized + ) + lines: List[Tuple[State, EmptyLine]] = [] + while True: + el = _parse_empty_line( + config, state_for_line, override_absolute_indent=override_absolute_indent + ) + if el is None: + break + + # Store the updated state with the element we parsed. Then make a new state + # clone for the next element. + lines.append((state_for_line, el)) + state_for_line = State( + state_for_line.line, + state_for_line.column, + state.absolute_indent, + state.is_parenthesized, + ) + + if override_absolute_indent is not None: + # We need to find the last element that is indented, and then split the list + # at that point. 
+ for i in range(len(lines) - 1, -1, -1): + if lines[i][1].indent: + lines = lines[: (i + 1)] + break + else: + # We didn't find any lines, throw them all away + lines = [] + + if lines: + # Update the state line and column to match the last line actually parsed. + final_state: State = lines[-1][0] + state.line = final_state.line + state.column = final_state.column + return [r[1] for r in lines] + + +def parse_trailing_whitespace( + config: BaseWhitespaceParserConfig, state: State +) -> TrailingWhitespace: + trailing_whitespace = _parse_trailing_whitespace(config, state) + if trailing_whitespace is None: + raise Exception( + "Internal Error: Failed to parse TrailingWhitespace. This should never " + + "happen because a TrailingWhitespace is never optional in the grammar, " + + "so this error should've been caught by parso first." + ) + return trailing_whitespace + + +def parse_parenthesizable_whitespace( + config: BaseWhitespaceParserConfig, state: State +) -> Union[SimpleWhitespace, ParenthesizedWhitespace]: + if state.is_parenthesized: + # First, try parenthesized (don't need speculation because it either + # parses or doesn't modify state). 
+ parenthesized_whitespace = _parse_parenthesized_whitespace(config, state) + if parenthesized_whitespace is not None: + return parenthesized_whitespace + # Now, just parse and return a simple whitespace + return parse_simple_whitespace(config, state) + + +# END PARSER ENTRYPOINTS +# BEGIN PARSER INTERNAL PRODUCTIONS + + +def _parse_empty_line( + config: BaseWhitespaceParserConfig, + state: State, + *, + override_absolute_indent: Optional[str] = None, +) -> Optional[EmptyLine]: + # begin speculative parsing + speculative_state = State( + state.line, state.column, state.absolute_indent, state.is_parenthesized + ) + try: + indent = _parse_indent( + config, speculative_state, override_absolute_indent=override_absolute_indent + ) + except Exception: + # We aren't on a new line, speculative parsing failed + return None + whitespace = parse_simple_whitespace(config, speculative_state) + comment = _parse_comment(config, speculative_state) + newline = _parse_newline(config, speculative_state) + if newline is None: + # speculative parsing failed + return None + # speculative parsing succeeded + state.line = speculative_state.line + state.column = speculative_state.column + # don't need to copy absolute_indent/is_parenthesized because they don't change. + return EmptyLine(indent, whitespace, comment, newline) + + +def _parse_indent( + config: BaseWhitespaceParserConfig, + state: State, + *, + override_absolute_indent: Optional[str] = None, +) -> bool: + """ + Returns True if indentation was found, otherwise False. 
+ """ + absolute_indent = ( + override_absolute_indent + if override_absolute_indent is not None + else state.absolute_indent + ) + line_str = config.lines[state.line - 1] + if state.column != 0: + if state.column == len(line_str) and state.line == len(config.lines): + # We're at EOF, treat this as a failed speculative parse + return False + raise Exception("Internal Error: Column should be 0 when parsing an indent.") + if line_str.startswith(absolute_indent, state.column): + state.column += len(absolute_indent) + return True + return False + + +def _parse_comment( + config: BaseWhitespaceParserConfig, state: State +) -> Optional[Comment]: + comment_match = COMMENT_RE.match(config.lines[state.line - 1], state.column) + if comment_match is None: + return None + comment = comment_match.group(0) + state.column += len(comment) + return Comment(comment) + + +def _parse_newline( + config: BaseWhitespaceParserConfig, state: State +) -> Optional[Newline]: + # begin speculative parsing + line_str = config.lines[state.line - 1] + newline_match = NEWLINE_RE.match(line_str, state.column) + if newline_match is not None: + # speculative parsing succeeded + newline_str = newline_match.group(0) + state.column += len(newline_str) + if state.column != len(line_str): + raise Exception("Internal Error: Found a newline, but it wasn't the EOL.") + if state.line < len(config.lines): + # this newline was the end of a line, and there's another line, + # therefore we should move to the next line + state.line += 1 + state.column = 0 + if newline_str == config.default_newline: + # Just inherit it from the Module instead of explicitly setting it. 
+ return Newline() + else: + return Newline(newline_str) + else: # no newline was found, speculative parsing failed + return None + + +def _parse_trailing_whitespace( + config: BaseWhitespaceParserConfig, state: State +) -> Optional[TrailingWhitespace]: + # Begin speculative parsing + speculative_state = State( + state.line, state.column, state.absolute_indent, state.is_parenthesized + ) + whitespace = parse_simple_whitespace(config, speculative_state) + comment = _parse_comment(config, speculative_state) + newline = _parse_newline(config, speculative_state) + if newline is None: + # Speculative parsing failed + return None + # Speculative parsing succeeded + state.line = speculative_state.line + state.column = speculative_state.column + # don't need to copy absolute_indent/is_parenthesized because they don't change. + return TrailingWhitespace(whitespace, comment, newline) + + +def _parse_parenthesized_whitespace( + config: BaseWhitespaceParserConfig, state: State +) -> Optional[ParenthesizedWhitespace]: + first_line = _parse_trailing_whitespace(config, state) + if first_line is None: + # Speculative parsing failed + return None + empty_lines = () + while True: + empty_line = _parse_empty_line(config, state) + if empty_line is None: + # This isn't an empty line, so parse it below + break + empty_lines = empty_lines + (empty_line,) + indent = _parse_indent(config, state) + last_line = parse_simple_whitespace(config, state) + return ParenthesizedWhitespace(first_line, empty_lines, indent, last_line) diff --git a/libcst/_parser/tests/test_detect_config.py b/libcst/_parser/tests/test_detect_config.py index b17c9fe58..fdda965b8 100644 --- a/libcst/_parser/tests/test_detect_config.py +++ b/libcst/_parser/tests/test_detect_config.py @@ -3,12 +3,15 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. 
-import dataclasses from typing import Union from libcst._parser.detect_config import detect_config from libcst._parser.parso.utils import PythonVersionInfo -from libcst._parser.types.config import ParserConfig, PartialParserConfig +from libcst._parser.types.config import ( + ParserConfig, + PartialParserConfig, + parser_config_asdict, +) from libcst.testing.utils import UnitTest, data_provider @@ -316,7 +319,7 @@ def test_detect_module_config( expected_config: ParserConfig, ) -> None: self.assertEqual( - dataclasses.asdict( + parser_config_asdict( detect_config( source, partial=partial, @@ -324,5 +327,5 @@ def test_detect_module_config( detect_default_newline=detect_default_newline, ).config ), - dataclasses.asdict(expected_config), + parser_config_asdict(expected_config), ) diff --git a/libcst/_parser/tests/test_whitespace_parser.py b/libcst/_parser/tests/test_whitespace_parser.py index dcbafa7e0..f5bd338b1 100644 --- a/libcst/_parser/tests/test_whitespace_parser.py +++ b/libcst/_parser/tests/test_whitespace_parser.py @@ -3,12 +3,11 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. 
-from dataclasses import dataclass -from typing import Callable, Sequence, TypeVar +from typing import Callable, TypeVar import libcst as cst from libcst._nodes.deep_equals import deep_equals -from libcst._parser.types.config import BaseWhitespaceParserConfig +from libcst._parser.types.config import MockWhitespaceParserConfig as Config from libcst._parser.types.whitespace_state import WhitespaceState as State from libcst._parser.whitespace_parser import ( parse_empty_lines, @@ -21,12 +20,6 @@ _T = TypeVar("_T") -@dataclass(frozen=True) -class Config(BaseWhitespaceParserConfig): - lines: Sequence[str] - default_newline: str - - class WhitespaceParserTest(UnitTest): @data_provider( { diff --git a/libcst/_parser/types/config.py b/libcst/_parser/types/config.py index 7c76e4c71..2d6371292 100644 --- a/libcst/_parser/types/config.py +++ b/libcst/_parser/types/config.py @@ -3,14 +3,12 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. - -import abc import codecs import re import sys from dataclasses import dataclass, field, fields from enum import Enum -from typing import FrozenSet, List, Optional, Pattern, Sequence, Union +from typing import Any, Callable, FrozenSet, List, Mapping, Optional, Pattern, Union from libcst._add_slots import add_slots from libcst._nodes.whitespace import NEWLINE_RE @@ -19,33 +17,21 @@ _INDENT_RE: Pattern[str] = re.compile(r"[ \t]+") +try: + from libcst_native import parser_config as config_mod -class BaseWhitespaceParserConfig(abc.ABC): - """ - Represents the subset of ParserConfig that the whitespace parser requires. This - makes calling the whitespace parser in tests with a mocked configuration easier. 
- """ - - lines: Sequence[str] - default_newline: str - + MockWhitespaceParserConfig = config_mod.BaseWhitespaceParserConfig +except ImportError: + from libcst._parser.types import py_config as config_mod -@add_slots # We'll access these properties frequently, so use slots -@dataclass(frozen=True) -class ParserConfig(BaseWhitespaceParserConfig): - """ - An internal configuration object that the python parser passes around. These values - are global to the parsed code and should not change during the lifetime of the - parser object. - """ + # pyre-fixme[9]: This is a small implementation difference between native and python + MockWhitespaceParserConfig = config_mod.MockWhitespaceParserConfig - lines: Sequence[str] - encoding: str - default_indent: str - default_newline: str - has_trailing_newline: bool - version: PythonVersionInfo - future_imports: FrozenSet[str] +BaseWhitespaceParserConfig = config_mod.BaseWhitespaceParserConfig +ParserConfig = config_mod.ParserConfig +parser_config_asdict: Callable[ + [ParserConfig], Mapping[str, Any] +] = config_mod.parser_config_asdict class AutoConfig(Enum): diff --git a/libcst/_parser/types/py_config.py b/libcst/_parser/types/py_config.py new file mode 100644 index 000000000..6722a9eae --- /dev/null +++ b/libcst/_parser/types/py_config.py @@ -0,0 +1,54 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import abc +from dataclasses import asdict, dataclass +from typing import Any, FrozenSet, Mapping, Sequence + +from libcst._parser.parso.utils import PythonVersionInfo + + +class BaseWhitespaceParserConfig(abc.ABC): + """ + Represents the subset of ParserConfig that the whitespace parser requires. This + makes calling the whitespace parser in tests with a mocked configuration easier. 
+ """ + + lines: Sequence[str] + default_newline: str + + +@dataclass(frozen=True) +class MockWhitespaceParserConfig(BaseWhitespaceParserConfig): + """ + An internal type used by unit tests. + """ + + lines: Sequence[str] + default_newline: str + + +@dataclass(frozen=True) +class ParserConfig(BaseWhitespaceParserConfig): + """ + An internal configuration object that the python parser passes around. These + values are global to the parsed code and should not change during the lifetime + of the parser object. + """ + + lines: Sequence[str] + encoding: str + default_indent: str + default_newline: str + has_trailing_newline: bool + version: PythonVersionInfo + future_imports: FrozenSet[str] + + +def parser_config_asdict(config: ParserConfig) -> Mapping[str, Any]: + """ + An internal helper function used by unit tests to compare configs. + """ + return asdict(config) diff --git a/libcst/_parser/types/py_whitespace_state.py b/libcst/_parser/types/py_whitespace_state.py new file mode 100644 index 000000000..41244b98a --- /dev/null +++ b/libcst/_parser/types/py_whitespace_state.py @@ -0,0 +1,36 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from dataclasses import dataclass + +from libcst._add_slots import add_slots + + +@add_slots +@dataclass(frozen=False) +class WhitespaceState: + """ + A frequently mutated store of the whitespace parser's current state. This object + must be cloned prior to speculative parsing. + + This is in contrast to the `config` object each whitespace parser function takes, + which is frozen and never mutated. + + Whitespace parsing works by mutating this state object. By encapsulating saving, and + re-using state objects inside the top-level python parser, the whitespace parser is + able to be reentrant. 
One 'convert' function can consume part of the whitespace, and + another 'convert' function can consume the rest, depending on who owns what + whitespace. + + This is similar to the approach you might take to parse nested languages (e.g. + JavaScript inside of HTML). We're treating whitespace as a separate language and + grammar from the rest of Python's grammar. + """ + + line: int # one-indexed (to match parso's behavior) + column: int # zero-indexed (to match parso's behavior) + # What to look for when executing `_parse_indent`. + absolute_indent: str + is_parenthesized: bool diff --git a/libcst/_parser/types/whitespace_state.py b/libcst/_parser/types/whitespace_state.py index b5554a2bc..a9798054c 100644 --- a/libcst/_parser/types/whitespace_state.py +++ b/libcst/_parser/types/whitespace_state.py @@ -7,34 +7,9 @@ Defines the state object used by the whitespace parser. """ -from dataclasses import dataclass +try: + from libcst_native import whitespace_state as mod +except ImportError: + from libcst._parser.types import py_whitespace_state as mod -from libcst._add_slots import add_slots - - -@add_slots -@dataclass(frozen=False) -class WhitespaceState: - """ - A frequently mutated store of the whitespace parser's current state. This object - must be cloned prior to speculative parsing. - - This is in contrast to the `config` object each whitespace parser function takes, - which is frozen and never mutated. - - Whitespace parsing works by mutating this state object. By encapsulating saving, and - re-using state objects inside the top-level python parser, the whitespace parser is - able to be reentrant. One 'convert' function can consume part of the whitespace, and - another 'convert' function can consume the rest, depending on who owns what - whitespace. - - This is similar to the approach you might take to parse nested languages (e.g. - JavaScript inside of HTML). We're treating whitespace as a separate language and - grammar from the rest of Python's grammar. 
- """ - - line: int # one-indexed (to match parso's behavior) - column: int # zero-indexed (to match parso's behavior) - # What to look for when executing `_parse_indent`. - absolute_indent: str - is_parenthesized: bool +WhitespaceState = mod.WhitespaceState diff --git a/libcst/_parser/whitespace_parser.py b/libcst/_parser/whitespace_parser.py index b9df6c7e1..9ffb6a7dc 100644 --- a/libcst/_parser/whitespace_parser.py +++ b/libcst/_parser/whitespace_parser.py @@ -5,7 +5,7 @@ """ Parso doesn't attempt to parse (or even emit tokens for) whitespace or comments that -isn't syntatically important. Instead, we're just given the whitespace as a "prefix" of +aren't syntatically important. Instead, we're just given the whitespace as a "prefix" of the token. However, in our CST, whitespace is gathered into far more detailed objects than a simple @@ -15,259 +15,19 @@ hand-rolled recursive descent parser. """ -from typing import List, Optional, Sequence, Tuple, Union - -from libcst._nodes.whitespace import ( - COMMENT_RE, - NEWLINE_RE, - SIMPLE_WHITESPACE_RE, - Comment, - EmptyLine, - Newline, - ParenthesizedWhitespace, - SimpleWhitespace, - TrailingWhitespace, -) -from libcst._parser.types.config import BaseWhitespaceParserConfig -from libcst._parser.types.whitespace_state import WhitespaceState as State - - -# BEGIN PARSER ENTRYPOINTS - - -def parse_simple_whitespace( - config: BaseWhitespaceParserConfig, state: State -) -> SimpleWhitespace: - # The match never fails because the pattern can match an empty string - lines = config.lines - # pyre-fixme[16]: Optional type has no attribute `group`. 
- ws_line = SIMPLE_WHITESPACE_RE.match(lines[state.line - 1], state.column).group(0) - ws_line_list = [ws_line] - while "\\" in ws_line: - # continuation character - state.line += 1 - state.column = 0 - ws_line = SIMPLE_WHITESPACE_RE.match(lines[state.line - 1], state.column).group( - 0 - ) - ws_line_list.append(ws_line) - - # TODO: we could special-case the common case where there's no continuation - # character to avoid list construction and joining. - - # once we've finished collecting continuation characters - state.column += len(ws_line) - return SimpleWhitespace("".join(ws_line_list)) - - -def parse_empty_lines( - config: BaseWhitespaceParserConfig, - state: State, - *, - override_absolute_indent: Optional[str] = None, -) -> Sequence[EmptyLine]: - # If override_absolute_indent is true, then we need to parse all lines up - # to and including the last line that is indented at our level. These all - # belong to the footer and not to the next line's leading_lines. All lines - # that have indent=False and come after the last line where indent=True - # do not belong to this node. - state_for_line = State( - state.line, state.column, state.absolute_indent, state.is_parenthesized - ) - lines: List[Tuple[State, EmptyLine]] = [] - while True: - el = _parse_empty_line( - config, state_for_line, override_absolute_indent=override_absolute_indent - ) - if el is None: - break - - # Store the updated state with the element we parsed. Then make a new state - # clone for the next element. - lines.append((state_for_line, el)) - state_for_line = State( - state_for_line.line, - state_for_line.column, - state.absolute_indent, - state.is_parenthesized, - ) - - if override_absolute_indent is not None: - # We need to find the last element that is indented, and then split the list - # at that point. 
- for i in range(len(lines) - 1, -1, -1): - if lines[i][1].indent: - lines = lines[: (i + 1)] - break - else: - # We didn't find any lines, throw them all away - lines = [] - - if lines: - # Update the state line and column to match the last line actually parsed. - final_state: State = lines[-1][0] - state.line = final_state.line - state.column = final_state.column - return [r[1] for r in lines] - - -def parse_trailing_whitespace( - config: BaseWhitespaceParserConfig, state: State -) -> TrailingWhitespace: - trailing_whitespace = _parse_trailing_whitespace(config, state) - if trailing_whitespace is None: - raise Exception( - "Internal Error: Failed to parse TrailingWhitespace. This should never " - + "happen because a TrailingWhitespace is never optional in the grammar, " - + "so this error should've been caught by parso first." - ) - return trailing_whitespace - - -def parse_parenthesizable_whitespace( - config: BaseWhitespaceParserConfig, state: State -) -> Union[SimpleWhitespace, ParenthesizedWhitespace]: - if state.is_parenthesized: - # First, try parenthesized (don't need speculation because it either - # parses or doesn't modify state). 
- parenthesized_whitespace = _parse_parenthesized_whitespace(config, state) - if parenthesized_whitespace is not None: - return parenthesized_whitespace - # Now, just parse and return a simple whitespace - return parse_simple_whitespace(config, state) - - -# END PARSER ENTRYPOINTS -# BEGIN PARSER INTERNAL PRODUCTIONS - - -def _parse_empty_line( - config: BaseWhitespaceParserConfig, - state: State, - *, - override_absolute_indent: Optional[str] = None, -) -> Optional[EmptyLine]: - # begin speculative parsing - speculative_state = State( - state.line, state.column, state.absolute_indent, state.is_parenthesized - ) - try: - indent = _parse_indent( - config, speculative_state, override_absolute_indent=override_absolute_indent - ) - except Exception: - # We aren't on a new line, speculative parsing failed - return None - whitespace = parse_simple_whitespace(config, speculative_state) - comment = _parse_comment(config, speculative_state) - newline = _parse_newline(config, speculative_state) - if newline is None: - # speculative parsing failed - return None - # speculative parsing succeeded - state.line = speculative_state.line - state.column = speculative_state.column - # don't need to copy absolute_indent/is_parenthesized because they don't change. - return EmptyLine(indent, whitespace, comment, newline) - - -def _parse_indent( - config: BaseWhitespaceParserConfig, - state: State, - *, - override_absolute_indent: Optional[str] = None, -) -> bool: - """ - Returns True if indentation was found, otherwise False. 
- """ - absolute_indent = ( - override_absolute_indent - if override_absolute_indent is not None - else state.absolute_indent - ) - line_str = config.lines[state.line - 1] - if state.column != 0: - if state.column == len(line_str) and state.line == len(config.lines): - # We're at EOF, treat this as a failed speculative parse - return False - raise Exception("Internal Error: Column should be 0 when parsing an indent.") - if line_str.startswith(absolute_indent, state.column): - state.column += len(absolute_indent) - return True - return False - - -def _parse_comment( - config: BaseWhitespaceParserConfig, state: State -) -> Optional[Comment]: - comment_match = COMMENT_RE.match(config.lines[state.line - 1], state.column) - if comment_match is None: - return None - comment = comment_match.group(0) - state.column += len(comment) - return Comment(comment) - - -def _parse_newline( - config: BaseWhitespaceParserConfig, state: State -) -> Optional[Newline]: - # begin speculative parsing - line_str = config.lines[state.line - 1] - newline_match = NEWLINE_RE.match(line_str, state.column) - if newline_match is not None: - # speculative parsing succeeded - newline_str = newline_match.group(0) - state.column += len(newline_str) - if state.column != len(line_str): - raise Exception("Internal Error: Found a newline, but it wasn't the EOL.") - if state.line < len(config.lines): - # this newline was the end of a line, and there's another line, - # therefore we should move to the next line - state.line += 1 - state.column = 0 - if newline_str == config.default_newline: - # Just inherit it from the Module instead of explicitly setting it. 
- return Newline() - else: - return Newline(newline_str) - else: # no newline was found, speculative parsing failed - return None - - -def _parse_trailing_whitespace( - config: BaseWhitespaceParserConfig, state: State -) -> Optional[TrailingWhitespace]: - # Begin speculative parsing - speculative_state = State( - state.line, state.column, state.absolute_indent, state.is_parenthesized - ) - whitespace = parse_simple_whitespace(config, speculative_state) - comment = _parse_comment(config, speculative_state) - newline = _parse_newline(config, speculative_state) - if newline is None: - # Speculative parsing failed - return None - # Speculative parsing succeeded - state.line = speculative_state.line - state.column = speculative_state.column - # don't need to copy absolute_indent/is_parenthesized because they don't change. - return TrailingWhitespace(whitespace, comment, newline) - - -def _parse_parenthesized_whitespace( - config: BaseWhitespaceParserConfig, state: State -) -> Optional[ParenthesizedWhitespace]: - first_line = _parse_trailing_whitespace(config, state) - if first_line is None: - # Speculative parsing failed - return None - empty_lines = () - while True: - empty_line = _parse_empty_line(config, state) - if empty_line is None: - # This isn't an empty line, so parse it below - break - empty_lines = empty_lines + (empty_line,) - indent = _parse_indent(config, state) - last_line = parse_simple_whitespace(config, state) - return ParenthesizedWhitespace(first_line, empty_lines, indent, last_line) +try: + # It'd be better to do `from libcst_native.whitespace_parser import *`, but we're + # blocked on https://github.com/PyO3/pyo3/issues/759 + # (which ultimately seems to be a limitation of how importlib works) + from libcst_native import whitespace_parser as mod +except ImportError: + from libcst._parser import py_whitespace_parser as mod + +# pyre-fixme[5]: There's no sane way to type these re-exports +parse_simple_whitespace = mod.parse_simple_whitespace +# 
pyre-fixme[5]: There's no sane way to type these re-exports +parse_empty_lines = mod.parse_empty_lines +# pyre-fixme[5]: There's no sane way to type these re-exports +parse_trailing_whitespace = mod.parse_trailing_whitespace +# pyre-fixme[5]: There's no sane way to type these re-exports +parse_parenthesizable_whitespace = mod.parse_parenthesizable_whitespace diff --git a/libcst_native/Cargo.lock b/libcst_native/Cargo.lock new file mode 100644 index 000000000..ad7168ac9 --- /dev/null +++ b/libcst_native/Cargo.lock @@ -0,0 +1,312 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "aho-corasick" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5" +dependencies = [ + "memchr", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "ctor" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10bcb9d7dcbf7002aaffbb53eac22906b64cdcc127971dcc387d8eb7c95d5560" +dependencies = [ + "quote", + "syn", +] + +[[package]] +name = "ghost" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a5bcf1bbeab73aa4cf2fde60a846858dc036163c7c33bec309f8d17de785479" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "indoc" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a75aeaaef0ce18b58056d306c27b07436fbb34b8816c53094b76dd81803136" +dependencies = [ + "unindent", +] + +[[package]] +name = "instant" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61124eeebbd69b8190558df225adf7e4caafce0d743919e5d6b19652314ec5ec" +dependencies = [ + 
"cfg-if", +] + +[[package]] +name = "inventory" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f0f7efb804ec95e33db9ad49e4252f049e37e8b0a4652e3cd61f7999f2eff7f" +dependencies = [ + "ctor", + "ghost", + "inventory-impl", +] + +[[package]] +name = "inventory-impl" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75c094e94816723ab936484666968f5b58060492e880f3c8d00489a1e244fa51" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "libc" +version = "0.2.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89203f3fba0a3795506acaad8ebce3c80c0af93f994d5a1d7a0b1eeb23271929" + +[[package]] +name = "libcst_native" +version = "0.1.0" +dependencies = [ + "once_cell", + "paste", + "pyo3", + "regex", + "test-case", +] + +[[package]] +name = "lock_api" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd96ffd135b2fd7b973ac026d28085defbe8983df057ced3eb4f2130b0831312" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "memchr" +version = "2.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" + +[[package]] +name = "once_cell" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13bd41f508810a131401606d54ac32a467c97172d74ba7662562ebba5ad07fa0" + +[[package]] +name = "parking_lot" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d7744ac029df22dca6284efe4e898991d28e3085c706c972bcd7da4a27a15eb" +dependencies = [ + "instant", + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ccb628cad4f84851442432c60ad8e1f607e29752d0bf072cbd0baf28aa34272" +dependencies = [ 
+ "cfg-if", + "instant", + "libc", + "redox_syscall", + "smallvec", + "winapi", +] + +[[package]] +name = "paste" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5d65c4d95931acda4498f675e332fcbdc9a06705cd07086c510e9b6009cd1c1" + +[[package]] +name = "proc-macro2" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "pyo3" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00ca634cf3acd58a599b535ed6cb188223298977d471d146121792bfa23b754c" +dependencies = [ + "cfg-if", + "ctor", + "indoc", + "inventory", + "libc", + "parking_lot", + "paste", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-macros" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "483ac516dbda6789a5b4be0271e7a31b9ad4ec8c0a5955050e8076f72bdbef8f" +dependencies = [ + "pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15230cabcda008f03565ed8bac40f094cbb5ee1b46e6551f1ec3a0e922cf7df9" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "quote" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "991431c3519a3f36861882da93630ce66b52918dcf1b8e2fd66b397fc96f28df" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.1.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" + +[[package]] +name = "regex" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d9251239e129e16308e70d853559389de218ac275b515068abc96829d05b948a" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", + "thread_local", +] + +[[package]] +name = "regex-syntax" +version = "0.6.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5eb417147ba9860a96cfe72a0b93bf88fee1744b5636ec99ab20c1aa9376581" + +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "smallvec" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" + +[[package]] +name = "syn" +version = "1.0.59" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07cb8b1b4ebf86a89ee88cbd201b022b94138c623644d035185c84d3f41b7e66" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "test-case" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "956044ef122917dde830c19dec5f76d0670329fde4104836d62ebcb14f4865f1" +dependencies = [ + "cfg-if", + "proc-macro2", + "quote", + "syn", + "version_check", +] + +[[package]] +name = "thread_local" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "301bdd13d23c49672926be451130892d274d3ba0b410c18e00daa7990ff38d99" +dependencies = [ + "once_cell", +] + +[[package]] +name = "unicode-xid" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" + +[[package]] +name = "unindent" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f14ee04d9415b52b3aeab06258a3f07093182b88ba0f9b8d203f211a7a7d41c7" + +[[package]] +name = "version_check" +version = "0.9.2" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/libcst_native/Cargo.toml b/libcst_native/Cargo.toml new file mode 100644 index 000000000..ae517537d --- /dev/null +++ b/libcst_native/Cargo.toml @@ -0,0 +1,31 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +[package] +name = "libcst_native" +version = "0.1.0" +authors = ["LibCST Developers"] +edition = "2018" + +[lib] +name = "libcst_native" +crate-type = ["cdylib", "rlib"] + +[features] +# This is a bit of a hack, since `cargo test` doesn't work with `extension-module`. +# To run tests, use `cargo test --no-default-features`. +# +# Once https://github.com/PyO3/pyo3/pull/1123 lands, it may be better to use +# `-Zextra-link-arg` for this instead. 
+default = ["pyo3/extension-module"] + +[dependencies] +once_cell = "1.5.2" +paste = "1.0.4" +pyo3 = "0.13.0" +regex = "1.4.3" + +[dev-dependencies] +test-case = "1.1.0" diff --git a/libcst_native/README.md b/libcst_native/README.md new file mode 100644 index 000000000..f33563b2e --- /dev/null +++ b/libcst_native/README.md @@ -0,0 +1,66 @@ +# libcst_native + +A very experimental native extension to speed up LibCST. This does not currently provide +much performance benefit and is therefore not recommended for general use. + +The extension is written in Rust using [PyO3](https://pyo3.rs/). + +This installs as a separate python package that LibCST looks for and will import if it's +available. + + +## Using with LibCST + +[Set up a rust development environment](https://www.rust-lang.org/tools/install). Using +`rustup` is recommended, but not necessary. Rust 1.45.0+ should work. + +Follow the instructions for setting up a virtualenv in the top-level README, then: + +``` +cd libcst_native +maturin develop # install libcst_native to the virtualenv +cd .. # cd back into the main project +python -m unittest +``` + +This will run the python test suite. Nothing special is required to use `libcst_native`, +since `libcst` will automatically use the native extension when it's installed. + +When benchmarking this code, make sure to run `maturin develop` with the `--release` +flag to enable compiler optimizations. + +You can disable the native extension by uninstalling the package from your virtualenv: + +``` +pip uninstall libcst_native +``` + + +## Rust Tests + +In addition to running the python test suite, you can run some tests written in rust +with + +``` +cargo test --no-default-features +``` + +The `--no-default-features` flag is needed to work around an incompatibility between +tests and pyo3's `extension-module` feature. + + +## Code Formatting + +Use `cargo fmt` to format your code.
+ + +## Release + +This isn't currently supported, so there are no releases available, but the end-goal +would be to publish this on PyPI. + +Because this is a native extension, it must be re-built for each platform/architecture. +The per-platform build could be automated using a CI system, [like GitHub +Actions][gh-actions]. + +[gh-actions]: https://github.com/PyO3/maturin/blob/master/.github/workflows/release.yml diff --git a/libcst_native/src/lib.rs b/libcst_native/src/lib.rs new file mode 100644 index 000000000..6a7a01d51 --- /dev/null +++ b/libcst_native/src/lib.rs @@ -0,0 +1,36 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// +// This source code is licensed under the MIT license found in the +// LICENSE file in the root directory of this source tree. + +#[macro_use] +mod macros; + +// Our submodules are all pub so that they're shown in `cargo doc`'s output. +// This crate isn't intended to be used from within rust, and won't be published to `crates.io`. +pub mod parser_config; +pub mod py_cached; +pub mod whitespace_parser; +pub mod whitespace_state; + +#[cfg(any(test, doc))] +pub mod test_utils; + +use pyo3::prelude::*; + +#[pymodule] +fn libcst_native(py: Python, m: &PyModule) -> PyResult<()> { + let parser_config_mod = PyModule::new(py, "parser_config")?; + parser_config::init_module(py, parser_config_mod)?; + m.add_submodule(parser_config_mod)?; + + let whitespace_state_mod = PyModule::new(py, "whitespace_state")?; + whitespace_state::init_module(py, whitespace_state_mod)?; + m.add_submodule(whitespace_state_mod)?; + + let whitespace_parser_mod = PyModule::new(py, "whitespace_parser")?; + whitespace_parser::init_module(py, whitespace_parser_mod)?; + m.add_submodule(whitespace_parser_mod)?; + + Ok(()) +} diff --git a/libcst_native/src/macros.rs b/libcst_native/src/macros.rs new file mode 100644 index 000000000..1c47e3fad --- /dev/null +++ b/libcst_native/src/macros.rs @@ -0,0 +1,33 @@ +// Copyright (c) Facebook, Inc. and its affiliates.
+// +// This source code is licensed under the MIT license found in the +// LICENSE file in the root directory of this source tree. + +/// Generates a function that lazily imports and caches a module's member. This will hold a +/// permanent reference to the imported member. Python's module cache is rarely purged though, so +/// it typically won't matter. +/// +/// This cache is cheaper than looking up the module in python's module cache and inspecting +/// the module's `__dict__` each time you want access to the member. +/// +/// If you have multiple imports from the same module, we'll call `py.import` once for each member +/// of the module. +#[macro_export] +macro_rules! py_import { + ( $module_name:expr, $member_name:expr, $getter_fn:ident ) => { + paste::paste! { + static [] + : pyo3::once_cell::GILOnceCell> + = pyo3::once_cell::GILOnceCell::new(); + + fn $getter_fn<'py>(py: pyo3::Python<'py>) -> pyo3::PyResult<&'py pyo3::PyAny> { + Ok([].get_or_init(py, || { + Ok(py.import($module_name)?.get($member_name)?.to_object(py)) + }) + .as_ref() + .map_err(|err| err.clone_ref(py))? + .as_ref(py)) + } + } + }; +} diff --git a/libcst_native/src/parser_config.rs b/libcst_native/src/parser_config.rs new file mode 100644 index 000000000..93fe24742 --- /dev/null +++ b/libcst_native/src/parser_config.rs @@ -0,0 +1,137 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// +// This source code is licensed under the MIT license found in the +// LICENSE file in the root directory of this source tree.
+ +use pyo3::exceptions::PyIndexError; +use pyo3::prelude::*; +use pyo3::types::{IntoPyDict, PyDict, PySequence, PyString}; +use pyo3::wrap_pyfunction; + +use crate::py_cached::PyCached; + +#[pyclass(subclass, module = "libcst_native.parser_config")] +#[text_signature = "(*, lines, default_newline)"] +pub struct BaseWhitespaceParserConfig { + pub lines: PyCached>, + pub default_newline: PyCached, +} + +#[pymethods] +impl BaseWhitespaceParserConfig { + #[new] + fn new(lines: &PySequence, default_newline: &PyString) -> PyResult { + // Eagerly extract both values from the given python objects; extraction errors propagate + Ok(Self { + lines: lines.extract()?, + default_newline: default_newline.extract()?, + }) + } + + #[getter] + fn get_lines(&self, py: Python) -> PyObject { + self.lines.to_object(py) + } + + #[getter] + fn get_default_newline(&self, py: Python) -> PyObject { + self.default_newline.to_object(py) + } +} + +impl BaseWhitespaceParserConfig { + /// Equivalent to `config.lines.unwrap()[line_number - 1]`, but it returns a PyErr when we get + /// an index that's out of range, instead of panicking. + pub fn get_line(&self, line_number: usize) -> PyResult<&str> { + let err_fn = + || PyIndexError::new_err(format!("line number of {} is out of range", line_number)); + self.lines + .get(line_number.checked_sub(1).ok_or_else(err_fn)?) + .map(|l| &l[..]) + .ok_or_else(err_fn) + } + + /// Equivalent to `config.get_line(line_number)[column_index..]`, but it returns a PyErr when + /// we get a column index that's out of range, instead of panicking. + pub fn get_line_after_column(&self, line_number: usize, column_index: usize) -> PyResult<&str> { + self.get_line(line_number)? + .get(column_index..) + .ok_or_else(|| { + PyIndexError::new_err(format!("column index of {} is out of range", column_index)) + }) + } +} + +// These fields are private and PyObject, since we don't currently care about using them from +// within rust.
+#[pyclass(extends=BaseWhitespaceParserConfig, module="libcst_native.parser_config")] +#[text_signature = "(*, lines, encoding, default_indent, default_newline, has_trailing_newline, version, future_imports)"] +pub struct ParserConfig { + // lines is inherited + #[pyo3(get)] + encoding: PyObject, + #[pyo3(get)] + default_indent: PyObject, + // default_newline is inherited + #[pyo3(get)] + has_trailing_newline: PyObject, + #[pyo3(get)] + version: PyObject, + #[pyo3(get)] + future_imports: PyObject, +} + +#[pymethods] +impl ParserConfig { + #[new] + fn new( + lines: &PySequence, + encoding: PyObject, + default_indent: PyObject, + default_newline: &PyString, + has_trailing_newline: PyObject, + version: PyObject, + future_imports: PyObject, + ) -> PyResult<(Self, BaseWhitespaceParserConfig)> { + Ok(( + Self { + encoding, + default_indent, + has_trailing_newline, + version, + future_imports, + }, + BaseWhitespaceParserConfig::new(lines, default_newline)?, + )) + } +} + +/// An internal helper function used by python unit tests to compare configs. +#[pyfunction] +fn parser_config_asdict<'py>(py: Python<'py>, config: PyRef<'py, ParserConfig>) -> &'py PyDict { + let super_config: &BaseWhitespaceParserConfig = config.as_ref(); + vec![ + ("lines", super_config.lines.to_object(py)), + ("encoding", config.encoding.clone_ref(py)), + ("default_indent", config.default_indent.clone_ref(py)), + ( + "default_newline", + super_config.default_newline.to_object(py), + ), + ( + "has_trailing_newline", + config.has_trailing_newline.clone_ref(py), + ), + ("version", config.version.clone_ref(py)), + ("future_imports", config.future_imports.clone_ref(py)), + ] + .into_py_dict(py) +} + +pub fn init_module(_py: Python, m: &PyModule) -> PyResult<()> { + m.add_class::()?; + m.add_class::()?; + m.add_function(wrap_pyfunction!(parser_config_asdict, m)?) 
+ .unwrap(); + Ok(()) +} diff --git a/libcst_native/src/py_cached.rs b/libcst_native/src/py_cached.rs new file mode 100644 index 000000000..e8a4dfd4a --- /dev/null +++ b/libcst_native/src/py_cached.rs @@ -0,0 +1,76 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// +// This source code is licensed under the MIT license found in the +// LICENSE file in the root directory of this source tree. + +use pyo3::prelude::*; +use std::convert::AsRef; +use std::ops::Deref; + +/// An immutable wrapper around a rust type T and its PyObject equivalent. Caches the conversion +/// to and from the PyObject. +pub struct PyCached { + native: T, + py_object: PyObject, +} + +impl PyCached +where + T: ToPyObject, +{ + pub fn new(py: Python, native: T) -> Self { + Self { + py_object: native.to_object(py), + native, + } + } +} + +impl<'source, T> FromPyObject<'source> for PyCached +where + T: FromPyObject<'source>, +{ + fn extract(ob: &'source PyAny) -> PyResult { + Python::with_gil(|py| { + Ok(PyCached { + native: ob.extract()?, + py_object: ob.to_object(py), + }) + }) + } +} + +impl IntoPy for PyCached { + fn into_py(self, _py: Python) -> PyObject { + self.py_object + } +} + +impl ToPyObject for PyCached { + fn to_object(&self, py: Python) -> PyObject { + self.py_object.clone_ref(py) + } +} + +impl AsRef for PyCached { + fn as_ref(&self) -> &T { + &self.native + } +} + +impl Deref for PyCached { + type Target = T; + + fn deref(&self) -> &Self::Target { + &self.native + } +} + +impl From for PyCached +where + T: ToPyObject, +{ + fn from(val: T) -> Self { + Python::with_gil(|py| Self::new(py, val)) + } +} diff --git a/libcst_native/src/test_utils.rs b/libcst_native/src/test_utils.rs new file mode 100644 index 000000000..6a462c8df --- /dev/null +++ b/libcst_native/src/test_utils.rs @@ -0,0 +1,42 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// +// This source code is licensed under the MIT license found in the +// LICENSE file in the root directory of this source tree.
+ +use pyo3::prelude::*; + +py_import!("libcst._nodes.deep_equals", "deep_equals", get_deep_equals); + +pub fn repr_or_panic(py: Python, value: T) -> String +where + T: ToPyObject, +{ + value + .to_object(py) + .as_ref(py) + .repr() + .expect("failed to call repr") + .extract() + .expect("repr should've returned str") +} + +pub fn py_assert_deep_equals(py: Python, left: L, right: R) +where + L: ToPyObject, + R: ToPyObject, +{ + let (left, right) = (left.to_object(py), right.to_object(py)); + let equals = get_deep_equals(py) + .expect("failed to import deep_equals") + .call1((&left, &right)) + .expect("failed to call deep_equals") + .extract::() + .expect("deep_equals should return a bool"); + if !equals { + panic!( + "assertion failed: {} was not deeply equal to {}", + repr_or_panic(py, &left), + repr_or_panic(py, &right), + ); + } +} diff --git a/libcst_native/src/whitespace_parser.rs b/libcst_native/src/whitespace_parser.rs new file mode 100644 index 000000000..22579bd3e --- /dev/null +++ b/libcst_native/src/whitespace_parser.rs @@ -0,0 +1,745 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// +// This source code is licensed under the MIT license found in the +// LICENSE file in the root directory of this source tree. + +/// Parso doesn't attempt to parse (or even emit tokens for) whitespace or comments that aren't +/// syntactically important. Instead, we're just given the whitespace as a "prefix" of the token. +/// +/// However, in our CST, whitespace is gathered into far more detailed objects than a simple str. +/// +/// Fortunately this isn't hard for us to parse ourselves, so we just use our own hand-rolled +/// recursive descent parser.
+use once_cell::sync::Lazy; +use pyo3::exceptions::PyException; +use pyo3::prelude::*; +use pyo3::types::PyTuple; +use pyo3::wrap_pyfunction; +use regex::Regex; + +use crate::parser_config::BaseWhitespaceParserConfig as Config; +use crate::whitespace_state::WhitespaceState as State; + +static SIMPLE_WHITESPACE_RE: Lazy = + Lazy::new(|| Regex::new(r"\A([ \f\t]|\\(\r\n?|\n))*").expect("regex")); +static NEWLINE_RE: Lazy = Lazy::new(|| Regex::new(r"\A(\r\n?|\n)").expect("regex")); +static COMMENT_RE: Lazy = Lazy::new(|| Regex::new(r"\A#[^\r\n]*").expect("regex")); + +py_import!( + "libcst._nodes.whitespace", + "SimpleWhitespace", + get_simple_whitespace_cls +); +py_import!("libcst._nodes.whitespace", "EmptyLine", get_empty_line_cls); +py_import!("libcst._nodes.whitespace", "Comment", get_comment_cls); +py_import!("libcst._nodes.whitespace", "Newline", get_newline_cls); +py_import!( + "libcst._nodes.whitespace", + "TrailingWhitespace", + get_trailing_whitespace_cls +); +py_import!( + "libcst._nodes.whitespace", + "ParenthesizedWhitespace", + get_parenthesized_whitespace_cls +); + +fn new_simple_whitespace<'py>(py: Python<'py>, value: &str) -> PyResult<&'py PyAny> { + Ok(get_simple_whitespace_cls(py)?.call1((value,))?) +} + +fn new_empty_line<'py>( + py: Python<'py>, + indent: bool, + whitespace: &'py PyAny, + comment: Option<&'py PyAny>, + newline: &'py PyAny, +) -> PyResult<&'py PyAny> { + Ok(get_empty_line_cls(py)?.call1((indent, whitespace, comment, newline))?) +} + +fn new_comment<'py>(py: Python<'py>, value: &str) -> PyResult<&'py PyAny> { + Ok(get_comment_cls(py)?.call1((value,))?) +} + +fn new_newline<'py>(py: Python<'py>, value: Option<&str>) -> PyResult<&'py PyAny> { + Ok(get_newline_cls(py)?.call1((value,))?) +} + +fn new_trailing_whitespace<'py>( + py: Python<'py>, + whitespace: &'py PyAny, + comment: Option<&'py PyAny>, + newline: &'py PyAny, +) -> PyResult<&'py PyAny> { + Ok(get_trailing_whitespace_cls(py)?.call1((whitespace, comment, newline))?) 
+} + +fn new_parenthesized_whitespace<'py>( + py: Python<'py>, + first_line: &'py PyAny, + empty_lines: Vec<&'py PyAny>, + indent: bool, + last_line: &'py PyAny, +) -> PyResult<&'py PyAny> { + Ok(get_parenthesized_whitespace_cls(py)?.call1(( + first_line, + PyTuple::new(py, empty_lines), + indent, + last_line, + ))?) +} + +// BEGIN PARSER ENTRYPOINTS + +#[pyfunction] +pub fn parse_simple_whitespace<'py>( + py: Python<'py>, + config: &Config, + state: &mut State, +) -> PyResult<&'py PyAny> { + let capture_ws = |line, column| -> PyResult<&str> { + Ok(SIMPLE_WHITESPACE_RE + .find(config.get_line_after_column(line, column)?) + .expect("SIMPLE_WHITESPACE_RE supports 0-length matches, so it must always match") + .as_str()) + }; + let mut prev_line = capture_ws(state.line, state.column)?; + let mut ws = prev_line.to_string(); + while prev_line.contains('\\') { + // continuation character + state.line += 1; + state.column = 0; + prev_line = capture_ws(state.line, state.column)?; + ws.push_str(prev_line); + } + state.column += prev_line.len(); + new_simple_whitespace(py, &ws[..]) +} + +#[pyfunction] +#[text_signature = "(config, state, *, override_absolute_indent)"] +pub fn parse_empty_lines<'py>( + py: Python<'py>, + config: &Config, + state: &mut State, + override_absolute_indent: Option<&str>, +) -> PyResult<&'py PyTuple> { + // If override_absolute_indent is Some, then we need to parse all lines up to and including the + // last line that is indented at our level. These all belong to the footer and not to the next + // line's leading_lines. + // + // We don't know what the last line with indent=True is, and there could be indent=False lines + // interspersed with indent=True lines, so we need to speculatively parse all possible empty + // lines, and then unwind to find the last empty line with indent=True. 
+ let mut speculative_state = state.clone(); + let mut lines = Vec::new(); + while let Some(empty_line) = + parse_empty_line(py, config, &mut speculative_state, override_absolute_indent)? + { + lines.push((speculative_state.clone(), empty_line)); + } + + if override_absolute_indent.is_some() { + // Remove elements from the end until we find an indented line. + while let Some((_, empty_line)) = lines.last() { + if empty_line.getattr("indent")?.is_true()? { + break; + } + lines.pop(); + } + } + + if let Some((final_state, _)) = lines.last() { + // update the state to match the last line that we captured + *state = final_state.clone(); + } + + Ok(PyTuple::new( + py, + lines.iter().map(|(_, empty_line)| empty_line), + )) +} + +#[pyfunction] +pub fn parse_trailing_whitespace<'py>( + py: Python<'py>, + config: &Config, + state: &mut State, +) -> PyResult<&'py PyAny> { + if let Some(trailing_whitespace) = parse_optional_trailing_whitespace(py, config, state)? { + Ok(trailing_whitespace) + } else { + Err(PyException::new_err(concat!( + "Internal Error: Failed to parse TrailingWhitespace. This should never ", + "happen because a TrailingWhitespace is never optional in the grammar, ", + "so this error should've been caught by parso first.", + ))) + } +} + +#[pyfunction] +pub fn parse_parenthesizable_whitespace<'py>( + py: Python<'py>, + config: &Config, + state: &mut State, +) -> PyResult<&'py PyAny> { + if state.is_parenthesized { + // First, try parenthesized (don't need speculation because it either parses or doesn't + // modify state). + if let Some(parenthesized_whitespace) = parse_parenthesized_whitespace(py, config, state)? { + return Ok(parenthesized_whitespace); + } + } + // It's not parenthesized, or ParenthesizedWhitespace didn't parse. Just parse and return a + // SimpleWhitespace. 
+ parse_simple_whitespace(py, config, state) +} + +// END PARSER ENTRYPOINTS +// BEGIN PARSER INTERNAL PRODUCTIONS + +pub fn parse_empty_line<'py>( + py: Python<'py>, + config: &Config, + state: &mut State, + override_absolute_indent: Option<&str>, +) -> PyResult> { + // begin speculative parsing + let mut speculative_state = state.clone(); + if let Ok(indent) = parse_indent(py, config, &mut speculative_state, override_absolute_indent) { + let whitespace = parse_simple_whitespace(py, config, &mut speculative_state)?; + let comment = parse_comment(py, config, &mut speculative_state)?; + if let Some(newline) = parse_newline(py, config, &mut speculative_state)? { + // speculative parsing succeeded + *state = speculative_state; + Ok(Some(new_empty_line( + py, indent, whitespace, comment, newline, + )?)) + } else { + // no newline found, speculative parsing failed + Ok(None) + } + } else { + // we aren't on a new line, speculative parsing failed + // TODO: Don't rely on a python exception for this, use a rust error type + Ok(None) + } +} + +/// Returns true if indentation was found (and consumed), otherwise false. +pub fn parse_indent( + _py: Python, + config: &Config, + state: &mut State, + override_absolute_indent: Option<&str>, +) -> PyResult { + let absolute_indent = override_absolute_indent.unwrap_or(&state.absolute_indent[..]); + if state.column != 0 { + if state.column == config.get_line(state.line)?.len() && state.line == config.lines.len() { + // we're at EOF, treat this as a failed speculative parse + Ok(false) + } else { + Err(PyException::new_err( + "Internal Error: Column should be 0 when parsing an indent", + )) + } + } else if config + .get_line_after_column(state.line, state.column)?
+ .starts_with(absolute_indent) + { + state.column += absolute_indent.len(); + Ok(true) + } else { + Ok(false) + } +} + +pub fn parse_comment<'py>( + py: Python<'py>, + config: &Config, + state: &mut State, +) -> PyResult> { + if let Some(comment_match) = + COMMENT_RE.find(config.get_line_after_column(state.line, state.column)?) + { + let comment_str = comment_match.as_str(); + state.column += comment_str.len(); + Ok(Some(new_comment(py, comment_str)?)) + } else { + Ok(None) + } +} + +pub fn parse_newline<'py>( + py: Python<'py>, + config: &Config, + state: &mut State, +) -> PyResult> { + // begin speculative parsing + if let Some(newline_match) = + NEWLINE_RE.find(config.get_line_after_column(state.line, state.column)?) + { + // speculative parsing succeeded + let newline_str = newline_match.as_str(); + state.column += newline_str.len(); + if state.column != config.get_line(state.line)?.len() { + return Err(PyException::new_err( + "Internal Error: Found a newline, but it wasn't the EOL.", + )); + } + if state.line < config.lines.len() { + state.line += 1; + state.column = 0; + } + if newline_str == config.default_newline.as_ref() { + Ok(Some(new_newline(py, None)?)) + } else { + Ok(Some(new_newline(py, Some(newline_str))?)) + } + } else { + // no newline was found, speculative parsing failed + Ok(None) + } +} + +pub fn parse_optional_trailing_whitespace<'py>( + py: Python<'py>, + config: &Config, + state: &mut State, +) -> PyResult> { + // begin speculative parsing + let mut speculative_state = state.clone(); + let whitespace = parse_simple_whitespace(py, config, &mut speculative_state)?; + let comment = parse_comment(py, config, &mut speculative_state)?; + if let Some(newline) = parse_newline(py, config, &mut speculative_state)? 
{ + // speculative parsing succeeded + *state = speculative_state; + Ok(Some(new_trailing_whitespace( + py, whitespace, comment, newline, + )?)) + } else { + // speculative parsing failed + Ok(None) + } +} + +pub fn parse_parenthesized_whitespace<'py>( + py: Python<'py>, + config: &Config, + state: &mut State, +) -> PyResult> { + if let Some(first_line) = parse_optional_trailing_whitespace(py, config, state)? { + let mut empty_lines = Vec::new(); + while let Some(empty_line) = parse_empty_line(py, config, state, None)? { + empty_lines.push(empty_line); + } + let indent = parse_indent(py, config, state, None)?; + let last_line = parse_simple_whitespace(py, config, state)?; + Ok(Some(new_parenthesized_whitespace( + py, + first_line, + empty_lines, + indent, + last_line, + )?)) + } else { + Ok(None) + } +} + +pub fn init_module(_py: Python, m: &PyModule) -> PyResult<()> { + m.add_function(wrap_pyfunction!(parse_simple_whitespace, m)?) + .unwrap(); + m.add_function(wrap_pyfunction!(parse_empty_lines, m)?) + .unwrap(); + m.add_function(wrap_pyfunction!(parse_trailing_whitespace, m)?) + .unwrap(); + m.add_function(wrap_pyfunction!(parse_parenthesizable_whitespace, m)?) + .unwrap(); + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + use crate::test_utils::py_assert_deep_equals; + + type ParseFn<'py> = dyn FnOnce(Python<'py>, &Config, &mut State) -> PyResult<&'py PyAny>; + + struct TestCase<'py, 't> { + parser: Box>, + // We could accept a Config instead of lines and default_newline, but Config is a + // little awkward to construct from `&str`, so we do it in `.test()`. 
+ lines: Vec<&'t str>, + default_newline: &'t str, + start_state: State, + end_state: State, + expected_node: &'py PyAny, + } + + impl<'py, 't> TestCase<'py, 't> { + fn test(mut self, py: Python<'py>) { + let config = Config { + lines: self + .lines + .iter() + .map(|l| l.to_string()) + .collect::>() + .into(), + default_newline: self.default_newline.to_string().into(), + }; + let parsed_node = (self.parser)(py, &config, &mut self.start_state) + .unwrap() + .into_py(py); + py_assert_deep_equals(py, &parsed_node, &self.expected_node); + assert_eq!(&self.start_state, &self.end_state); + } + } + + mod simple_whitespace { + use super::*; + + #[test] + fn test_empty() { + Python::with_gil(|py| { + TestCase { + parser: Box::new(parse_simple_whitespace), + lines: vec!["not whitespace\n", " another line\n"], + default_newline: "\n", + start_state: State { + line: 1, + column: 0, + absolute_indent: "".to_string(), + is_parenthesized: false, + }, + end_state: State { + line: 1, + column: 0, + absolute_indent: "".to_string(), + is_parenthesized: false, + }, + expected_node: new_simple_whitespace(py, "").unwrap(), + } + .test(py) + }) + } + + #[test] + fn test_start_of_line() { + Python::with_gil(|py| { + TestCase { + parser: Box::new(parse_simple_whitespace), + lines: vec!["\t <-- There's some whitespace there\n"], + default_newline: "\n", + start_state: State { + line: 1, + column: 0, + absolute_indent: "".to_string(), + is_parenthesized: false, + }, + end_state: State { + line: 1, + column: 3, + absolute_indent: "".to_string(), + is_parenthesized: false, + }, + expected_node: new_simple_whitespace(py, "\t ").unwrap(), + } + .test(py) + }) + } + + #[test] + fn test_end_of_line() { + Python::with_gil(|py| { + TestCase { + parser: Box::new(parse_simple_whitespace), + lines: vec!["prefix "], + default_newline: "\n", + start_state: State { + line: 1, + column: 6, + absolute_indent: "".to_string(), + is_parenthesized: false, + }, + end_state: State { + line: 1, + column: 9, + 
absolute_indent: "".to_string(), + is_parenthesized: false, + }, + expected_node: new_simple_whitespace(py, " ").unwrap(), + } + .test(py) + }) + } + + #[test] + fn test_line_continuation() { + Python::with_gil(|py| { + TestCase { + parser: Box::new(parse_simple_whitespace), + lines: vec!["prefix \\\n", " \\\n", " # suffix\n"], + default_newline: "\n", + start_state: State { + line: 1, + column: 6, + absolute_indent: "".to_string(), + is_parenthesized: false, + }, + end_state: State { + line: 3, + column: 4, + absolute_indent: "".to_string(), + is_parenthesized: false, + }, + expected_node: new_simple_whitespace(py, " \\\n \\\n ").unwrap(), + } + .test(py) + }) + } + } + + mod empty_lines { + use super::*; + + fn parse_empty_lines_no_override<'py>( + py: Python<'py>, + config: &Config, + state: &mut State, + ) -> PyResult<&'py PyAny> { + parse_empty_lines(py, config, state, None).map(|lines| lines.into()) + } + + #[test] + fn test_empty_list() { + Python::with_gil(|py| { + TestCase { + parser: Box::new(parse_empty_lines_no_override), + lines: vec!["this is not an empty line"], + default_newline: "\n", + start_state: State { + line: 1, + column: 0, + absolute_indent: "".to_string(), + is_parenthesized: false, + }, + end_state: State { + line: 1, + column: 0, + absolute_indent: "".to_string(), + is_parenthesized: false, + }, + expected_node: PyTuple::new(py, Vec::<&PyAny>::new()), + } + .test(py) + }) + } + + #[test] + fn test_single_line() { + Python::with_gil(|py| { + TestCase { + parser: Box::new(parse_empty_lines_no_override), + lines: vec![" # comment\n", "this is not an empty line\n"], + default_newline: "\n", + start_state: State { + line: 1, + column: 0, + absolute_indent: " ".to_string(), + is_parenthesized: false, + }, + end_state: State { + line: 2, + column: 0, + absolute_indent: " ".to_string(), + is_parenthesized: false, + }, + expected_node: PyTuple::new( + py, + vec![new_empty_line( + py, + /* indent */ true, + /* whitespace */ 
new_simple_whitespace(py, "").unwrap(), + /* comment */ Some(new_comment(py, "# comment").unwrap()), + /* newline */ new_newline(py, None).unwrap(), + ) + .unwrap()], + ), + } + .test(py) + }) + } + + #[test] + fn test_multiple_lines() { + Python::with_gil(|py| { + TestCase { + parser: Box::new(parse_empty_lines_no_override), + lines: vec![ + "\n", + " \n", + " # comment with indent and whitespace\n", + "# comment without indent\n", + " # comment with no indent but some whitespace\n", + ], + default_newline: "\n", + start_state: State { + line: 1, + column: 0, + absolute_indent: " ".to_string(), + is_parenthesized: false, + }, + end_state: State { + line: 5, + column: 47, + absolute_indent: " ".to_string(), + is_parenthesized: false, + }, + expected_node: PyTuple::new( + py, + vec![ + new_empty_line( + py, + /* indent */ false, + new_simple_whitespace(py, "").unwrap(), + /* comment */ None, + new_newline(py, None).unwrap(), + ) + .unwrap(), + new_empty_line( + py, + /* indent */ true, + new_simple_whitespace(py, "").unwrap(), + /* comment */ None, + new_newline(py, None).unwrap(), + ) + .unwrap(), + new_empty_line( + py, + /* indent */ true, + new_simple_whitespace(py, " ").unwrap(), + Some( + new_comment(py, "# comment with indent and whitespace") + .unwrap(), + ), + new_newline(py, None).unwrap(), + ) + .unwrap(), + new_empty_line( + py, + /* indent */ false, + new_simple_whitespace(py, "").unwrap(), + Some(new_comment(py, "# comment without indent").unwrap()), + new_newline(py, None).unwrap(), + ) + .unwrap(), + new_empty_line( + py, + /* indent */ false, + new_simple_whitespace(py, " ").unwrap(), + Some( + new_comment(py, "# comment with no indent but some whitespace") + .unwrap(), + ), + new_newline(py, None).unwrap(), + ) + .unwrap(), + ], + ), + } + .test(py) + }) + } + + #[test] + fn test_non_default_newline() { + Python::with_gil(|py| { + TestCase { + parser: Box::new(parse_empty_lines_no_override), + lines: vec!["\n", "\r\n", "\r"], + default_newline: 
"\n", + start_state: State { + line: 1, + column: 0, + absolute_indent: "".to_string(), + is_parenthesized: false, + }, + end_state: State { + line: 3, + column: 1, + absolute_indent: "".to_string(), + is_parenthesized: false, + }, + expected_node: PyTuple::new( + py, + vec![ + new_empty_line( + py, + /* indent */ true, + new_simple_whitespace(py, "").unwrap(), + /* comment */ None, + new_newline(py, None).unwrap(), // default newline + ) + .unwrap(), + new_empty_line( + py, + /* indent */ true, + new_simple_whitespace(py, "").unwrap(), + /* comment */ None, + new_newline(py, Some("\r\n")).unwrap(), + ) + .unwrap(), + new_empty_line( + py, + /* indent */ true, + new_simple_whitespace(py, "").unwrap(), + /* comment */ None, + new_newline(py, Some("\r")).unwrap(), + ) + .unwrap(), + ], + ), + } + .test(py) + }) + } + } + + mod trailing_whitespace { + use super::*; + + #[test] + fn test_with_whitespace_and_comment() { + Python::with_gil(|py| { + TestCase { + parser: Box::new(parse_trailing_whitespace), + lines: vec!["some code # comment\n"], + default_newline: "\n", + start_state: State { + line: 1, + column: 9, + absolute_indent: "".to_string(), + is_parenthesized: false, + }, + end_state: State { + line: 1, + column: 21, + absolute_indent: "".to_string(), + is_parenthesized: false, + }, + expected_node: new_trailing_whitespace( + py, + /* whitespace */ new_simple_whitespace(py, " ").unwrap(), + /* comment */ Some(new_comment(py, "# comment").unwrap()), + /* newline */ new_newline(py, None).unwrap(), + ) + .unwrap(), + } + .test(py) + }) + } + } +} diff --git a/libcst_native/src/whitespace_state.rs b/libcst_native/src/whitespace_state.rs new file mode 100644 index 000000000..68d2f0233 --- /dev/null +++ b/libcst_native/src/whitespace_state.rs @@ -0,0 +1,80 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// +// This source code is licensed under the MIT license found in the +// LICENSE file in the root directory of this source tree. 
+ +use pyo3::class::basic::CompareOp; +use pyo3::prelude::*; +use pyo3::PyObjectProtocol; + +#[pyclass(module = "libcst_native.whitespace_state")] +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct WhitespaceState { + #[pyo3(get, set)] + pub line: usize, // one-indexed (to match parso's behavior) + #[pyo3(get, set)] + pub column: usize, // zero-indexed (to match parso's behavior) + #[pyo3(get, set)] + pub absolute_indent: String, + #[pyo3(get, set)] + pub is_parenthesized: bool, +} + +impl Default for WhitespaceState { + fn default() -> Self { + Self { + line: 1, + column: 0, + absolute_indent: "".to_string(), + is_parenthesized: false, + } + } +} + +#[pymethods] +impl WhitespaceState { + #[new] + fn new(line: usize, column: usize, absolute_indent: String, is_parenthesized: bool) -> Self { + WhitespaceState { + line, + column, + absolute_indent, + is_parenthesized, + } + } +} + +#[pyproto] +impl PyObjectProtocol for WhitespaceState { + fn __repr__(&self) -> PyResult { + Python::with_gil(|py| { + Ok(format!( + "WhitespaceState({}, {}, {}, {})", + self.line, + self.column, + self.absolute_indent + .to_object(py) + .as_ref(py) + .repr()? 
+ .to_str()?, + self.is_parenthesized + )) + }) + } + + // The python unit tests need to be able to compare WhitespaceState + fn __richcmp__(&self, other: PyRef, op: CompareOp) -> PyResult { + Python::with_gil(|py| { + Ok(match op { + CompareOp::Eq => (self == &*other).into_py(py), + CompareOp::Ne => (self != &*other).into_py(py), + _ => Python::NotImplemented(py), + }) + }) + } +} + +pub fn init_module(_py: Python, m: &PyModule) -> PyResult<()> { + m.add_class::()?; + Ok(()) +} diff --git a/requirements-dev.txt b/requirements-dev.txt index 2f86e9b3d..d44effd6f 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -8,6 +8,7 @@ hypothesmith>=0.0.4 git+https://github.com/jimmylai/sphinx.git@slots_type_annotation isort==5.5.3 jupyter>=1.0.0 +maturin>=0.8.3,<0.9 nbsphinx>=0.4.2 pyre-check==0.0.41 sphinx-rtd-theme>=0.4.3 diff --git a/setup.py b/setup.py index 4dd024dd5..39a4b587b 100644 --- a/setup.py +++ b/setup.py @@ -49,6 +49,7 @@ install_requires=[dep.strip() for dep in open("requirements.txt").readlines()], extras_require={ "dev": [dep.strip() for dep in open("requirements-dev.txt").readlines() if "=" in dep], + "native": ["libcst_native==0.1.0"], }, classifiers=[ "License :: OSI Approved :: MIT License", diff --git a/stubs/libcst_native/parser_config.pyi b/stubs/libcst_native/parser_config.pyi new file mode 100644 index 000000000..ac1cce21c --- /dev/null +++ b/stubs/libcst_native/parser_config.pyi @@ -0,0 +1,45 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from typing import Any, FrozenSet, Mapping, Sequence + +from libcst._parser.parso.utils import PythonVersionInfo + + +class BaseWhitespaceParserConfig: + def __new__( + cls, + *, + lines: Sequence[str], + default_newline: str, + ) -> BaseWhitespaceParserConfig: ... 
+ + lines: Sequence[str] + default_newline: str + + +class ParserConfig(BaseWhitespaceParserConfig): + def __new__( + cls, + *, + lines: Sequence[str], + encoding: str, + default_indent: str, + default_newline: str, + has_trailing_newline: bool, + version: PythonVersionInfo, + future_imports: FrozenSet[str], + ) -> BaseWhitespaceParserConfig: ... + + # lines is inherited + encoding: str + default_indent: str + # default_newline is inherited + has_trailing_newline: bool + version: PythonVersionInfo + future_imports: FrozenSet[str] + + +def parser_config_asdict(config: ParserConfig) -> Mapping[str, Any]: ... diff --git a/stubs/libcst_native/whitespace_parser.pyi b/stubs/libcst_native/whitespace_parser.pyi new file mode 100644 index 000000000..1e8ebb253 --- /dev/null +++ b/stubs/libcst_native/whitespace_parser.pyi @@ -0,0 +1,29 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from typing import Optional, Sequence, Union + +from libcst._nodes.whitespace import ( + EmptyLine, + Newline, + ParenthesizedWhitespace, + SimpleWhitespace, + TrailingWhitespace, +) +from libcst._parser.types.config import BaseWhitespaceParserConfig as Config +from libcst._parser.types.whitespace_state import WhitespaceState as State + + +def parse_simple_whitespace(config: Config, state: State) -> SimpleWhitespace: ... +def parse_empty_lines( + config: Config, + state: State, + *, + override_absolute_indent: Optional[str] = None, +) -> Sequence[EmptyLine]: ... +def parse_trailing_whitespace(config: Config, state: State) -> TrailingWhitespace: ... +def parse_parenthesizable_whitespace( + config: Config, state: State +) -> Union[SimpleWhitespace, ParenthesizedWhitespace]: ... 
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# Type stub for the native `WhitespaceState` class: the parser's cursor
# (line, column) plus the indent and parenthesization context it is parsing in.
class WhitespaceState:
    # All four arguments are required; none has a default.
    def __new__(
        cls, line: int, column: int, absolute_indent: str, is_parenthesized: bool
    ) -> WhitespaceState: ...

    line: int  # one-indexed (to match parso's behavior)
    column: int  # zero-indexed (to match parso's behavior)
    # What to look for when executing `_parse_indent`.
    absolute_indent: str
    # NOTE(review): presumably True while the cursor is inside parentheses --
    # confirm against the parser that sets it.
    is_parenthesized: bool