Skip to content

Commit

Permalink
Parse triple quoted string annotations as if parenthesized (#15387)
Browse files Browse the repository at this point in the history
## Summary

Resolves #9467 

Parse quoted annotations as if the string content is inside parentheses.
With this logic, `y` and `z` in this example are equivalent:

```python
y: """
   int |
   str
"""

z: """(
    int |
    str
)
"""
```

Also, this rule only applies to triple
quotes ([link](python/typing-council#9 (comment))).

This PR is based on the
[comments](#9467 (comment))
on the issue.

I made one extra change: since we don't want any indentation tokens, I am
setting `State::Other` as the initial state of the lexer.

Remaining work:

- [x] Add a test case for red-knot.
- [x] Add more tests.

## Test Plan

Added a test which previously failed because the quoted annotation contained
indentation.
Added an mdtest for red-knot.
Updated previous test.

Co-authored-by: Dhruv Manilawala <dhruvmanila@gmail.com>
Co-authored-by: Micha Reiser <micha@reiser.io>
  • Loading branch information
3 people authored Jan 16, 2025
1 parent d2656e8 commit cf4ab7c
Show file tree
Hide file tree
Showing 10 changed files with 295 additions and 31 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -173,3 +173,40 @@ p: "call()"
r: "[1, 2]"
s: "(1, 2)"
```

## Multi line annotation

Quoted type annotations should be parsed as if surrounded by parentheses.

```py
def valid(
a1: """(
int |
str
)
""",
a2: """
int |
str
""",
):
reveal_type(a1) # revealed: int | str
reveal_type(a2) # revealed: int | str

def invalid(
# error: [invalid-syntax-in-forward-annotation]
a1: """
int |
str)
""",
# error: [invalid-syntax-in-forward-annotation]
a2: """
int) |
str
""",
# error: [invalid-syntax-in-forward-annotation]
a3: """
(int)) """,
):
pass
```
20 changes: 5 additions & 15 deletions crates/red_knot_python_semantic/src/types/string_annotation.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use ruff_db::source::source_text;
use ruff_python_ast::str::raw_contents;
use ruff_python_ast::{self as ast, ModExpression, StringFlags};
use ruff_python_parser::{parse_expression_range, Parsed};
use ruff_python_ast::{self as ast, ModExpression};
use ruff_python_parser::Parsed;
use ruff_text_size::Ranged;

use crate::declare_lint;
Expand Down Expand Up @@ -153,19 +153,9 @@ pub(crate) fn parse_string_annotation(
} else if raw_contents(node_text)
.is_some_and(|raw_contents| raw_contents == string_literal.as_str())
{
let range_excluding_quotes = string_literal
.range()
.add_start(string_literal.flags.opener_len())
.sub_end(string_literal.flags.closer_len());

// TODO: Support multiline strings like:
// ```py
// x: """
// int
// | float
// """ = 1
// ```
match parse_expression_range(source.as_str(), range_excluding_quotes) {
let parsed =
ruff_python_parser::parse_string_annotation(source.as_str(), string_literal);
match parsed {
Ok(parsed) => return Some(parsed),
Err(parse_error) => context.report_lint(
&INVALID_SYNTAX_IN_FORWARD_ANNOTATION,
Expand Down
31 changes: 31 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/pyflakes/F722.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,34 @@ def g() -> "///":


X: """List[int]"""'☃' = []

# Type annotations with triple quotes can contain newlines and indentation
# https://github.com/python/typing-council/issues/9
y: """
int |
str
"""

z: """(
int |
str
)
"""

invalid1: """
int |
str)
"""

invalid2: """
int) |
str
"""
invalid3: """
((int)
"""
invalid4: """
(int
"""
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,6 @@ a: '''\\
list[int]''' = [42]


# TODO: These are valid too. String annotations are assumed to be enclosed in parentheses.
# https://github.com/astral-sh/ruff/issues/9467

def f(a: '''
list[int]
''' = []): ...
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,68 @@ F722.py:13:4: F722 Syntax error in forward annotation: `List[int]☃`
|
13 | X: """List[int]"""'' = []
| ^^^^^^^^^^^^^^^^^^ F722
14 |
15 | # Type annotations with triple quotes can contain newlines and indentation
|

F722.py:30:11: F722 Syntax error in forward annotation: `
int |
str)
`
|
28 | """
29 |
30 | invalid1: """
| ___________^
31 | | int |
32 | | str)
33 | | """
| |___^ F722
34 |
35 | invalid2: """
|

F722.py:35:11: F722 Syntax error in forward annotation: `
int) |
str
`
|
33 | """
34 |
35 | invalid2: """
| ___________^
36 | | int) |
37 | | str
38 | | """
| |___^ F722
39 | invalid3: """
40 | ((int)
|

F722.py:39:11: F722 Syntax error in forward annotation: `
((int)
`
|
37 | str
38 | """
39 | invalid3: """
| ___________^
40 | | ((int)
41 | | """
| |___^ F722
42 | invalid4: """
43 | (int
|

F722.py:42:11: F722 Syntax error in forward annotation: `
(int
`
|
40 | ((int)
41 | """
42 | invalid4: """
| ___________^
43 | | (int
44 | | """
| |___^ F722
|
Original file line number Diff line number Diff line change
Expand Up @@ -158,4 +158,75 @@ UP037_2.pyi:32:4: UP037 [*] Remove quotes from type annotation
33 |+list[int]) = [42]
34 34 |
35 35 |
36 36 | # TODO: These are valid too. String annotations are assumed to be enclosed in parentheses.
36 36 | def f(a: '''
UP037_2.pyi:36:10: UP037 [*] Remove quotes from type annotation
|
36 | def f(a: '''
| __________^
37 | | list[int]
38 | | ''' = []): ...
| |_______^ UP037
|
= help: Remove quotes

Safe fix
33 33 | list[int]''' = [42]
34 34 |
35 35 |
36 |-def f(a: '''
36 |+def f(a:
37 37 | list[int]
38 |- ''' = []): ...
38 |+ = []): ...
39 39 |
40 40 |
41 41 | def f(a: Foo['''
UP037_2.pyi:41:14: UP037 [*] Remove quotes from type annotation
|
41 | def f(a: Foo['''
| ______________^
42 | | Bar
43 | | [
44 | | Multi |
45 | | Line
46 | | ] # Comment''']): ...
| |___________________^ UP037
|
= help: Remove quotes

Safe fix
38 38 | ''' = []): ...
39 39 |
40 40 |
41 |-def f(a: Foo['''
41 |+def f(a: Foo[(
42 42 | Bar
43 43 | [
44 44 | Multi |
45 45 | Line
46 |- ] # Comment''']): ...
46 |+ ] # Comment
47 |+)]): ...
47 48 |
48 49 |
49 50 | a: '''list

UP037_2.pyi:49:4: UP037 [*] Remove quotes from type annotation
|
49 | a: '''list
| ____^
50 | | [int]''' = [42]
| |________^ UP037
|
= help: Remove quotes

Safe fix
46 46 | ] # Comment''']): ...
47 47 |
48 48 |
49 |-a: '''list
50 |-[int]''' = [42]
49 |+a: (list
50 |+[int]) = [42]
16 changes: 13 additions & 3 deletions crates/ruff_python_parser/src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,15 +84,21 @@ impl<'src> Lexer<'src> {
"Lexer only supports files with a size up to 4GB"
);

let (state, nesting) = if mode == Mode::ParenthesizedExpression {
(State::Other, 1)
} else {
(State::AfterNewline, 0)
};

let mut lexer = Lexer {
source,
cursor: Cursor::new(source),
state: State::AfterNewline,
state,
current_kind: TokenKind::EndOfFile,
current_range: TextRange::empty(start_offset),
current_value: TokenValue::None,
current_flags: TokenFlags::empty(),
nesting: 0,
nesting,
indentations: Indentations::default(),
pending_indentation: None,
mode,
Expand Down Expand Up @@ -1309,7 +1315,11 @@ impl<'src> Lexer<'src> {
fn consume_end(&mut self) -> TokenKind {
// We reached end of file.
// First of all, we need all nestings to be finished.
if self.nesting > 0 {
// For Mode::ParenthesizedExpression we start with nesting level 1.
// So we check if we end with that level.
let init_nesting = u32::from(self.mode == Mode::ParenthesizedExpression);

if self.nesting > init_nesting {
// Reset the nesting to avoid going into infinite loop.
self.nesting = 0;
return self.push_error(LexicalError::new(LexicalErrorType::Eof, self.token_range()));
Expand Down
68 changes: 67 additions & 1 deletion crates/ruff_python_parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,9 @@ pub use crate::token::{Token, TokenKind};

use crate::parser::Parser;

use ruff_python_ast::{Expr, Mod, ModExpression, ModModule, PySourceType, Suite};
use ruff_python_ast::{
Expr, Mod, ModExpression, ModModule, PySourceType, StringFlags, StringLiteral, Suite,
};
use ruff_python_trivia::CommentRanges;
use ruff_text_size::{Ranged, TextRange, TextSize};

Expand Down Expand Up @@ -166,6 +168,65 @@ pub fn parse_expression_range(
.into_result()
}

/// Parses a Python expression as if it were enclosed in parentheses.
///
/// Behaves like [`parse_expression_range`], but additionally accepts anything that would be
/// valid inside parentheses, such as an expression that spans multiple lines. The parentheses
/// themselves do not have to be present in the source.
///
/// `source` is truncated at `range.end()` and parsing starts at `range.start()`.
///
/// # Panics
///
/// Panics if `range.end()` lies beyond the end of `source` or does not fall on a UTF-8
/// character boundary.
///
/// # Example
///
/// Parsing an expression that would be valid within parentheses:
///
/// ```
/// use ruff_python_parser::parse_parenthesized_expression_range;
/// # use ruff_text_size::{TextRange, TextSize};
///
/// let parsed = parse_parenthesized_expression_range("'''\n int | str'''", TextRange::new(TextSize::new(3), TextSize::new(14)));
/// assert!(parsed.is_ok());
/// ```
pub fn parse_parenthesized_expression_range(
    source: &str,
    range: TextRange,
) -> Result<Parsed<ModExpression>, ParseError> {
    // Cut the source off at the end of the range so the lexer never sees what follows it
    // (for a string annotation, that is the closing quotes).
    let source = &source[..range.end().to_usize()];
    let parsed =
        Parser::new_starts_at(source, Mode::ParenthesizedExpression, range.start()).parse();
    parsed
        .try_into_expression()
        .expect("parsing in `Mode::ParenthesizedExpression` should always produce an expression")
        .into_result()
}

/// Parses the contents of a string annotation as a Python expression.
///
/// The opening and closing quotes of `string` are excluded from the parsed range, and
/// `string.range()` is interpreted as offsets into `source`. Triple-quoted strings are parsed
/// with [`parse_parenthesized_expression_range`]; every other string is parsed with
/// [`parse_expression_range`].
///
/// # Example
///
/// Parsing a string annotation:
///
/// NOTE(review): the literal below is built with `StringLiteralFlags::default()` and a range
/// that looks one byte shorter than the 17-byte source, so this example presumably exercises
/// the error path rather than a successful parse. Confirm, and consider adding an example
/// with a correctly flagged triple-quoted literal.
///
/// ```
/// use ruff_python_parser::parse_string_annotation;
/// use ruff_python_ast::{StringLiteral, StringLiteralFlags};
/// use ruff_text_size::{TextRange, TextSize};
///
/// let string = StringLiteral {
///     value: "'''\n int | str'''".to_string().into_boxed_str(),
///     flags: StringLiteralFlags::default(),
///     range: TextRange::new(TextSize::new(0), TextSize::new(16)),
/// };
/// let parsed = parse_string_annotation("'''\n int | str'''", &string);
/// assert!(parsed.is_err());
/// ```
pub fn parse_string_annotation(
    source: &str,
    string: &StringLiteral,
) -> Result<Parsed<ModExpression>, ParseError> {
    // Shrink the literal's range so that it covers only the text between the quotes.
    let inner_range = string
        .range()
        .add_start(string.flags.opener_len())
        .sub_end(string.flags.closer_len());

    // Drop everything after the annotation body, including the closing quotes.
    let truncated = &source[..inner_range.end().to_usize()];

    if string.flags.is_triple_quoted() {
        parse_parenthesized_expression_range(truncated, inner_range)
    } else {
        parse_expression_range(truncated, inner_range)
    }
}

/// Parse the given Python source code using the specified [`Mode`].
///
/// This function is the most general function to parse Python code. Based on the [`Mode`] supplied,
Expand Down Expand Up @@ -582,6 +643,11 @@ pub enum Mode {
/// The code consists of a single expression.
Expression,

/// The code consists of a single expression and is parsed as if it is parenthesized. The parentheses themselves aren't required.
/// This allows for having valid multiline expression without the need of parentheses
/// and is specifically useful for parsing string annotations.
ParenthesizedExpression,

/// The code consists of a sequence of statements which can include the
/// escape commands that are part of IPython syntax.
///
Expand Down
Loading

0 comments on commit cf4ab7c

Please sign in to comment.