Skip to content

Commit

Permalink
Move string-prefix enumerations to a separate submodule (#11425)
Browse files Browse the repository at this point in the history
## Summary

This moves the string-prefix enumerations in `ruff_python_ast` to a
separate submodule. I think this helps clarify that these prefixes are
purely abstract: they only depend on each other, and do not depend on
any of the other code in `nodes.rs` in any way. Moreover, while various
AST nodes _use_ them, they're not really nodes themselves, so they feel
slightly out of place in `nodes.rs`.

I considered moving all of them to `str.rs`, but it felt like enough
code that it could be a separate submodule.

## Test Plan

`cargo test`
  • Loading branch information
AlexWaygood authored May 15, 2024
1 parent effe3ad commit 6963f75
Show file tree
Hide file tree
Showing 7 changed files with 256 additions and 240 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use ast::{StringLiteralFlags, StringLiteralPrefix};
use ruff_python_ast::{self as ast, Arguments, Expr};
use ruff_python_ast::{
self as ast, str_prefix::StringLiteralPrefix, Arguments, Expr, StringLiteralFlags,
};
use ruff_text_size::Ranged;

use crate::fix::snippet::SourceCodeSnippet;
Expand Down
1 change: 1 addition & 0 deletions crates/ruff_python_ast/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ pub mod relocate;
pub mod statement_visitor;
pub mod stmt_if;
pub mod str;
pub mod str_prefix;
pub mod traversal;
pub mod types;
pub mod visitor;
Expand Down
268 changes: 36 additions & 232 deletions crates/ruff_python_ast/src/nodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,12 @@ use itertools::Itertools;

use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};

use crate::{int, str::Quote, LiteralExpressionRef};
use crate::{
int,
str::Quote,
str_prefix::{AnyStringPrefix, ByteStringPrefix, FStringPrefix, StringLiteralPrefix},
LiteralExpressionRef,
};

/// See also [mod](https://docs.python.org/3/library/ast.html#ast.mod)
#[derive(Clone, Debug, PartialEq, is_macro::Is)]
Expand Down Expand Up @@ -1372,40 +1377,6 @@ bitflags! {
}
}

/// Enumeration of the valid prefixes an f-string literal can have.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum FStringPrefix {
/// Just a regular f-string with no other prefixes, e.g. f"{bar}"
Regular,

/// A "raw" format-string, that has an `r` or `R` prefix,
/// e.g. `rf"{bar}"` or `Rf"{bar}"`
Raw { uppercase_r: bool },
}

impl FStringPrefix {
/// Return a `str` representation of the prefix
pub const fn as_str(self) -> &'static str {
match self {
Self::Regular => "f",
Self::Raw { uppercase_r: true } => "Rf",
Self::Raw { uppercase_r: false } => "rf",
}
}

/// Return true if this prefix indicates a "raw f-string",
/// e.g. `rf"{bar}"` or `Rf"{bar}"`
pub const fn is_raw(self) -> bool {
matches!(self, Self::Raw { .. })
}
}

impl fmt::Display for FStringPrefix {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(self.as_str())
}
}

/// Flags that can be queried to obtain information
/// regarding the prefixes and quotes used for an f-string.
#[derive(Default, Copy, Clone, Eq, PartialEq, Hash)]
Expand Down Expand Up @@ -1890,47 +1861,6 @@ impl fmt::Debug for StringLiteralFlags {
}
}

/// Enumerations of the valid prefixes a string literal can have.
///
/// Bytestrings and f-strings are excluded from this enumeration,
/// as they are represented by different AST nodes.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, is_macro::Is)]
pub enum StringLiteralPrefix {
/// Just a regular string with no prefixes
Empty,

/// A string with a `u` or `U` prefix, e.g. `u"foo"`.
/// Note that, despite this variant's name,
/// it is in fact a no-op at runtime to use the `u` or `U` prefix
/// in Python. All Python-3 strings are unicode strings;
/// this prefix is only allowed in Python 3 for backwards compatibility
/// with Python 2. However, using this prefix in a Python string
/// is mutually exclusive with an `r` or `R` prefix.
Unicode,

/// A "raw" string, that has an `r` or `R` prefix,
/// e.g. `r"foo\."` or `R'bar\d'`.
Raw { uppercase: bool },
}

impl StringLiteralPrefix {
/// Return a `str` representation of the prefix
pub const fn as_str(self) -> &'static str {
match self {
Self::Empty => "",
Self::Unicode => "u",
Self::Raw { uppercase: true } => "R",
Self::Raw { uppercase: false } => "r",
}
}
}

impl fmt::Display for StringLiteralPrefix {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(self.as_str())
}
}

/// An AST node that represents a single string literal which is part of an
/// [`ExprStringLiteral`].
#[derive(Clone, Debug, Default, PartialEq)]
Expand Down Expand Up @@ -2189,40 +2119,6 @@ bitflags! {
}
}

/// Enumeration of the valid prefixes a bytestring literal can have.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum ByteStringPrefix {
/// Just a regular bytestring with no other prefixes, e.g. `b"foo"`
Regular,

/// A "raw" bytestring, that has an `r` or `R` prefix,
/// e.g. `Rb"foo"` or `rb"foo"`
Raw { uppercase_r: bool },
}

impl ByteStringPrefix {
/// Return a `str` representation of the prefix
pub const fn as_str(self) -> &'static str {
match self {
Self::Regular => "b",
Self::Raw { uppercase_r: true } => "Rb",
Self::Raw { uppercase_r: false } => "rb",
}
}

/// Return true if this prefix indicates a "raw bytestring",
/// e.g. `rb"foo"` or `Rb"foo"`
pub const fn is_raw(self) -> bool {
matches!(self, Self::Raw { .. })
}
}

impl fmt::Display for ByteStringPrefix {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(self.as_str())
}
}

/// Flags that can be queried to obtain information
/// regarding the prefixes and quotes used for a bytes literal.
#[derive(Default, Copy, Clone, Eq, PartialEq, Hash)]
Expand Down Expand Up @@ -2406,172 +2302,80 @@ bitflags! {
}
}

/// Enumeration of all the possible valid prefixes
/// prior to a Python string literal.
///
/// Using the `as_flags()` method on variants of this enum
/// is the recommended way to set `*_PREFIX` flags from the
/// `StringFlags` bitflag, as it means that you cannot accidentally
/// set a combination of `*_PREFIX` flags that would be invalid
/// at runtime in Python.
///
/// [String and Bytes literals]: https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
/// [PEP 701]: https://peps.python.org/pep-0701/
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum AnyStringPrefix {
/// Prefixes that indicate the string is a bytestring
Bytes(ByteStringPrefix),

/// Prefixes that indicate the string is an f-string
Format(FStringPrefix),

/// All other prefixes
Regular(StringLiteralPrefix),
}

impl TryFrom<char> for AnyStringPrefix {
type Error = String;

fn try_from(value: char) -> Result<Self, String> {
let result = match value {
'r' => Self::Regular(StringLiteralPrefix::Raw { uppercase: false }),
'R' => Self::Regular(StringLiteralPrefix::Raw { uppercase: true }),
'u' | 'U' => Self::Regular(StringLiteralPrefix::Unicode),
'b' | 'B' => Self::Bytes(ByteStringPrefix::Regular),
'f' | 'F' => Self::Format(FStringPrefix::Regular),
_ => return Err(format!("Unexpected prefix '{value}'")),
};
Ok(result)
}
}

impl TryFrom<[char; 2]> for AnyStringPrefix {
type Error = String;

fn try_from(value: [char; 2]) -> Result<Self, String> {
let result = match value {
['r', 'f' | 'F'] | ['f' | 'F', 'r'] => {
Self::Format(FStringPrefix::Raw { uppercase_r: false })
}
['R', 'f' | 'F'] | ['f' | 'F', 'R'] => {
Self::Format(FStringPrefix::Raw { uppercase_r: true })
}
['r', 'b' | 'B'] | ['b' | 'B', 'r'] => {
Self::Bytes(ByteStringPrefix::Raw { uppercase_r: false })
}
['R', 'b' | 'B'] | ['b' | 'B', 'R'] => {
Self::Bytes(ByteStringPrefix::Raw { uppercase_r: true })
}
_ => return Err(format!("Unexpected prefix '{}{}'", value[0], value[1])),
};
Ok(result)
}
}
#[derive(Default, Clone, Copy, PartialEq, Eq, Hash)]
pub struct AnyStringFlags(AnyStringFlagsInner);

impl AnyStringPrefix {
const fn as_flags(self) -> AnyStringFlagsInner {
match self {
impl AnyStringFlags {
#[must_use]
pub fn with_prefix(mut self, prefix: AnyStringPrefix) -> Self {
self.0 |= match prefix {
// regular strings
Self::Regular(StringLiteralPrefix::Empty) => AnyStringFlagsInner::empty(),
Self::Regular(StringLiteralPrefix::Unicode) => AnyStringFlagsInner::U_PREFIX,
Self::Regular(StringLiteralPrefix::Raw { uppercase: false }) => {
AnyStringPrefix::Regular(StringLiteralPrefix::Empty) => AnyStringFlagsInner::empty(),
AnyStringPrefix::Regular(StringLiteralPrefix::Unicode) => AnyStringFlagsInner::U_PREFIX,
AnyStringPrefix::Regular(StringLiteralPrefix::Raw { uppercase: false }) => {
AnyStringFlagsInner::R_PREFIX_LOWER
}
Self::Regular(StringLiteralPrefix::Raw { uppercase: true }) => {
AnyStringPrefix::Regular(StringLiteralPrefix::Raw { uppercase: true }) => {
AnyStringFlagsInner::R_PREFIX_UPPER
}

// bytestrings
Self::Bytes(ByteStringPrefix::Regular) => AnyStringFlagsInner::B_PREFIX,
Self::Bytes(ByteStringPrefix::Raw { uppercase_r: false }) => {
AnyStringPrefix::Bytes(ByteStringPrefix::Regular) => AnyStringFlagsInner::B_PREFIX,
AnyStringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: false }) => {
AnyStringFlagsInner::B_PREFIX.union(AnyStringFlagsInner::R_PREFIX_LOWER)
}
Self::Bytes(ByteStringPrefix::Raw { uppercase_r: true }) => {
AnyStringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: true }) => {
AnyStringFlagsInner::B_PREFIX.union(AnyStringFlagsInner::R_PREFIX_UPPER)
}

// f-strings
Self::Format(FStringPrefix::Regular) => AnyStringFlagsInner::F_PREFIX,
Self::Format(FStringPrefix::Raw { uppercase_r: false }) => {
AnyStringPrefix::Format(FStringPrefix::Regular) => AnyStringFlagsInner::F_PREFIX,
AnyStringPrefix::Format(FStringPrefix::Raw { uppercase_r: false }) => {
AnyStringFlagsInner::F_PREFIX.union(AnyStringFlagsInner::R_PREFIX_LOWER)
}
Self::Format(FStringPrefix::Raw { uppercase_r: true }) => {
AnyStringPrefix::Format(FStringPrefix::Raw { uppercase_r: true }) => {
AnyStringFlagsInner::F_PREFIX.union(AnyStringFlagsInner::R_PREFIX_UPPER)
}
}
};
self
}

const fn from_kind(kind: AnyStringFlags) -> Self {
let AnyStringFlags(flags) = kind;
pub const fn prefix(self) -> AnyStringPrefix {
let AnyStringFlags(flags) = self;

// f-strings
if flags.contains(AnyStringFlagsInner::F_PREFIX) {
if flags.contains(AnyStringFlagsInner::R_PREFIX_LOWER) {
return Self::Format(FStringPrefix::Raw { uppercase_r: false });
return AnyStringPrefix::Format(FStringPrefix::Raw { uppercase_r: false });
}
if flags.contains(AnyStringFlagsInner::R_PREFIX_UPPER) {
return Self::Format(FStringPrefix::Raw { uppercase_r: true });
return AnyStringPrefix::Format(FStringPrefix::Raw { uppercase_r: true });
}
return Self::Format(FStringPrefix::Regular);
return AnyStringPrefix::Format(FStringPrefix::Regular);
}

// bytestrings
if flags.contains(AnyStringFlagsInner::B_PREFIX) {
if flags.contains(AnyStringFlagsInner::R_PREFIX_LOWER) {
return Self::Bytes(ByteStringPrefix::Raw { uppercase_r: false });
return AnyStringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: false });
}
if flags.contains(AnyStringFlagsInner::R_PREFIX_UPPER) {
return Self::Bytes(ByteStringPrefix::Raw { uppercase_r: true });
return AnyStringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: true });
}
return Self::Bytes(ByteStringPrefix::Regular);
return AnyStringPrefix::Bytes(ByteStringPrefix::Regular);
}

// all other strings
if flags.contains(AnyStringFlagsInner::R_PREFIX_LOWER) {
return Self::Regular(StringLiteralPrefix::Raw { uppercase: false });
return AnyStringPrefix::Regular(StringLiteralPrefix::Raw { uppercase: false });
}
if flags.contains(AnyStringFlagsInner::R_PREFIX_UPPER) {
return Self::Regular(StringLiteralPrefix::Raw { uppercase: true });
return AnyStringPrefix::Regular(StringLiteralPrefix::Raw { uppercase: true });
}
if flags.contains(AnyStringFlagsInner::U_PREFIX) {
return Self::Regular(StringLiteralPrefix::Unicode);
return AnyStringPrefix::Regular(StringLiteralPrefix::Unicode);
}
Self::Regular(StringLiteralPrefix::Empty)
}

pub const fn as_str(self) -> &'static str {
match self {
Self::Regular(regular_prefix) => regular_prefix.as_str(),
Self::Bytes(bytestring_prefix) => bytestring_prefix.as_str(),
Self::Format(fstring_prefix) => fstring_prefix.as_str(),
}
}
}

impl fmt::Display for AnyStringPrefix {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(self.as_str())
}
}

impl Default for AnyStringPrefix {
fn default() -> Self {
Self::Regular(StringLiteralPrefix::Empty)
}
}

#[derive(Default, Clone, Copy, PartialEq, Eq, Hash)]
pub struct AnyStringFlags(AnyStringFlagsInner);

impl AnyStringFlags {
#[must_use]
pub fn with_prefix(mut self, prefix: AnyStringPrefix) -> Self {
self.0 |= prefix.as_flags();
self
}

pub const fn prefix(self) -> AnyStringPrefix {
AnyStringPrefix::from_kind(self)
AnyStringPrefix::Regular(StringLiteralPrefix::Empty)
}

pub fn new(prefix: AnyStringPrefix, quotes: Quote, triple_quoted: bool) -> Self {
Expand Down
Loading

0 comments on commit 6963f75

Please sign in to comment.