Skip to content

Commit

Permalink
Several fixes for parsing display names
Browse files Browse the repository at this point in the history
* Fix error message text for input addresses without @-signs. The incorrect message was "There must be something after the @-sign.". This was broken by the changes to parse display names. Prior to that, the message was "The email address is not valid. It must have exactly one @-sign.".
* Move the allow_display_name check to the end of the syntax checks. The optional checks should be the last to occur so that fatal syntax errors are raised first.
* Check that display name email addresses have a closing angle bracket and nothing after.
* Don't treat < + U+0338 (Combining Long Solidus Overlay) as the start of a bracketed email address. This would already be rejected because the combining character would be reported as an unsafe character at the start of the address, but it may be confusing since the caller won't see the address that way. When splitting the address into parts, skip the other special characters (@, quote, backslash) that have meaningful combining characters after them (i.e. they change under NFC normalization), although I don't think there are any such cases.
  • Loading branch information
JoshData committed Jun 19, 2024
1 parent 0b22c13 commit 3426885
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 5 deletions.
27 changes: 24 additions & 3 deletions email_validator/syntax.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,22 @@ def split_email(email: str) -> Tuple[Optional[str], str, str, bool]:

def split_string_at_unquoted_special(text: str, specials: Tuple[str, ...]) -> Tuple[str, str]:
# Split the string at the first character in specials (an @-sign
# or left angle bracket) that does not occur within quotes.
# or left angle bracket) that does not occur within quotes and
# is not followed by a Unicode combining character.
# If no special character is found, raise an error.
inside_quote = False
escaped = False
left_part = ""
for c in text:
if inside_quote:
for i, c in enumerate(text):
# < plus U+0338 (Combining Long Solidus Overlay) normalizes to
# ≮ U+226E (Not Less-Than), and it would be confusing to treat
# the < as the start of "<email>" syntax in that case. Likewise,
# if anything combines with an @ or ", we should probably not
# treat it as a special character.
if unicodedata.normalize("NFC", text[i:])[0] != c:
left_part += c

elif inside_quote:
left_part += c
if c == '\\' and not escaped:
escaped = True
Expand All @@ -72,6 +82,9 @@ def split_string_at_unquoted_special(text: str, specials: Tuple[str, ...]) -> Tu
else:
left_part += c

if len(left_part) == len(text):
raise EmailSyntaxError("An email address must have an @-sign.")

# The right part is whatever is left.
right_part = text[len(left_part):]

Expand Down Expand Up @@ -134,6 +147,14 @@ def unquote_quoted_string(text: str) -> Tuple[str, bool]:
# Check for other unsafe characters.
check_unsafe_chars(display_name, allow_space=True)

# Check that the right part ends with a closing angle bracket,
# tolerating any trailing spaces after it.
if ">" not in right_part:
raise EmailSyntaxError("An open angle bracket at the start of the email address has to be followed by a close angle bracket at the end.")
right_part = right_part.rstrip(" ")
if right_part[-1] != ">":
raise EmailSyntaxError("There can't be anything after the email address.")

# Remove the initial and trailing angle brackets.
addr_spec = right_part[1:].rstrip(">")

Expand Down
7 changes: 5 additions & 2 deletions email_validator/validate_email.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,6 @@ def validate_email(
# part if the local part is quoted.
display_name, local_part, domain_part, is_quoted_local_part \
= split_email(email)
if display_name is not None and not allow_display_name:
raise EmailSyntaxError("A display name and angle brackets around the email address are not permitted here.")

# Collect return values in this instance.
ret = ValidatedEmail()
Expand Down Expand Up @@ -139,6 +137,11 @@ def validate_email(
# Check the length of the address.
validate_email_length(ret)

# Check that a display name is permitted. It's the last syntax check
# because we always check against optional parsing features last.
if display_name is not None and not allow_display_name:
raise EmailSyntaxError("A display name and angle brackets around the email address are not permitted here.")

if check_deliverability and not test_environment:
# Validate the email address's deliverability using DNS
# and update the returned ValidatedEmail object with metadata.
Expand Down
5 changes: 5 additions & 0 deletions tests/test_syntax.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,7 @@ def test_domain_literal() -> None:
@pytest.mark.parametrize(
'email_input,error_msg',
[
('hello.world', 'An email address must have an @-sign.'),
('my@localhost', 'The part after the @-sign is not valid. It should have a period.'),
('my@.leadingdot.com', 'An email address cannot have a period immediately after the @-sign.'),
('my@.leadingfwdot.com', 'An email address cannot have a period immediately after the @-sign.'),
Expand Down Expand Up @@ -413,6 +414,10 @@ def test_domain_literal() -> None:
('me@[untaggedtext]', 'The part after the @-sign in brackets is not an IPv4 address and has no address literal tag.'),
('me@[tag:invalid space]', 'The part after the @-sign contains invalid characters in brackets: SPACE.'),
('<me@example.com>', 'A display name and angle brackets around the email address are not permitted here.'),
('<me@example.com', 'An open angle bracket at the start of the email address has to be followed by a close angle bracket at the end.'),
('<me@example.com> !', 'There can\'t be anything after the email address.'),
('<\u0338me@example.com', 'The email address contains invalid characters before the @-sign: \'<\'.'),
('DisplayName <me@-example.com>', 'An email address cannot have a hyphen immediately after the @-sign.'),
('DisplayName <me@example.com>', 'A display name and angle brackets around the email address are not permitted here.'),
('Display Name <me@example.com>', 'A display name and angle brackets around the email address are not permitted here.'),
('\"Display Name\" <me@example.com>', 'A display name and angle brackets around the email address are not permitted here.'),
Expand Down

0 comments on commit 3426885

Please sign in to comment.