diff --git a/email_validator/syntax.py b/email_validator/syntax.py index efbcd73..5d7af41 100644 --- a/email_validator/syntax.py +++ b/email_validator/syntax.py @@ -48,12 +48,22 @@ def split_email(email: str) -> Tuple[Optional[str], str, str, bool]: def split_string_at_unquoted_special(text: str, specials: Tuple[str, ...]) -> Tuple[str, str]: # Split the string at the first character in specials (an @-sign - # or left angle bracket) that does not occur within quotes. + # or left angle bracket) that does not occur within quotes and + # is not followed by a Unicode combining character. + # If no special character is found, raise an error. inside_quote = False escaped = False left_part = "" - for c in text: - if inside_quote: + for i, c in enumerate(text): + # < plus U+0338 (Combining Long Solidus Overlay) normalizes to + # ≮ U+226E (Not Less-Than), and it would be confusing to treat + # the < as the start of "<...>" syntax in that case. Likewise, + # if anything combines with an @ or ", we should probably not + # treat it as a special character. + if unicodedata.normalize("NFC", text[i:])[0] != c: + left_part += c + + elif inside_quote: left_part += c if c == '\\' and not escaped: escaped = True @@ -72,6 +82,9 @@ def split_string_at_unquoted_special(text: str, specials: Tuple[str, ...]) -> Tu else: left_part += c + if len(left_part) == len(text): + raise EmailSyntaxError("An email address must have an @-sign.") + # The right part is whatever is left. right_part = text[len(left_part):] @@ -134,6 +147,14 @@ def unquote_quoted_string(text: str) -> Tuple[str, bool]: # Check for other unsafe characters. check_unsafe_chars(display_name, allow_space=True) + # Check that the right part ends with an angle bracket + # but allow spaces after it, I guess. 
+ if ">" not in right_part: + raise EmailSyntaxError("An open angle bracket at the start of the email address has to be followed by a close angle bracket at the end.") + right_part = right_part.rstrip(" ") + if right_part[-1] != ">": + raise EmailSyntaxError("There can't be anything after the email address.") + # Remove the initial and trailing angle brackets. addr_spec = right_part[1:].rstrip(">") diff --git a/email_validator/validate_email.py b/email_validator/validate_email.py index 2adda2a..19db902 100644 --- a/email_validator/validate_email.py +++ b/email_validator/validate_email.py @@ -68,8 +68,6 @@ def validate_email( # part if the local part is quoted. display_name, local_part, domain_part, is_quoted_local_part \ = split_email(email) - if display_name is not None and not allow_display_name: - raise EmailSyntaxError("A display name and angle brackets around the email address are not permitted here.") # Collect return values in this instance. ret = ValidatedEmail() @@ -139,6 +137,11 @@ def validate_email( # Check the length of the address. validate_email_length(ret) + # Check that a display name is permitted. It's the last syntax check + # because we always check against optional parsing features last. + if display_name is not None and not allow_display_name: + raise EmailSyntaxError("A display name and angle brackets around the email address are not permitted here.") + if check_deliverability and not test_environment: # Validate the email address's deliverability using DNS # and update the returned ValidatedEmail object with metadata. diff --git a/tests/test_syntax.py b/tests/test_syntax.py index 6d8dc72..d4a9844 100644 --- a/tests/test_syntax.py +++ b/tests/test_syntax.py @@ -352,6 +352,7 @@ def test_domain_literal() -> None: @pytest.mark.parametrize( 'email_input,error_msg', [ + ('hello.world', 'An email address must have an @-sign.'), ('my@localhost', 'The part after the @-sign is not valid. 
It should have a period.'), ('my@.leadingdot.com', 'An email address cannot have a period immediately after the @-sign.'), ('my@.leadingfwdot.com', 'An email address cannot have a period immediately after the @-sign.'), @@ -413,6 +414,10 @@ def test_domain_literal() -> None: ('me@[untaggedtext]', 'The part after the @-sign in brackets is not an IPv4 address and has no address literal tag.'), ('me@[tag:invalid space]', 'The part after the @-sign contains invalid characters in brackets: SPACE.'), ('', 'A display name and angle brackets around the email address are not permitted here.'), + (' !', 'There can\'t be anything after the email address.'), + ('<\u0338me@example.com', 'The email address contains invalid characters before the @-sign: \'<\'.'), + ('DisplayName ', 'An email address cannot have a hyphen immediately after the @-sign.'), ('DisplayName ', 'A display name and angle brackets around the email address are not permitted here.'), ('Display Name ', 'A display name and angle brackets around the email address are not permitted here.'), ('\"Display Name\" ', 'A display name and angle brackets around the email address are not permitted here.'),