Skip to content

Commit

Permalink
fix: account for second-level domain names (#16586)
Browse files Browse the repository at this point in the history
* fix: account for second-level domain names

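With the current implementation, which keeps only the last two labels of the email domain, we miss domains registered under a multi-label public suffix (for example, `one.wutang.co.uk` is reduced to `co.uk` instead of `wutang.co.uk`).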

Using `tldextract`, we can extract the correct registered domain name.

Signed-off-by: Mike Fiedler <miketheman@gmail.com>

* make translations

Signed-off-by: Mike Fiedler <miketheman@gmail.com>

* feat: get updated lists during build step

Signed-off-by: Mike Fiedler <miketheman@gmail.com>

---------

Signed-off-by: Mike Fiedler <miketheman@gmail.com>
  • Loading branch information
miketheman committed Aug 29, 2024
1 parent 70d6f8d commit 05eebe9
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 31 deletions.
2 changes: 2 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,8 @@ COPY --from=static /opt/warehouse/src/warehouse/admin/static/dist/ /opt/warehous
COPY --from=build /opt/warehouse/ /opt/warehouse/
COPY . /opt/warehouse/src/

# Pre-cache TLD list
RUN tldextract --update
# Load our module to pre-compile as much bytecode as we can easily.
# Saves time collectively on container boot!
RUN python -m warehouse
1 change: 1 addition & 0 deletions requirements/main.in
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ sqlalchemy[asyncio]>=2.0,<3.0
stdlib-list
stripe
structlog
tldextract
transaction
trove-classifiers
ua-parser
Expand Down
15 changes: 15 additions & 0 deletions requirements/main.txt
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,10 @@ email-validator==2.2.0 \
events==0.5 \
--hash=sha256:a7286af378ba3e46640ac9825156c93bdba7502174dd696090fdfcd4d80a1abd
# via opensearch-py
filelock==3.15.4 \
--hash=sha256:2207938cbc1844345cb01a5a95524dae30f0ce089eba5b00378295a17e3e90cb \
--hash=sha256:6ca1fffae96225dab4c6eaf1c4f4f28cd2568d3ec2a44e15a08520504de468e7
# via tldextract
first==2.0.2 \
--hash=sha256:8d8e46e115ea8ac652c76123c0865e3ff18372aef6f03c22809ceefcea9dec86 \
--hash=sha256:ff285b08c55f8c97ce4ea7012743af2495c9f1291785f163722bd36f6af6d3bf
Expand Down Expand Up @@ -901,6 +905,7 @@ idna==3.8 \
# via
# email-validator
# requests
# tldextract
importlib-resources==6.4.4 \
--hash=sha256:20600c8b7361938dc0bb2d5ec0297802e575df486f5a544fa414da65e13721f7 \
--hash=sha256:dda242603d1c9cd836c3368b1174ed74cb4049ecd209e7a1a0104620c18c5c11
Expand Down Expand Up @@ -1934,13 +1939,19 @@ requests==2.32.3 \
# opensearch-py
# premailer
# requests-aws4auth
# requests-file
# sigstore
# stripe
# tldextract
# tuf
requests-aws4auth==1.3.1 \
--hash=sha256:2969b5379ae6e60ee666638caf6cb94a32d67033f6bfcf0d50c95cd5474f2419 \
--hash=sha256:b6ad4882310e03ba2538ebf94d1f001ca9feabc5c52618539cf1eb6d5af76791
# via -r requirements/main.in
requests-file==2.1.0 \
--hash=sha256:0f549a3f3b0699415ac04d167e9cb39bccfb730cb832b4d20be3d9867356e658 \
--hash=sha256:cf270de5a4c5874e84599fc5778303d496c10ae5e870bfa378818f35d21bda5c
# via tldextract
rfc3339-validator==0.1.4 \
--hash=sha256:138a2abdf93304ad60530167e51d2dfb9549521a836871b88d7f4695d0022f6b \
--hash=sha256:24f6ec1eda14ef823da9e36ec7113124b39c04d50a4d3d3a3c2859577e7791fa
Expand Down Expand Up @@ -2181,6 +2192,10 @@ text-unidecode==1.3 \
--hash=sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8 \
--hash=sha256:bad6603bb14d279193107714b288be206cac565dfa49aa5b105294dd5c4aab93
# via python-slugify
tldextract==5.1.2 \
--hash=sha256:4dfc4c277b6b97fa053899fcdb892d2dc27295851ab5fac4e07797b6a21b2e46 \
--hash=sha256:c9e17f756f05afb5abac04fe8f766e7e70f9fe387adb1859f0f52408ee060200
# via -r requirements/main.in
transaction==4.0 \
--hash=sha256:68035db913f60d1be12f6563d201daab36c83e763de15899ff8338f26e5e62f2 \
--hash=sha256:e2519a316a05b14b3d483ac777df311087daaffeeafd3e9f7de62fc087ce3209
Expand Down
15 changes: 8 additions & 7 deletions tests/unit/accounts/test_forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -562,16 +562,17 @@ def test_disposable_email_error(self, pyramid_request):
)

@pytest.mark.parametrize(
"email",
("email", "prohibited_domain"),
[
"foo@wutang.net",
"foo@clan.wutang.net",
"foo@one.two.wutang.net",
"foo@wUtAnG.net",
("foo@wutang.net", "wutang.net"),
("foo@clan.wutang.net", "wutang.net"),
("foo@one.two.wutang.net", "wutang.net"),
("foo@wUtAnG.net", "wutang.net"),
("foo@one.wutang.co.uk", "wutang.co.uk"),
],
)
def test_prohibited_email_error(self, db_request, email):
domain = ProhibitedEmailDomain(domain="wutang.net")
def test_prohibited_email_error(self, db_request, email, prohibited_domain):
domain = ProhibitedEmailDomain(domain=prohibited_domain)
db_request.db.add(domain)

form = forms.RegistrationForm(
Expand Down
4 changes: 3 additions & 1 deletion warehouse/accounts/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import wtforms.fields

from sqlalchemy import exists
from tldextract import TLDExtract

import warehouse.utils.otp as otp
import warehouse.utils.webauthn as webauthn
Expand Down Expand Up @@ -285,7 +286,8 @@ def validate_email(self, field):
)

# Check if the domain is valid
domain = ".".join(address.domain.split(".")[-2:]).lower()
extractor = TLDExtract(suffix_list_urls=()) # Updated during image build
domain = extractor(address.domain.lower()).registered_domain

if (
domain in disposable_email_domains.blocklist
Expand Down
46 changes: 23 additions & 23 deletions warehouse/locale/messages.pot
Original file line number Diff line number Diff line change
Expand Up @@ -14,103 +14,103 @@ msgstr ""
msgid "Locale updated"
msgstr ""

#: warehouse/accounts/forms.py:51
#: warehouse/accounts/forms.py:52
msgid "The password is invalid. Try again."
msgstr ""

#: warehouse/accounts/forms.py:52
#: warehouse/accounts/forms.py:53
msgid ""
"The username is invalid. Usernames must be composed of letters, numbers, "
"dots, hyphens and underscores. And must also start and finish with a "
"letter or number. Choose a different username."
msgstr ""

#: warehouse/accounts/forms.py:70
#: warehouse/accounts/forms.py:71
msgid "Null bytes are not allowed."
msgstr ""

#: warehouse/accounts/forms.py:93
#: warehouse/accounts/forms.py:94
msgid "No user found with that username"
msgstr ""

#: warehouse/accounts/forms.py:104
#: warehouse/accounts/forms.py:105
msgid "TOTP code must be ${totp_length} digits."
msgstr ""

#: warehouse/accounts/forms.py:124
#: warehouse/accounts/forms.py:125
msgid "Recovery Codes must be ${recovery_code_length} characters."
msgstr ""

#: warehouse/accounts/forms.py:139
#: warehouse/accounts/forms.py:140
msgid "Choose a username with 50 characters or less."
msgstr ""

#: warehouse/accounts/forms.py:156
#: warehouse/accounts/forms.py:157
msgid ""
"This username is already being used by another account. Choose a "
"different username."
msgstr ""

#: warehouse/accounts/forms.py:170 warehouse/accounts/forms.py:219
#: warehouse/accounts/forms.py:232
#: warehouse/accounts/forms.py:171 warehouse/accounts/forms.py:220
#: warehouse/accounts/forms.py:233
msgid "Password too long."
msgstr ""

#: warehouse/accounts/forms.py:206
#: warehouse/accounts/forms.py:207
msgid ""
"There have been too many unsuccessful login attempts. You have been "
"locked out for ${time}. Please try again later."
msgstr ""

#: warehouse/accounts/forms.py:235
#: warehouse/accounts/forms.py:236
msgid "Your passwords don't match. Try again."
msgstr ""

#: warehouse/accounts/forms.py:269
#: warehouse/accounts/forms.py:270
msgid "The email address is too long. Try again."
msgstr ""

#: warehouse/accounts/forms.py:284
#: warehouse/accounts/forms.py:285
msgid "The email address isn't valid. Try again."
msgstr ""

#: warehouse/accounts/forms.py:297
#: warehouse/accounts/forms.py:299
msgid "You can't use an email address from this domain. Use a different email."
msgstr ""

#: warehouse/accounts/forms.py:308
#: warehouse/accounts/forms.py:310
msgid ""
"This email address is already being used by this account. Use a different"
" email."
msgstr ""

#: warehouse/accounts/forms.py:315
#: warehouse/accounts/forms.py:317
msgid ""
"This email address is already being used by another account. Use a "
"different email."
msgstr ""

#: warehouse/accounts/forms.py:348 warehouse/manage/forms.py:139
#: warehouse/accounts/forms.py:350 warehouse/manage/forms.py:139
msgid "The name is too long. Choose a name with 100 characters or less."
msgstr ""

#: warehouse/accounts/forms.py:440
#: warehouse/accounts/forms.py:442
msgid "Invalid TOTP code."
msgstr ""

#: warehouse/accounts/forms.py:457
#: warehouse/accounts/forms.py:459
msgid "Invalid WebAuthn assertion: Bad payload"
msgstr ""

#: warehouse/accounts/forms.py:526
#: warehouse/accounts/forms.py:528
msgid "Invalid recovery code."
msgstr ""

#: warehouse/accounts/forms.py:535
#: warehouse/accounts/forms.py:537
msgid "Recovery code has been previously used."
msgstr ""

#: warehouse/accounts/forms.py:565
#: warehouse/accounts/forms.py:567
msgid "The username isn't valid. Try again."
msgstr ""

Expand Down

0 comments on commit 05eebe9

Please sign in to comment.