Skip to content

Commit

Permalink
Validate if a file with the same blake2 digest already exists (#2490) (
Browse files Browse the repository at this point in the history
…#3310)

* Validate if a file with the same blake2 digest already exists (#2490)

* Update help text for "file or contents exist"
  • Loading branch information
Yeray Diaz Diaz authored and di committed Mar 20, 2018
1 parent 99d377b commit 89efaa7
Show file tree
Hide file tree
Showing 3 changed files with 121 additions and 9 deletions.
106 changes: 105 additions & 1 deletion tests/unit/forklift/test_legacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,10 +639,52 @@ def test_is_duplicate_none(self, pyramid_config, db_request):
),
)

hashes["blake2_256"] = "another blake2 digest"

assert legacy._is_duplicate_file(
db_request.db, requested_file_name, hashes
) is None

def test_is_duplicate_false_same_blake2(self, pyramid_config, db_request):
pyramid_config.testing_securitypolicy(userid=1)

user = UserFactory.create()
EmailFactory.create(user=user)
project = ProjectFactory.create()
release = ReleaseFactory.create(project=project, version="1.0")
RoleFactory.create(user=user, project=project)

filename = "{}-{}.tar.gz".format(project.name, release.version)
requested_file_name = "{}-{}-1.tar.gz".format(project.name,
release.version)
file_content = io.BytesIO(b"A fake file.")
file_value = file_content.getvalue()

hashes = {
"sha256": hashlib.sha256(file_value).hexdigest(),
"md5": hashlib.md5(file_value).hexdigest(),
"blake2_256": hashlib.blake2b(
file_value, digest_size=256 // 8
).hexdigest()
}
db_request.db.add(
File(
release=release,
filename=filename,
md5_digest=hashes["md5"],
sha256_digest=hashes["sha256"],
blake2_256_digest=hashes["blake2_256"],
path="source/{name[0]}/{name}/{filename}".format(
name=project.name,
filename=filename,
),
),
)

assert legacy._is_duplicate_file(
db_request.db, requested_file_name, hashes
) is False

def test_is_duplicate_false(self, pyramid_config, db_request):
pyramid_config.testing_securitypolicy(userid=1)

Expand Down Expand Up @@ -1792,7 +1834,69 @@ def test_upload_fails_with_existing_filename_diff_content(self,
]
assert resp.status_code == 400
assert resp.status == (
"400 File already exists. "
"400 The filename or contents already exist. "
"See /the/help/url/"
)

def test_upload_fails_with_diff_filename_same_blake2(self,
pyramid_config,
db_request):
pyramid_config.testing_securitypolicy(userid=1)

user = UserFactory.create()
project = ProjectFactory.create()
release = ReleaseFactory.create(project=project, version="1.0")
RoleFactory.create(user=user, project=project)

filename = "{}-{}.tar.gz".format(project.name, release.version)
file_content = io.BytesIO(b"A fake file.")

db_request.POST = MultiDict({
"metadata_version": "1.2",
"name": project.name,
"version": release.version,
"filetype": "sdist",
"md5_digest": hashlib.md5(file_content.getvalue()).hexdigest(),
"content": pretend.stub(
filename="{}-fake.tar.gz".format(project.name),
file=file_content,
type="application/tar",
),
})

db_request.db.add(
File(
release=release,
filename=filename,
md5_digest=hashlib.md5(file_content.getvalue()).hexdigest(),
sha256_digest=hashlib.sha256(
file_content.getvalue()
).hexdigest(),
blake2_256_digest=hashlib.blake2b(
file_content.getvalue(),
digest_size=256 // 8
).hexdigest(),
path="source/{name[0]}/{name}/{filename}".format(
name=project.name,
filename=filename,
),
),
)
db_request.route_url = pretend.call_recorder(
lambda route, **kw: "/the/help/url/"
)

with pytest.raises(HTTPBadRequest) as excinfo:
legacy.file_upload(db_request)

resp = excinfo.value

assert db_request.route_url.calls == [
pretend.call('help', _anchor='file-name-reuse')
]
assert resp.status_code == 400
assert resp.status == (
"400 The filename or contents already exist. "
"See /the/help/url/"
)

Expand Down
19 changes: 13 additions & 6 deletions warehouse/forklift/legacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -660,7 +660,9 @@ def _is_valid_dist_file(filename, filetype):

def _is_duplicate_file(db_session, filename, hashes):
"""
Check to see if file already exists, and if it's content matches
Check to see if file already exists, and if it's content matches.
A file is considered to exist if its filename *or* blake2 digest are
present in a file row in the database.
Returns:
- True: This file is a duplicate and all further processing should halt.
Expand All @@ -670,14 +672,19 @@ def _is_duplicate_file(db_session, filename, hashes):

file_ = (
db_session.query(File)
.filter(File.filename == filename)
.filter(
(File.filename == filename) |
(File.blake2_256_digest == hashes["blake2_256"]))
.first()
)

if file_ is not None:
return (file_.sha256_digest == hashes["sha256"] and
file_.md5_digest == hashes["md5"] and
file_.blake2_256_digest == hashes["blake2_256"])
return (
file_.filename == filename and
file_.sha256_digest == hashes["sha256"] and
file_.md5_digest == hashes["md5"] and
file_.blake2_256_digest == hashes["blake2_256"]
)

return None

Expand Down Expand Up @@ -1075,7 +1082,7 @@ def file_upload(request):
return Response()
elif is_duplicate is not None:
raise _exc_with_message(
HTTPBadRequest, "File already exists. "
HTTPBadRequest, "The filename or contents already exist. "
"See " +
request.route_url(
'help', _anchor='file-name-reuse'
Expand Down
5 changes: 3 additions & 2 deletions warehouse/templates/pages/help.html
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
{% macro private_indices() %}How can I publish my private packages to PyPI?{% endmacro %}
{% macro admin_intervention() %}Why did my package or user registration get blocked?{% endmacro %}
{% macro file_size_limit() %}How do I get a file size limit exemption or increase for my project?{% endmacro %}
{% macro file_name_reuse() %}Why am I getting a "File already exists" error?{% endmacro %}
{% macro file_name_reuse() %}Why am I getting a "Filename or contents already exists" or "Filename has been previously used" errors?{% endmacro %}
{% macro project_name() %}Why isn't my desired project name available?{% endmacro %}
{% macro project_name_claim() %}How do I claim an abandoned or previously registered project name?{% endmacro %}
{% macro feedback() %}Where can I report a bug or provide feedback?{% endmacro %}
Expand Down Expand Up @@ -224,11 +224,12 @@ <h3 id="admin-intervention">{{ admin_intervention() }}</h3>

<h3 id="file-name-reuse">{{ file_name_reuse() }}</h3>
<p>
The error <i>HTTPError: 400 Client Error: File already exists</i> happens for one of two reasons:
PyPI will return these errors for one of these reasons:
</p>
<ul>
<li>Filename has been used and file exists</li>
<li>Filename has been used but file no longer exists</li>
<li>A file with the exact same content exists</li>
</ul>
<p>
PyPI does not allow for a filename to be reused, even once a project has been deleted and recreated.
Expand Down

0 comments on commit 89efaa7

Please sign in to comment.