diff --git a/news/8684.bugfix b/news/8684.bugfix new file mode 100644 index 00000000000..528291d736a --- /dev/null +++ b/news/8684.bugfix @@ -0,0 +1,2 @@ +Use UTF-8 to handle ZIP archive entries on Python 2 according to PEP 427, so +non-ASCII paths can be resolved as expected. diff --git a/src/pip/_internal/operations/install/wheel.py b/src/pip/_internal/operations/install/wheel.py index 681fc0aa8ef..e91b1b8d558 100644 --- a/src/pip/_internal/operations/install/wheel.py +++ b/src/pip/_internal/operations/install/wheel.py @@ -78,6 +78,7 @@ Union, cast, ) + from zipfile import ZipInfo from pip._vendor.pkg_resources import Distribution @@ -420,6 +421,15 @@ def __init__(self, src_record_path, dest_path, zip_file): self._zip_file = zip_file self.changed = False + def _getinfo(self): + # type: () -> ZipInfo + if not PY2: + return self._zip_file.getinfo(self.src_record_path) + # Python 2 does not expose a way to detect a ZIP's encoding, but the + # wheel specification (PEP 427) explicitly mandates that paths should + # use UTF-8, so encode the record path as UTF-8 for the lookup. + return self._zip_file.getinfo(self.src_record_path.encode("utf-8")) + def save(self): # type: () -> None # directory creation is lazy and after file filtering @@ -439,11 +449,12 @@ def save(self): if os.path.exists(self.dest_path): os.unlink(self.dest_path) - with self._zip_file.open(self.src_record_path) as f: + zipinfo = self._getinfo() + + with self._zip_file.open(zipinfo) as f: with open(self.dest_path, "wb") as dest: shutil.copyfileobj(f, dest) - zipinfo = self._zip_file.getinfo(self.src_record_path) if zip_item_is_executable(zipinfo): set_extracted_file_to_default_mode_plus_executable(self.dest_path)