From 31c0bac5bb092c9cab634ceddffcd4c2e4061118 Mon Sep 17 00:00:00 2001 From: jizhongsheng Date: Wed, 24 Nov 2021 23:56:19 +0800 Subject: [PATCH 1/2] Fix filename decoding issue for zip files archived by macOS --- jupyter_archive/handlers.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/jupyter_archive/handlers.py b/jupyter_archive/handlers.py index 308ee9e..2601634 100644 --- a/jupyter_archive/handlers.py +++ b/jupyter_archive/handlers.py @@ -223,17 +223,32 @@ async def get(self, archive_path, include_body=False): self.finish() def extract_archive(self, archive_path): + def unzip(reader, destination): + # OSX Compress missing flag, broke zipFile + # so here we guess it + with reader as f: + for fn in f.namelist(): + extreact_path = pathlib.Path(f.extract(fn, path=destination)) + try: + correct_filename = fn.encode('cp437').decode('utf-8') + except UnicodeEncodeError: + correct_filename = fn + extreact_path.rename(os.path.join(destination, correct_filename)) archive_destination = archive_path.parent self.log.info("Begin extraction of {} to {}.".format(archive_path, archive_destination)) archive_reader = make_reader(archive_path) - with archive_reader as archive: - archive.extractall(archive_destination) + if isinstance(archive_reader, zipfile.ZipFile): + unzip(archive_reader, archive_destination) + else: + with archive_reader as archive: + archive.extractall(archive_destination) self.log.info("Finished extracting {} to {}.".format(archive_path, archive_destination)) + def setup_handlers(web_app): host_pattern = ".*$" base_url = web_app.settings["base_url"] From 83e8a7333993d2e640e10101751307f12ae2062e Mon Sep 17 00:00:00 2001 From: jizhongsheng Date: Thu, 25 Nov 2021 10:09:19 +0800 Subject: [PATCH 2/2] add gbk decoding prob --- jupyter_archive/handlers.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/jupyter_archive/handlers.py b/jupyter_archive/handlers.py index 2601634..92e9241 
100644 --- a/jupyter_archive/handlers.py +++ b/jupyter_archive/handlers.py @@ -223,16 +223,26 @@ async def get(self, archive_path, include_body=False): self.finish() def extract_archive(self, archive_path): + def try_macos_decode(fn): + try: + return fn.encode('cp437').decode('utf-8') + except UnicodeError: + return None + + def try_windows_chinese_decode(fn): + try: + return fn.encode('cp437').decode('gbk') + except UnicodeError: + return None + + def unzip(reader, destination): - # OSX Compress missing flag, broke zipFile - # so here we guess it + # Most Windows implementations use the DOS (OEM) encoding + # The macOS zip utility uses UTF-8, but it doesn't set the UTF-8 flag bit + # *nix zip utilities silently use the system encoding (generally UTF-8) with reader as f: for fn in f.namelist(): extreact_path = pathlib.Path(f.extract(fn, path=destination)) - try: - correct_filename = fn.encode('cp437').decode('utf-8') - except UnicodeEncodeError: - correct_filename = fn + correct_filename = try_macos_decode(fn) or try_windows_chinese_decode(fn) or fn extreact_path.rename(os.path.join(destination, correct_filename)) archive_destination = archive_path.parent