From bd592a41cca763f85da40063dd4b1caae12a330b Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Mon, 29 Jun 2020 16:51:42 -0400 Subject: [PATCH 01/11] Add DirectoryHandler and ExtractDirectoryHandler + tests --- jupyter_server/files/handlers.py | 234 ++++++++++++++-- jupyter_server/services/contents/manager.py | 286 ++++++++++++-------- tests/test_files.py | 258 +++++++++++------- 3 files changed, 557 insertions(+), 221 deletions(-) diff --git a/jupyter_server/files/handlers.py b/jupyter_server/files/handlers.py index 51b990f238..1a6d3fb137 100644 --- a/jupyter_server/files/handlers.py +++ b/jupyter_server/files/handlers.py @@ -3,11 +3,17 @@ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. +import os +import pathlib import mimetypes import json +import zipfile +import tarfile from base64 import decodebytes from tornado import web +from tornado import ioloop from jupyter_server.base.handlers import JupyterHandler +from jupyter_server.utils import url2path class FilesHandler(JupyterHandler): @@ -23,8 +29,10 @@ class FilesHandler(JupyterHandler): def content_security_policy(self): # In case we're serving HTML/SVG, confine any Javascript to a unique # origin so it can't interact with the notebook server. - return super(FilesHandler, self).content_security_policy + \ - "; sandbox allow-scripts" + return ( + super(FilesHandler, self).content_security_policy + + "; sandbox allow-scripts" + ) @web.authenticated def head(self, path): @@ -38,41 +46,229 @@ async def get(self, path, include_body=True): self.log.info("Refusing to serve hidden file, via 404 Error") raise web.HTTPError(404) - path = path.strip('/') - if '/' in path: - _, name = path.rsplit('/', 1) + path = path.strip("/") + if "/" in path: + _, name = path.rsplit("/", 1) else: name = path - model = await cm.get(path, type='file', content=include_body) + model = await cm.get(path, type="file", content=include_body) if self.get_argument("download", False): self.set_attachment_header(name) # get mimetype from filename - if name.lower().endswith('.ipynb'): - self.set_header('Content-Type', 'application/x-ipynb+json') + if name.lower().endswith(".ipynb"): + self.set_header("Content-Type", "application/x-ipynb+json") else: cur_mime = mimetypes.guess_type(name)[0] - if cur_mime == 'text/plain': - self.set_header('Content-Type', 'text/plain; charset=UTF-8') + if cur_mime == "text/plain": + self.set_header("Content-Type", "text/plain; charset=UTF-8") elif cur_mime is not None: - self.set_header('Content-Type', cur_mime) + self.set_header("Content-Type", cur_mime) else: - if model['format'] == 'base64': - self.set_header('Content-Type', 'application/octet-stream') + if model["format"] == "base64": + self.set_header("Content-Type", "application/octet-stream") else: - self.set_header('Content-Type', 'text/plain; charset=UTF-8') + self.set_header("Content-Type", "text/plain; charset=UTF-8") if include_body: - if model['format'] == 'base64': - b64_bytes = model['content'].encode('ascii') + if model["format"] == "base64": + b64_bytes = model["content"].encode("ascii") self.write(decodebytes(b64_bytes)) - elif model['format'] == 'json': - self.write(json.dumps(model['content'])) + elif model["format"] == "json": + self.write(json.dumps(model["content"])) else: - self.write(model['content']) + self.write(model["content"]) self.flush() +# The delay in ms at which we send the chunk of data +# to the client. 
+ARCHIVE_DOWNLOAD_FLUSH_DELAY = 100
+SUPPORTED_FORMAT = [
+    "zip",
+    "tgz",
+    "tar.gz",
+    "tbz",
+    "tbz2",
+    "tar.bz",
+    "tar.bz2",
+    "txz",
+    "tar.xz",
+]
+
+
+class ArchiveStream:
+    def __init__(self, handler):
+        self.handler = handler
+        self.position = 0
+
+    def write(self, data):
+        self.position += len(data)
+        self.handler.write(data)
+        del data
+
+    def tell(self):
+        return self.position
+
+    def flush(self):
+        # Note: Flushing is done elsewhere, in the main thread
+        # because `write()` is called in a background thread.
+        # self.handler.flush()
+        pass
+
+
+def make_writer(handler, archive_format="zip"):
+    fileobj = ArchiveStream(handler)
+
+    if archive_format == "zip":
+        archive_file = zipfile.ZipFile(
+            fileobj, mode="w", compression=zipfile.ZIP_DEFLATED
+        )
+        archive_file.add = archive_file.write
+    elif archive_format in ["tgz", "tar.gz"]:
+        archive_file = tarfile.open(fileobj=fileobj, mode="w|gz")
+    elif archive_format in ["tbz", "tbz2", "tar.bz", "tar.bz2"]:
+        archive_file = tarfile.open(fileobj=fileobj, mode="w|bz2")
+    elif archive_format in ["txz", "tar.xz"]:
+        archive_file = tarfile.open(fileobj=fileobj, mode="w|xz")
+    else:
+        raise ValueError("'{}' is not a valid archive format.".format(archive_format))
+    return archive_file
+
+
+def make_reader(archive_path):
+
+    archive_format = "".join(archive_path.suffixes)[1:]
+
+    if archive_format == "zip":
+        archive_file = zipfile.ZipFile(archive_path, mode="r")
+    elif archive_format in ["tgz", "tar.gz"]:
+        archive_file = tarfile.open(archive_path, mode="r|gz")
+    elif archive_format in ["tbz", "tbz2", "tar.bz", "tar.bz2"]:
+        archive_file = tarfile.open(archive_path, mode="r|bz2")
+    elif archive_format in ["txz", "tar.xz"]:
+        archive_file = tarfile.open(archive_path, mode="r|xz")
+    else:
+        raise ValueError("'{}' is not a valid archive format.".format(archive_format))
+    return archive_file
+
+
+class DirectoryHandler(JupyterHandler):
+    @web.authenticated
+    async def get(self, archive_path, include_body=False):
+
+        # /directories/ requests must originate from the same site
+        self.check_xsrf_cookie()
+        cm = self.contents_manager
+
+        if cm.is_hidden(archive_path) and not cm.allow_hidden:
+            self.log.info("Refusing to serve hidden file, via 404 Error")
+            raise web.HTTPError(404)
+
+        archive_token = self.get_argument("archiveToken")
+        archive_format = self.get_argument("archiveFormat", "zip")
+        if archive_format not in SUPPORTED_FORMAT:
+            self.log.error("Unsupported format {}.".format(archive_format))
+            raise web.HTTPError(404)
+
+        root_dir = pathlib.Path(cm.root_dir)
+        archive_path = root_dir / url2path(archive_path)
+        archive_filename = archive_path.with_suffix(".{}".format(archive_format)).name
+
+        self.log.info(
+            "Prepare {} for archiving and downloading.".format(archive_filename)
+        )
+        self.set_header("content-type", "application/octet-stream")
+        self.set_header("cache-control", "no-cache")
+        self.set_header(
+            "content-disposition", "attachment; filename={}".format(archive_filename)
+        )
+
+        self.canceled = False
+
+        # The original extension is using `ioloop.PeriodicCallback` and `ioloop.IOLoop.current().run_in_executor`
+        # but it does not seem to work here (500 internal HTTP error).
+        # This strategy is nice when compressing very large directories. Any idea how to integrate that is welcome.
+ + # self.flush_cb = ioloop.PeriodicCallback( + # self.flush, ARCHIVE_DOWNLOAD_FLUSH_DELAY + # ) + # self.flush_cb.start() + + self.archive_and_download(archive_path, archive_format, archive_token) + + # args = (archive_path, archive_format, archive_token) + # yield ioloop.IOLoop.current().run_in_executor( + # None, self.archive_and_download, *args + # ) + + if self.canceled: + self.log.info("Download canceled.") + else: + self.flush() + self.log.info("Finished downloading {}.".format(archive_filename)) + + self.set_cookie("archiveToken", archive_token) + # self.flush_cb.stop() + self.finish() + + def archive_and_download(self, archive_path, archive_format, archive_token): + + with make_writer(self, archive_format) as archive: + prefix = len(str(archive_path.parent)) + len(os.path.sep) + for root, _, files in os.walk(archive_path): + for file_ in files: + file_name = os.path.join(root, file_) + if not self.canceled: + self.log.debug("{}\n".format(file_name)) + archive.add(file_name, os.path.join(root[prefix:], file_)) + else: + break + + def on_connection_close(self): + super().on_connection_close() + self.canceled = True + # self.flush_cb.stop() + + +class ExtractDirectoryHandler(JupyterHandler): + @web.authenticated + def get(self, archive_path, include_body=False): + + # /extract-archive/ requests must originate from the same site + self.check_xsrf_cookie() + cm = self.contents_manager + + if cm.is_hidden(archive_path) and not cm.allow_hidden: + self.log.info("Refusing to serve hidden file, via 404 Error") + raise web.HTTPError(404) + + root_dir = pathlib.Path(cm.root_dir) + archive_path = root_dir / url2path(archive_path) + + # yield ioloop.IOLoop.current().run_in_executor( + # None, self.extract_archive, archive_path + # ) + self.extract_archive(archive_path) + + self.finish() + + def extract_archive(self, archive_path): + + archive_destination = archive_path.parent + self.log.info( + "Begin extraction of {} to {}.".format(archive_path, archive_destination) + ) + + archive_reader = make_reader(archive_path) + with archive_reader as archive: + archive.extractall(archive_destination) + + self.log.info( + "Finished extracting {} to {}.".format(archive_path, archive_destination) + ) + + default_handlers = [] diff --git a/jupyter_server/services/contents/manager.py b/jupyter_server/services/contents/manager.py index 28ac63fcce..a06e7a0412 100644 --- a/jupyter_server/services/contents/manager.py +++ b/jupyter_server/services/contents/manager.py @@ -12,6 +12,8 @@ from tornado.web import HTTPError, RequestHandler from ...files.handlers import FilesHandler +from ...files.handlers import DirectoryHandler +from ...files.handlers import ExtractDirectoryHandler from .checkpoints import Checkpoints from traitlets.config.configurable import LoggingConfigurable from nbformat import sign, validate as validate_nb, ValidationError @@ -34,7 +36,7 @@ from jupyter_server.transutils import _ -copy_pat = re.compile(r'\-Copy\d*\.') +copy_pat = re.compile(r"\-Copy\d*\.") class ContentsManager(LoggingConfigurable): @@ -56,34 +58,44 @@ class ContentsManager(LoggingConfigurable): """ - root_dir = Unicode('/', config=True) + root_dir = Unicode("/", config=True) allow_hidden = Bool(False, config=True, help="Allow access to hidden files") notary = Instance(sign.NotebookNotary) + def _notary_default(self): return sign.NotebookNotary(parent=self) - hide_globs = List(Unicode(), [ - u'__pycache__', '*.pyc', '*.pyo', - '.DS_Store', '*.so', '*.dylib', '*~', - ], config=True, help=""" + hide_globs = List( + Unicode(), + 
[u"__pycache__", "*.pyc", "*.pyo", ".DS_Store", "*.so", "*.dylib", "*~",], + config=True, + help=""" Glob patterns to hide in file and directory listings. - """) + """, + ) - untitled_notebook = Unicode(_("Untitled"), config=True, - help="The base name used when creating untitled notebooks." + untitled_notebook = Unicode( + _("Untitled"), + config=True, + help="The base name used when creating untitled notebooks.", ) - untitled_file = Unicode("untitled", config=True, - help="The base name used when creating untitled files." + untitled_file = Unicode( + "untitled", config=True, help="The base name used when creating untitled files." ) - untitled_directory = Unicode("Untitled Folder", config=True, - help="The base name used when creating untitled directories." + untitled_directory = Unicode( + "Untitled Folder", + config=True, + help="The base name used when creating untitled directories.", ) - pre_save_hook = Any(None, config=True, allow_none=True, + pre_save_hook = Any( + None, + config=True, + allow_none=True, help="""Python callable or importstring thereof To be called on a contents model prior to save. @@ -100,12 +112,12 @@ def _notary_default(self): Modifying this dict will affect the file that is stored. - path: the API path of the save destination - contents_manager: this ContentsManager instance - """ + """, ) - @validate('pre_save_hook') + @validate("pre_save_hook") def _validate_pre_save_hook(self, proposal): - value = proposal['value'] + value = proposal["value"] if isinstance(value, string_types): value = import_item(self.pre_save_hook) if not callable(value): @@ -117,7 +129,9 @@ def run_pre_save_hook(self, model, path, **kwargs): if self.pre_save_hook: try: self.log.debug("Running pre-save hook on %s", path) - self.pre_save_hook(model=model, path=path, contents_manager=self, **kwargs) + self.pre_save_hook( + model=model, path=path, contents_manager=self, **kwargs + ) except Exception: self.log.error("Pre-save hook failed on %s", path, exc_info=True) @@ -125,19 +139,19 @@ def run_pre_save_hook(self, model, path, **kwargs): checkpoints = Instance(Checkpoints, config=True) checkpoints_kwargs = Dict(config=True) - @default('checkpoints') + @default("checkpoints") def _default_checkpoints(self): return self.checkpoints_class(**self.checkpoints_kwargs) - @default('checkpoints_kwargs') + @default("checkpoints_kwargs") def _default_checkpoints_kwargs(self): - return dict( - parent=self, - log=self.log, - ) + return dict(parent=self, log=self.log,) files_handler_class = Type( - FilesHandler, klass=RequestHandler, allow_none=True, config=True, + FilesHandler, + klass=RequestHandler, + allow_none=True, + config=True, help="""handler class to use when serving raw file requests. Default is a fallback that talks to the ContentsManager API, @@ -147,7 +161,7 @@ def _default_checkpoints_kwargs(self): which will be much more efficient. Access to these files should be Authenticated. - """ + """, ) files_handler_params = Dict( @@ -156,7 +170,45 @@ def _default_checkpoints_kwargs(self): For example, StaticFileHandlers generally expect a `path` argument specifying the root directory from which to serve files. - """ + """, + ) + + directory_handler_class = Type( + DirectoryHandler, + klass=RequestHandler, + allow_none=True, + config=True, + help="""handler class to use when serving raw directory requests. + + TODO: complete + + Access to these files should be Authenticated. + """, + ) + + directory_handler_params = Dict( + config=True, + help="""Extra parameters to pass to directory_handler_class. 
+ """, + ) + + extract_directory_handler_class = Type( + ExtractDirectoryHandler, + klass=RequestHandler, + allow_none=True, + config=True, + help="""handler class to use when serving raw directory requests. + + TODO: complete + + Access to these files should be Authenticated. + """, + ) + + extract_directory_handler_params = Dict( + config=True, + help="""Extra parameters to pass to extract_directory_handler_class. + """, ) def get_extra_handlers(self): @@ -169,6 +221,20 @@ def get_extra_handlers(self): handlers.append( (r"/files/(.*)", self.files_handler_class, self.files_handler_params) ) + handlers.append( + ( + r"/directories/(.*)", + self.directory_handler_class, + self.directory_handler_params, + ) + ) + handlers.append( + ( + r"/extract-directories/(.*)", + self.extract_directory_handler_class, + self.extract_directory_handler_params, + ) + ) return handlers # ContentsManager API part 1: methods that must be @@ -210,7 +276,7 @@ def is_hidden(self, path): """ raise NotImplementedError - def file_exists(self, path=''): + def file_exists(self, path=""): """Does a file exist at the given path? Like os.path.isfile @@ -227,7 +293,7 @@ def file_exists(self, path=''): exists : bool Whether the file exists. """ - raise NotImplementedError('must be implemented in a subclass') + raise NotImplementedError("must be implemented in a subclass") def exists(self, path): """Does a file or directory exist at the given path? @@ -248,7 +314,7 @@ def exists(self, path): def get(self, path, content=True, type=None, format=None): """Get a file or directory model.""" - raise NotImplementedError('must be implemented in a subclass') + raise NotImplementedError("must be implemented in a subclass") def save(self, model, path): """ @@ -258,22 +324,22 @@ def save(self, model, path): should call self.run_pre_save_hook(model=model, path=path) prior to writing any data. """ - raise NotImplementedError('must be implemented in a subclass') + raise NotImplementedError("must be implemented in a subclass") def delete_file(self, path): """Delete the file or directory at path.""" - raise NotImplementedError('must be implemented in a subclass') + raise NotImplementedError("must be implemented in a subclass") def rename_file(self, old_path, new_path): """Rename a file or directory.""" - raise NotImplementedError('must be implemented in a subclass') + raise NotImplementedError("must be implemented in a subclass") # ContentsManager API part 2: methods that have useable default # implementations, but can be overridden in subclasses. def delete(self, path): """Delete a file/directory and any associated checkpoints.""" - path = path.strip('/') + path = path.strip("/") if not path: raise HTTPError(400, "Can't delete root") self.delete_file(path) @@ -290,8 +356,8 @@ def update(self, model, path): For use in PATCH requests, to enable renaming a file without re-uploading its contents. Only used for renaming at the moment. """ - path = path.strip('/') - new_path = model.get('path', path).strip('/') + path = path.strip("/") + new_path = model.get("path", path).strip("/") if path != new_path: self.rename(path, new_path) model = self.get(new_path, content=False) @@ -310,9 +376,9 @@ def get_kernel_path(self, path, model=None): notebook server. FileContentsManager overrides this to use the directory containing the notebook. """ - return '' + return "" - def increment_filename(self, filename, path='', insert=''): + def increment_filename(self, filename, path="", insert=""): """Increment a filename until it is unique. 
Parameters @@ -330,96 +396,98 @@ def increment_filename(self, filename, path='', insert=''): A filename that is unique, based on the input filename. """ # Extract the full suffix from the filename (e.g. .tar.gz) - path = path.strip('/') - basename, dot, ext = filename.rpartition('.') - if ext != 'ipynb': - basename, dot, ext = filename.partition('.') - + path = path.strip("/") + basename, dot, ext = filename.rpartition(".") + if ext != "ipynb": + basename, dot, ext = filename.partition(".") + suffix = dot + ext for i in itertools.count(): if i: - insert_i = '{}{}'.format(insert, i) + insert_i = "{}{}".format(insert, i) else: - insert_i = '' - name = u'{basename}{insert}{suffix}'.format(basename=basename, - insert=insert_i, suffix=suffix) - if not self.exists(u'{}/{}'.format(path, name)): + insert_i = "" + name = u"{basename}{insert}{suffix}".format( + basename=basename, insert=insert_i, suffix=suffix + ) + if not self.exists(u"{}/{}".format(path, name)): break return name def validate_notebook_model(self, model): """Add failed-validation message to model""" try: - validate_nb(model['content']) + validate_nb(model["content"]) except ValidationError as e: - model['message'] = u'Notebook validation failed: {}:\n{}'.format( - e.message, json.dumps(e.instance, indent=1, default=lambda obj: ''), + model["message"] = u"Notebook validation failed: {}:\n{}".format( + e.message, + json.dumps(e.instance, indent=1, default=lambda obj: ""), ) return model - - def new_untitled(self, path='', type='', ext=''): + + def new_untitled(self, path="", type="", ext=""): """Create a new untitled file or directory in path - + path must be a directory - + File extension can be specified. - + Use `new` to create files with a fully specified path (including filename). """ - path = path.strip('/') + path = path.strip("/") if not self.dir_exists(path): - raise HTTPError(404, 'No such directory: %s' % path) - + raise HTTPError(404, "No such directory: %s" % path) + model = {} if type: - model['type'] = type - - if ext == '.ipynb': - model.setdefault('type', 'notebook') + model["type"] = type + + if ext == ".ipynb": + model.setdefault("type", "notebook") else: - model.setdefault('type', 'file') - - insert = '' - if model['type'] == 'directory': + model.setdefault("type", "file") + + insert = "" + if model["type"] == "directory": untitled = self.untitled_directory - insert = ' ' - elif model['type'] == 'notebook': + insert = " " + elif model["type"] == "notebook": untitled = self.untitled_notebook - ext = '.ipynb' - elif model['type'] == 'file': + ext = ".ipynb" + elif model["type"] == "file": untitled = self.untitled_file else: - raise HTTPError(400, "Unexpected model type: %r" % model['type']) - + raise HTTPError(400, "Unexpected model type: %r" % model["type"]) + name = self.increment_filename(untitled + ext, path, insert=insert) - path = u'{0}/{1}'.format(path, name) + path = u"{0}/{1}".format(path, name) return self.new(model, path) - - def new(self, model=None, path=''): + + def new(self, model=None, path=""): """Create a new file or directory and return its model with no content. - + To create a new untitled entity in a directory, use `new_untitled`. 
""" - path = path.strip('/') + path = path.strip("/") if model is None: model = {} - - if path.endswith('.ipynb'): - model.setdefault('type', 'notebook') + + if path.endswith(".ipynb"): + model.setdefault("type", "notebook") else: - model.setdefault('type', 'file') - + model.setdefault("type", "file") + # no content, not a directory, so fill out new-file model - if 'content' not in model and model['type'] != 'directory': - if model['type'] == 'notebook': - model['content'] = new_notebook() - model['format'] = 'json' + if "content" not in model and model["type"] != "directory": + if model["type"] == "notebook": + model["content"] = new_notebook() + model["format"] = "json" else: - model['content'] = '' - model['type'] = 'file' - model['format'] = 'text' - + model["content"] = "" + model["type"] = "file" + model["format"] = "text" + model = self.save(model, path) return model @@ -429,33 +497,33 @@ def copy(self, from_path, to_path=None): If to_path not specified, it will be the parent directory of from_path. If to_path is a directory, filename will increment `from_path-Copy#.ext`. Considering multi-part extensions, the Copy# part will be placed before the first dot for all the extensions except `ipynb`. - For easier manual searching in case of notebooks, the Copy# part will be placed before the last dot. + For easier manual searching in case of notebooks, the Copy# part will be placed before the last dot. from_path must be a full path to a file. """ - path = from_path.strip('/') + path = from_path.strip("/") if to_path is not None: - to_path = to_path.strip('/') + to_path = to_path.strip("/") - if '/' in path: - from_dir, from_name = path.rsplit('/', 1) + if "/" in path: + from_dir, from_name = path.rsplit("/", 1) else: - from_dir = '' + from_dir = "" from_name = path - + model = self.get(path) - model.pop('path', None) - model.pop('name', None) - if model['type'] == 'directory': + model.pop("path", None) + model.pop("name", None) + if model["type"] == "directory": raise HTTPError(400, "Can't copy directories") - + if to_path is None: to_path = from_dir if self.dir_exists(to_path): - name = copy_pat.sub(u'.', from_name) - to_name = self.increment_filename(name, to_path, insert='-Copy') - to_path = u'{0}/{1}'.format(to_path, to_name) - + name = copy_pat.sub(u".", from_name) + to_name = self.increment_filename(name, to_path, insert="-Copy") + to_path = u"{0}/{1}".format(to_path, to_name) + model = self.save(model, to_path) return model @@ -471,12 +539,12 @@ def trust_notebook(self, path): The path of a notebook """ model = self.get(path) - nb = model['content'] + nb = model["content"] self.log.warning("Trusting notebook %s", path) self.notary.mark_cells(nb, True) self.check_and_sign(nb, path) - def check_and_sign(self, nb, path=''): + def check_and_sign(self, nb, path=""): """Check for trusted cells, and sign the notebook. Called as a part of saving notebooks. @@ -493,7 +561,7 @@ def check_and_sign(self, nb, path=''): else: self.log.warning("Notebook %s is not trusted", path) - def mark_trusted_cells(self, nb, path=''): + def mark_trusted_cells(self, nb, path=""): """Mark cells as trusted if the notebook signature matches. Called as a part of loading notebooks. 
diff --git a/tests/test_files.py b/tests/test_files.py index e65dc2da93..e34e5cf3ab 100644 --- a/tests/test_files.py +++ b/tests/test_files.py @@ -1,85 +1,68 @@ import os +import io +import zipfile +import tarfile +import pathlib +import shutil import pytest import tornado from .utils import expected_http_error from nbformat import writes -from nbformat.v4 import (new_notebook, - new_markdown_cell, new_code_cell, - new_output) +from nbformat.v4 import new_notebook, new_markdown_cell, new_code_cell, new_output async def test_hidden_files(fetch, serverapp, root_dir): not_hidden = [ - u'å b', - u'å b/ç. d', + u"å b", + u"å b/ç. d", ] hidden = [ - u'.å b', - u'å b/.ç d', + u".å b", + u"å b/.ç d", ] dirs = not_hidden + hidden for d in dirs: - path = root_dir / d.replace('/', os.sep) + path = root_dir / d.replace("/", os.sep) path.mkdir(parents=True, exist_ok=True) - path.joinpath('foo').write_text('foo') - path.joinpath('.foo').write_text('.foo') - + path.joinpath("foo").write_text("foo") + path.joinpath(".foo").write_text(".foo") for d in not_hidden: - path = root_dir / d.replace('/', os.sep) + path = root_dir / d.replace("/", os.sep) - r = await fetch( - 'files', d, 'foo', - method='GET' - ) - assert r.body.decode() == 'foo' + r = await fetch("files", d, "foo", method="GET") + assert r.body.decode() == "foo" with pytest.raises(tornado.httpclient.HTTPClientError) as e: - r = await fetch( - 'files', d, '.foo', - method='GET' - ) + r = await fetch("files", d, ".foo", method="GET") assert expected_http_error(e, 404) - for d in hidden: - path = root_dir / d.replace('/', os.sep) - for foo in ('foo', '.foo'): + path = root_dir / d.replace("/", os.sep) + for foo in ("foo", ".foo"): with pytest.raises(tornado.httpclient.HTTPClientError) as e: - r = await fetch( - 'files', d, foo, - method='GET' - ) + r = await fetch("files", d, foo, method="GET") assert expected_http_error(e, 404) serverapp.contents_manager.allow_hidden = True for d in not_hidden: - path = root_dir / d.replace('/', os.sep) + path = root_dir / d.replace("/", os.sep) - r = await fetch( - 'files', d, 'foo', - method='GET' - ) - assert r.body.decode() == 'foo' + r = await fetch("files", d, "foo", method="GET") + assert r.body.decode() == "foo" - r = await fetch( - 'files', d, '.foo', - method='GET' - ) - assert r.body.decode() == '.foo' + r = await fetch("files", d, ".foo", method="GET") + assert r.body.decode() == ".foo" for d in hidden: - path = root_dir / d.replace('/', os.sep) + path = root_dir / d.replace("/", os.sep) - for foo in ('foo', '.foo'): - r = await fetch( - 'files', d, foo, - method='GET' - ) + for foo in ("foo", ".foo"): + r = await fetch("files", d, foo, method="GET") assert r.body.decode() == foo @@ -87,73 +70,56 @@ async def test_contents_manager(fetch, serverapp, root_dir): "make sure ContentsManager returns right files (ipynb, bin, txt)." 
nb = new_notebook( cells=[ - new_markdown_cell(u'Created by test ³'), - new_code_cell("print(2*6)", outputs=[ - new_output("stream", text="12"), - ]) + new_markdown_cell(u"Created by test ³"), + new_code_cell("print(2*6)", outputs=[new_output("stream", text="12"),]), ] ) - root_dir.joinpath('testnb.ipynb').write_text(writes(nb, version=4), encoding='utf-8') - root_dir.joinpath('test.bin').write_bytes(b'\xff' + os.urandom(5)) - root_dir.joinpath('test.txt').write_text('foobar') - - r = await fetch( - 'files/testnb.ipynb', - method='GET' + root_dir.joinpath("testnb.ipynb").write_text( + writes(nb, version=4), encoding="utf-8" ) + root_dir.joinpath("test.bin").write_bytes(b"\xff" + os.urandom(5)) + root_dir.joinpath("test.txt").write_text("foobar") + + r = await fetch("files/testnb.ipynb", method="GET") assert r.code == 200 - assert 'print(2*6)' in r.body.decode('utf-8') + assert "print(2*6)" in r.body.decode("utf-8") - r = await fetch( - 'files/test.bin', - method='GET' - ) + r = await fetch("files/test.bin", method="GET") assert r.code == 200 - assert r.headers['content-type'] == 'application/octet-stream' - assert r.body[:1] == b'\xff' + assert r.headers["content-type"] == "application/octet-stream" + assert r.body[:1] == b"\xff" assert len(r.body) == 6 - r = await fetch( - 'files/test.txt', - method='GET' - ) + r = await fetch("files/test.txt", method="GET") assert r.code == 200 - assert r.headers['content-type'] == 'text/plain; charset=UTF-8' - assert r.body.decode() == 'foobar' + assert r.headers["content-type"] == "text/plain; charset=UTF-8" + assert r.body.decode() == "foobar" async def test_download(fetch, serverapp, root_dir): - text = 'hello' - root_dir.joinpath('test.txt').write_text(text) + text = "hello" + root_dir.joinpath("test.txt").write_text(text) - r = await fetch( - 'files', 'test.txt', - method='GET' - ) - disposition = r.headers.get('Content-Disposition', '') - assert 'attachment' not in disposition + r = await fetch("files", "test.txt", method="GET") + disposition = r.headers.get("Content-Disposition", "") + assert "attachment" not in disposition - r = await fetch( - 'files', 'test.txt', - method='GET', - params={'download': True} - ) - disposition = r.headers.get('Content-Disposition', '') - assert 'attachment' in disposition + r = await fetch("files", "test.txt", method="GET", params={"download": True}) + disposition = r.headers.get("Content-Disposition", "") + assert "attachment" in disposition assert "filename*=utf-8''test.txt" in disposition async def test_old_files_redirect(fetch, serverapp, root_dir): """pre-2.0 'files/' prefixed links are properly redirected""" - root_dir.joinpath('files').mkdir(parents=True, exist_ok=True) - root_dir.joinpath('sub', 'files').mkdir(parents=True, exist_ok=True) - + root_dir.joinpath("files").mkdir(parents=True, exist_ok=True) + root_dir.joinpath("sub", "files").mkdir(parents=True, exist_ok=True) - for prefix in ('', 'sub'): - root_dir.joinpath(prefix, 'files', 'f1.txt').write_text(prefix + '/files/f1') - root_dir.joinpath(prefix, 'files', 'f2.txt').write_text(prefix + '/files/f2') - root_dir.joinpath(prefix, 'f2.txt').write_text(prefix + '/f2') - root_dir.joinpath(prefix, 'f3.txt').write_text(prefix + '/f3') + for prefix in ("", "sub"): + root_dir.joinpath(prefix, "files", "f1.txt").write_text(prefix + "/files/f1") + root_dir.joinpath(prefix, "files", "f2.txt").write_text(prefix + "/files/f2") + root_dir.joinpath(prefix, "f2.txt").write_text(prefix + "/f2") + root_dir.joinpath(prefix, "f3.txt").write_text(prefix + "/f3") # These 
depend on the tree handlers # @@ -171,4 +137,110 @@ async def test_old_files_redirect(fetch, serverapp, root_dir): # r = self.request('GET', 'files/test.txt?download=1') # disposition = r.headers.get('Content-Disposition', '') # self.assertIn('attachment', disposition) - # self.assertIn("filename*=utf-8''test.txt", disposition) \ No newline at end of file + # self.assertIn("filename*=utf-8''test.txt", disposition) + + +async def test_download_directory(fetch, serverapp, root_dir): + # Create a dummy directory. + root_dir = pathlib.Path(root_dir) + archive_dir_path = root_dir / "download-archive-dir" + archive_dir_path.mkdir() + with open(archive_dir_path / "test1.txt", "w") as f: + f.write("hello1") + with open(archive_dir_path / "test2.txt", "w") as f: + f.write("hello2") + with open(archive_dir_path / "test3.md", "w") as f: + f.write("hello3") + + # Try to download the created folder. + file_lists = { + "download-archive-dir/test2.txt", + "download-archive-dir/test1.txt", + "download-archive-dir/test3.md", + } + + archive_formats = { + "zip": "r", + "tgz": "r|gz", + "tar.gz": "r|gz", + "tbz": "r|bz2", + "tbz2": "r|bz2", + "tar.bz": "r|bz2", + "tar.bz2": "r|bz2", + "txz": "r|xz", + "tar.xz": "r|xz", + } + + archive_token = 59487596 + + for archive_format, mode in archive_formats.items(): + params = dict(archiveToken=archive_token, archiveFormat=archive_format) + dir_path = str(archive_dir_path.relative_to(root_dir)) + r = await fetch("directories", dir_path, method="GET", params=params) + assert r.code == 200 + assert r.headers.get("content-type") == "application/octet-stream" + assert r.headers.get("cache-control") == "no-cache" + if archive_format == "zip": + with zipfile.ZipFile(io.BytesIO(r.body), mode=mode) as zf: + assert set(zf.namelist()) == file_lists + else: + with tarfile.open(fileobj=io.BytesIO(r.body), mode=mode) as tf: + assert set(map(lambda m: m.name, tf.getmembers())) == file_lists + + +async def test_extract_directory(fetch, serverapp, root_dir): + + format_mode = { + "zip": "w", + "tgz": "w|gz", + "tar.gz": "w|gz", + "tbz": "w|bz2", + "tbz2": "w|bz2", + "tar.bz": "w|bz2", + "tar.bz2": "w|bz2", + "txz": "w|xz", + "tar.xz": "w|xz", + } + + for archive_format, mode in format_mode.items(): + + # Create a dummy directory. + root_dir = pathlib.Path(root_dir) + archive_dir_path = root_dir / "extract-archive-dir" + archive_dir_path.mkdir() + with open(archive_dir_path / "extract-test1.txt", "w") as f: + f.write("hello1") + with open(archive_dir_path / "extract-test2.txt", "w") as f: + f.write("hello2") + with open(archive_dir_path / "extract-test3.md", "w") as f: + f.write("hello3") + + # Make an archive + archive_path = archive_dir_path.with_suffix("." 
+ archive_format)
+        if archive_format == "zip":
+            with zipfile.ZipFile(archive_path, mode=mode) as writer:
+                for file_path in archive_dir_path.rglob("*"):
+                    if file_path.is_file():
+                        writer.write(file_path, file_path.relative_to(root_dir))
+        else:
+            with tarfile.open(str(archive_path), mode=mode) as writer:
+                for file_path in archive_dir_path.rglob("*"):
+                    if file_path.is_file():
+                        writer.add(str(file_path), str(file_path.relative_to(root_dir)))
+
+        # Remove the directory
+        shutil.rmtree(archive_dir_path)
+
+        # REST call to extract the archive
+        relative_archive_path = str(archive_path.relative_to(root_dir))
+        print(archive_path)
+        r = await fetch("extract-directories", relative_archive_path, method="GET")
+
+        assert r.code == 200
+        assert archive_dir_path.is_dir()
+
+        n_files = len(list(archive_dir_path.glob("*")))
+        assert n_files == 3
+
+        # Remove the directory after extraction
+        shutil.rmtree(archive_dir_path)

From 8a94b41ad8e210c6efd366bf0ce4e0390881d471 Mon Sep 17 00:00:00 2001
From: Hadrien Mary
Date: Mon, 29 Jun 2020 17:01:32 -0400
Subject: [PATCH 02/11] fix py35 compat

---
 tests/test_files.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_files.py b/tests/test_files.py
index e34e5cf3ab..f148aca27c 100644
--- a/tests/test_files.py
+++ b/tests/test_files.py
@@ -142,7 +142,7 @@ async def test_old_files_redirect(fetch, serverapp, root_dir):
 
 async def test_download_directory(fetch, serverapp, root_dir):
     # Create a dummy directory.
-    root_dir = pathlib.Path(root_dir)
+    root_dir = pathlib.Path(str(root_dir))
     archive_dir_path = root_dir / "download-archive-dir"
     archive_dir_path.mkdir()
     with open(archive_dir_path / "test1.txt", "w") as f:
@@ -205,7 +205,7 @@ async def test_extract_directory(fetch, serverapp, root_dir):
     for archive_format, mode in format_mode.items():
 
         # Create a dummy directory.
-        root_dir = pathlib.Path(root_dir)
+        root_dir = pathlib.Path(str(root_dir))
         archive_dir_path = root_dir / "extract-archive-dir"
         archive_dir_path.mkdir()
         with open(archive_dir_path / "extract-test1.txt", "w") as f:

From 469557b49da9adb3c8093722c771afc93d0f9470 Mon Sep 17 00:00:00 2001
From: Hadrien Mary
Date: Tue, 7 Jul 2020 07:59:32 -0400
Subject: [PATCH 03/11] run_in_executor for DirectoryHandler

---
 jupyter_server/files/handlers.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/jupyter_server/files/handlers.py b/jupyter_server/files/handlers.py
index 1a6d3fb137..16e21923f9 100644
--- a/jupyter_server/files/handlers.py
+++ b/jupyter_server/files/handlers.py
@@ -192,17 +192,17 @@ async def get(self, archive_path, include_body=False):
         # but it does not seem to work here (500 internal HTTP error).
         # This strategy is nice when compressing very large directories. Any idea how to integrate that is welcome.
- # self.flush_cb = ioloop.PeriodicCallback( - # self.flush, ARCHIVE_DOWNLOAD_FLUSH_DELAY - # ) - # self.flush_cb.start() + self.flush_cb = ioloop.PeriodicCallback( + self.flush, ARCHIVE_DOWNLOAD_FLUSH_DELAY + ) + self.flush_cb.start() - self.archive_and_download(archive_path, archive_format, archive_token) + # self.archive_and_download(archive_path, archive_format, archive_token) - # args = (archive_path, archive_format, archive_token) - # yield ioloop.IOLoop.current().run_in_executor( - # None, self.archive_and_download, *args - # ) + args = (archive_path, archive_format, archive_token) + yield ioloop.IOLoop.current().run_in_executor( + None, self.archive_and_download, *args + ) if self.canceled: self.log.info("Download canceled.") @@ -211,7 +211,7 @@ async def get(self, archive_path, include_body=False): self.log.info("Finished downloading {}.".format(archive_filename)) self.set_cookie("archiveToken", archive_token) - # self.flush_cb.stop() + self.flush_cb.stop() self.finish() def archive_and_download(self, archive_path, archive_format, archive_token): @@ -230,7 +230,7 @@ def archive_and_download(self, archive_path, archive_format, archive_token): def on_connection_close(self): super().on_connection_close() self.canceled = True - # self.flush_cb.stop() + self.flush_cb.stop() class ExtractDirectoryHandler(JupyterHandler): From 4282e4cc82d638744d095bab226949c10ab942b7 Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Tue, 7 Jul 2020 08:05:24 -0400 Subject: [PATCH 04/11] replace yield by await --- jupyter_server/files/handlers.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/jupyter_server/files/handlers.py b/jupyter_server/files/handlers.py index 16e21923f9..be20bcd46a 100644 --- a/jupyter_server/files/handlers.py +++ b/jupyter_server/files/handlers.py @@ -200,7 +200,7 @@ async def get(self, archive_path, include_body=False): # self.archive_and_download(archive_path, archive_format, archive_token) args = (archive_path, archive_format, archive_token) - yield ioloop.IOLoop.current().run_in_executor( + await ioloop.IOLoop.current().run_in_executor( None, self.archive_and_download, *args ) @@ -235,7 +235,7 @@ def on_connection_close(self): class ExtractDirectoryHandler(JupyterHandler): @web.authenticated - def get(self, archive_path, include_body=False): + async def get(self, archive_path, include_body=False): # /extract-archive/ requests must originate from the same site self.check_xsrf_cookie() @@ -248,10 +248,9 @@ def get(self, archive_path, include_body=False): root_dir = pathlib.Path(cm.root_dir) archive_path = root_dir / url2path(archive_path) - # yield ioloop.IOLoop.current().run_in_executor( - # None, self.extract_archive, archive_path - # ) - self.extract_archive(archive_path) + await ioloop.IOLoop.current().run_in_executor( + None, self.extract_archive, archive_path + ) self.finish() From e85b9044f66ae9c5f642542a652a45c312cfd7d8 Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Tue, 7 Jul 2020 08:07:54 -0400 Subject: [PATCH 05/11] doc --- jupyter_server/files/handlers.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/jupyter_server/files/handlers.py b/jupyter_server/files/handlers.py index be20bcd46a..b5addc643f 100644 --- a/jupyter_server/files/handlers.py +++ b/jupyter_server/files/handlers.py @@ -187,11 +187,6 @@ async def get(self, archive_path, include_body=False): ) self.canceled = False - - # The original extension is using `ioloop.PeriodicCallback` and `ioloop.IOLoop.current().run_in_executor` - # but it does not seem to work here (500 
internal HTTP error).
-        # This strategy is nice when compressing very large directories. Any idea how to integrate that is welcome.
-
         self.flush_cb = ioloop.PeriodicCallback(
             self.flush, ARCHIVE_DOWNLOAD_FLUSH_DELAY
         )

From 07692729c568cd23e67eaf3f0d1a96d807595001 Mon Sep 17 00:00:00 2001
From: Hadrien Mary
Date: Tue, 1 Sep 2020 09:51:52 -0400
Subject: [PATCH 06/11] docstrings

---
 jupyter_server/files/handlers.py            | 22 +++++++++++++++++++--
 jupyter_server/services/contents/manager.py | 13 ++++++------
 2 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/jupyter_server/files/handlers.py b/jupyter_server/files/handlers.py
index b5addc643f..efa040c35f 100644
--- a/jupyter_server/files/handlers.py
+++ b/jupyter_server/files/handlers.py
@@ -100,6 +100,10 @@ async def get(self, path, include_body=True):
 
 
 class ArchiveStream:
+    """ArchiveStream is an abstraction layer over a Python archive that
+    allows streaming archive files.
+    """
+
     def __init__(self, handler):
         self.handler = handler
         self.position = 0
@@ -120,6 +124,9 @@ def flush(self):
 
 
 def make_writer(handler, archive_format="zip"):
+    """Given a handler object, create an `ArchiveStream` instance
+    and set up an archive file object using the specified archive format.
+    """
     fileobj = ArchiveStream(handler)
 
     if archive_format == "zip":
@@ -139,6 +146,9 @@ def make_writer(handler, archive_format="zip"):
 
 
 def make_reader(archive_path):
+    """Return the appropriate archive file instance given
+    the extension of `archive_path`.
+    """
 
     archive_format = "".join(archive_path.suffixes)[1:]
 
@@ -156,6 +166,11 @@ def make_reader(archive_path):
 
 
 class DirectoryHandler(JupyterHandler):
+    """Download a directory. Since a folder can't be downloaded directly,
+    it is first archived, with or without compression, before being downloaded by the client.
+    The archive format (zip, tar.gz, etc.) can be configured via the request.
+    """
+
     @web.authenticated
     async def get(self, archive_path, include_body=False):
 
@@ -192,8 +207,6 @@ async def get(self, archive_path, include_body=False):
         )
         self.flush_cb.start()
 
-        # self.archive_and_download(archive_path, archive_format, archive_token)
-
         args = (archive_path, archive_format, archive_token)
         await ioloop.IOLoop.current().run_in_executor(
             None, self.archive_and_download, *args
@@ -229,6 +242,11 @@ def on_connection_close(self):
 
 
 class ExtractDirectoryHandler(JupyterHandler):
+    """Extract the content of an archive on the server side. Given an archive
+    already present on the server, this handler extracts its content on request.
+    The archive format is detected from the extension of the archive.
+    """
+
     @web.authenticated
     async def get(self, archive_path, include_body=False):
 
diff --git a/jupyter_server/services/contents/manager.py b/jupyter_server/services/contents/manager.py
index a06e7a0412..88ff3abd98 100644
--- a/jupyter_server/services/contents/manager.py
+++ b/jupyter_server/services/contents/manager.py
@@ -178,11 +178,12 @@ def _default_checkpoints_kwargs(self):
         klass=RequestHandler,
         allow_none=True,
         config=True,
-        help="""handler class to use when serving raw directory requests.
+        help="""Handler class to use when serving raw directory requests.
 
-        TODO: complete
+        This handler archives a folder on the server side before
+        it is downloaded by the client.
 
-        Access to these files should be Authenticated.
+        Access to these directories should be Authenticated.
""", ) @@ -197,11 +198,11 @@ def _default_checkpoints_kwargs(self): klass=RequestHandler, allow_none=True, config=True, - help="""handler class to use when serving raw directory requests. + help=""""Handler class to use when serving raw directory requests. - TODO: complete + This handler allows to extract the content of a folder on the server side. - Access to these files should be Authenticated. + Access to these directories should be Authenticated. """, ) From 626908c542e51b225f8838afb6ead2210560161b Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Tue, 1 Sep 2020 12:10:39 -0400 Subject: [PATCH 07/11] add size limit + tests --- jupyter_server/files/handlers.py | 40 +++++++++++++++++++++++++- tests/test_files.py | 49 ++++++++++++++++++++++++++++++-- 2 files changed, 86 insertions(+), 3 deletions(-) diff --git a/jupyter_server/files/handlers.py b/jupyter_server/files/handlers.py index efa040c35f..ff43a4c304 100644 --- a/jupyter_server/files/handlers.py +++ b/jupyter_server/files/handlers.py @@ -86,6 +86,8 @@ async def get(self, path, include_body=True): # The delay in ms at which we send the chunk of data # to the client. ARCHIVE_DOWNLOAD_FLUSH_DELAY = 100 + +# Supported archive format SUPPORTED_FORMAT = [ "zip", "tgz", @@ -99,6 +101,10 @@ async def get(self, path, include_body=True): ] +DEFAULT_DIRECTORY_SIZE_LIMIT = 1073741824 # 1GB +DEFAULT_ARCHIVE_FORMAT = "zip" + + class ArchiveStream: """ArchiveStream is an abstraction layer to a Python archive allowing to stream archive files. @@ -145,6 +151,13 @@ def make_writer(handler, archive_format="zip"): return archive_file +def get_folder_size(dir_path): + """Return the size in bytes of a given directory. + """ + dir_path = pathlib.Path(dir_path) + return sum(f.stat().st_size for f in dir_path.glob("**/*") if f.is_file()) + + def make_reader(archive_path): """Return the appropriate archive file instance given the extension's path of `archive_path`. @@ -183,15 +196,40 @@ async def get(self, archive_path, include_body=False): raise web.HTTPError(404) archive_token = self.get_argument("archiveToken") - archive_format = self.get_argument("archiveFormat", "zip") + archive_format = self.get_argument("archiveFormat", DEFAULT_ARCHIVE_FORMAT) + folder_size_limit = self.get_argument("folderSizeLimit", None) + + # Check whether the specified archive format is supported. if archive_format not in SUPPORTED_FORMAT: self.log.error("Unsupported format {}.".format(archive_format)) raise web.HTTPError(404) + # If the folder size limit is not specified in the request, a + # default size limit is used. + try: + folder_size_limit_num = int(folder_size_limit) + + except (ValueError, TypeError): + self.log.warning( + "folderSizeLimit is a not a valid number: {}.".format(folder_size_limit) + ) + folder_size_limit_num = DEFAULT_DIRECTORY_SIZE_LIMIT + root_dir = pathlib.Path(cm.root_dir) archive_path = root_dir / url2path(archive_path) archive_filename = archive_path.with_suffix(".{}".format(archive_format)).name + # Check whether the archive folder is not larger than the size limit. 
+ folder_size = get_folder_size(archive_path) + print(folder_size) + if folder_size > folder_size_limit_num: + self.log.error( + "Archive folder size is larger than the size limit: {} bytes with a size limit of {}.".format( + folder_size, folder_size_limit_num + ) + ) + raise web.HTTPError(413) + self.log.info( "Prepare {} for archiving and downloading.".format(archive_filename) ) diff --git a/tests/test_files.py b/tests/test_files.py index f148aca27c..73f83e8038 100644 --- a/tests/test_files.py +++ b/tests/test_files.py @@ -140,7 +140,7 @@ async def test_old_files_redirect(fetch, serverapp, root_dir): # self.assertIn("filename*=utf-8''test.txt", disposition) -async def test_download_directory(fetch, serverapp, root_dir): +def make_fake_folder(root_dir): # Create a dummy directory. root_dir = pathlib.Path(str(root_dir)) archive_dir_path = root_dir / "download-archive-dir" @@ -159,6 +159,14 @@ async def test_download_directory(fetch, serverapp, root_dir): "download-archive-dir/test3.md", } + dir_path = str(archive_dir_path.relative_to(root_dir)) + + return dir_path, file_lists + + +async def test_download_directory(fetch, serverapp, root_dir): + dir_path, file_lists = make_fake_folder(root_dir) + archive_formats = { "zip": "r", "tgz": "r|gz", @@ -175,8 +183,8 @@ async def test_download_directory(fetch, serverapp, root_dir): for archive_format, mode in archive_formats.items(): params = dict(archiveToken=archive_token, archiveFormat=archive_format) - dir_path = str(archive_dir_path.relative_to(root_dir)) r = await fetch("directories", dir_path, method="GET", params=params) + assert r.code == 200 assert r.headers.get("content-type") == "application/octet-stream" assert r.headers.get("cache-control") == "no-cache" @@ -188,6 +196,43 @@ async def test_download_directory(fetch, serverapp, root_dir): assert set(map(lambda m: m.name, tf.getmembers())) == file_lists +async def test_download_directory_size_limit(fetch, serverapp, root_dir): + + dir_path, file_lists = make_fake_folder(root_dir) + archive_token = 59487596 + archive_format = "zip" + mode = "r" + + # The above fake folder has a size of 18 + folder_size_limit = 10 + + params = dict( + archiveToken=archive_token, + archiveFormat=archive_format, + folderSizeLimit=folder_size_limit, + ) + + with pytest.raises(tornado.httpclient.HTTPClientError) as e: + await fetch("directories", dir_path, method="GET", params=params) + assert expected_http_error(e, 413) + +async def test_download_wrong_archive_format(fetch, serverapp, root_dir): + + dir_path, file_lists = make_fake_folder(root_dir) + archive_token = 59487596 + archive_format = "a_format_that_does_not_exist" + mode = "r" + + + params = dict( + archiveToken=archive_token, + archiveFormat=archive_format, + ) + + with pytest.raises(tornado.httpclient.HTTPClientError) as e: + await fetch("directories", dir_path, method="GET", params=params) + assert expected_http_error(e, 404) + async def test_extract_directory(fetch, serverapp, root_dir): format_mode = { From c1c62fbc008c68f5104cfb0beb57d24d88f685b7 Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Tue, 1 Sep 2020 12:11:29 -0400 Subject: [PATCH 08/11] formatting --- tests/test_files.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/test_files.py b/tests/test_files.py index 73f83e8038..60d859995d 100644 --- a/tests/test_files.py +++ b/tests/test_files.py @@ -216,6 +216,7 @@ async def test_download_directory_size_limit(fetch, serverapp, root_dir): await fetch("directories", dir_path, method="GET", params=params) 
assert expected_http_error(e, 413) + async def test_download_wrong_archive_format(fetch, serverapp, root_dir): dir_path, file_lists = make_fake_folder(root_dir) @@ -223,16 +224,13 @@ async def test_download_wrong_archive_format(fetch, serverapp, root_dir): archive_format = "a_format_that_does_not_exist" mode = "r" - - params = dict( - archiveToken=archive_token, - archiveFormat=archive_format, - ) + params = dict(archiveToken=archive_token, archiveFormat=archive_format) with pytest.raises(tornado.httpclient.HTTPClientError) as e: await fetch("directories", dir_path, method="GET", params=params) assert expected_http_error(e, 404) + async def test_extract_directory(fetch, serverapp, root_dir): format_mode = { From af58ff3f02e9fe93ac7a324e64bfb6d11b4abc1f Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Wed, 2 Sep 2020 08:29:15 -0400 Subject: [PATCH 09/11] remove print --- jupyter_server/files/handlers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/jupyter_server/files/handlers.py b/jupyter_server/files/handlers.py index ff43a4c304..a065669bbb 100644 --- a/jupyter_server/files/handlers.py +++ b/jupyter_server/files/handlers.py @@ -221,7 +221,6 @@ async def get(self, archive_path, include_body=False): # Check whether the archive folder is not larger than the size limit. folder_size = get_folder_size(archive_path) - print(folder_size) if folder_size > folder_size_limit_num: self.log.error( "Archive folder size is larger than the size limit: {} bytes with a size limit of {}.".format( From 6e136520e896bc1b420c8ad1878533ec7a652410 Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Thu, 3 Sep 2020 12:07:08 -0400 Subject: [PATCH 10/11] make ExtractDirectoryHandler a PUT method --- jupyter_server/files/handlers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter_server/files/handlers.py b/jupyter_server/files/handlers.py index a065669bbb..fcc8392e4a 100644 --- a/jupyter_server/files/handlers.py +++ b/jupyter_server/files/handlers.py @@ -285,7 +285,7 @@ class ExtractDirectoryHandler(JupyterHandler): """ @web.authenticated - async def get(self, archive_path, include_body=False): + async def put(self, archive_path, include_body=False): # /extract-archive/ requests must originate from the same site self.check_xsrf_cookie() From 7099c20834e89e8cca96a0cd9d594b92f39b0787 Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Thu, 3 Sep 2020 12:11:01 -0400 Subject: [PATCH 11/11] add check for directory_handler_class --- jupyter_server/files/handlers.py | 4 ++++ tests/test_files.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/jupyter_server/files/handlers.py b/jupyter_server/files/handlers.py index fcc8392e4a..932eaa5534 100644 --- a/jupyter_server/files/handlers.py +++ b/jupyter_server/files/handlers.py @@ -195,6 +195,10 @@ async def get(self, archive_path, include_body=False): self.log.info("Refusing to serve hidden file, via 404 Error") raise web.HTTPError(404) + if not hasattr(cm, "directory_handler_class"): + self.log.info("Method not allowed") + raise web.HTTPError(405) + archive_token = self.get_argument("archiveToken") archive_format = self.get_argument("archiveFormat", DEFAULT_ARCHIVE_FORMAT) folder_size_limit = self.get_argument("folderSizeLimit", None) diff --git a/tests/test_files.py b/tests/test_files.py index 60d859995d..c24bf350e8 100644 --- a/tests/test_files.py +++ b/tests/test_files.py @@ -277,7 +277,7 @@ async def test_extract_directory(fetch, serverapp, root_dir): # REST call to extract the archive relative_archive_path = 
str(archive_path.relative_to(root_dir)) print(archive_path) - r = await fetch("extract-directories", relative_archive_path, method="GET") + r = await fetch("extract-directories", relative_archive_path, method="PUT") assert r.code == 200 assert archive_dir_path.is_dir()
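A quick usage sketch of the endpoints added by this series (not part of the
patches themselves): it assumes a server running at http://localhost:8888 with
token authentication satisfying the XSRF check, and the `requests` library on
the client side; the base URL, token value, and folder names are illustrative
only.

import requests

BASE_URL = "http://localhost:8888"  # assumed server address
HEADERS = {"Authorization": "token <your-token>"}  # assumed auth token

# Download the folder "my-folder" as a tar.gz archive. archiveToken is
# required and is echoed back as a cookie by DirectoryHandler; the optional
# folderSizeLimit from patch 07 caps the size of the folder being archived
# (the server answers 413 when the folder is larger).
params = {
    "archiveToken": "42",
    "archiveFormat": "tar.gz",
    "folderSizeLimit": str(2 * 1024 ** 3),  # 2GB
}
r = requests.get(BASE_URL + "/directories/my-folder", params=params, headers=HEADERS)
with open("my-folder.tar.gz", "wb") as f:
    f.write(r.content)

# Ask the server to extract an archive that already lives on the server,
# next to itself (a PUT request since patch 10).
r = requests.put(BASE_URL + "/extract-directories/my-folder.tar.gz", headers=HEADERS)
assert r.status_code == 200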