From bd592a41cca763f85da40063dd4b1caae12a330b Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Mon, 29 Jun 2020 16:51:42 -0400 Subject: [PATCH 01/11] Add DirectoryHandler and ExtractDirectoryHandler + tests --- jupyter_server/files/handlers.py | 234 ++++++++++++++-- jupyter_server/services/contents/manager.py | 286 ++++++++++++-------- tests/test_files.py | 258 +++++++++++------- 3 files changed, 557 insertions(+), 221 deletions(-) diff --git a/jupyter_server/files/handlers.py b/jupyter_server/files/handlers.py index 51b990f238..1a6d3fb137 100644 --- a/jupyter_server/files/handlers.py +++ b/jupyter_server/files/handlers.py @@ -3,11 +3,17 @@ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. +import os +import pathlib import mimetypes import json +import zipfile +import tarfile from base64 import decodebytes from tornado import web +from tornado import ioloop from jupyter_server.base.handlers import JupyterHandler +from jupyter_server.utils import url2path class FilesHandler(JupyterHandler): @@ -23,8 +29,10 @@ class FilesHandler(JupyterHandler): def content_security_policy(self): # In case we're serving HTML/SVG, confine any Javascript to a unique # origin so it can't interact with the notebook server. - return super(FilesHandler, self).content_security_policy + \ - "; sandbox allow-scripts" + return ( + super(FilesHandler, self).content_security_policy + + "; sandbox allow-scripts" + ) @web.authenticated def head(self, path): @@ -38,41 +46,229 @@ async def get(self, path, include_body=True): self.log.info("Refusing to serve hidden file, via 404 Error") raise web.HTTPError(404) - path = path.strip('/') - if '/' in path: - _, name = path.rsplit('/', 1) + path = path.strip("/") + if "/" in path: + _, name = path.rsplit("/", 1) else: name = path - model = await cm.get(path, type='file', content=include_body) + model = await cm.get(path, type="file", content=include_body) if self.get_argument("download", False): self.set_attachment_header(name) # get mimetype from filename - if name.lower().endswith('.ipynb'): - self.set_header('Content-Type', 'application/x-ipynb+json') + if name.lower().endswith(".ipynb"): + self.set_header("Content-Type", "application/x-ipynb+json") else: cur_mime = mimetypes.guess_type(name)[0] - if cur_mime == 'text/plain': - self.set_header('Content-Type', 'text/plain; charset=UTF-8') + if cur_mime == "text/plain": + self.set_header("Content-Type", "text/plain; charset=UTF-8") elif cur_mime is not None: - self.set_header('Content-Type', cur_mime) + self.set_header("Content-Type", cur_mime) else: - if model['format'] == 'base64': - self.set_header('Content-Type', 'application/octet-stream') + if model["format"] == "base64": + self.set_header("Content-Type", "application/octet-stream") else: - self.set_header('Content-Type', 'text/plain; charset=UTF-8') + self.set_header("Content-Type", "text/plain; charset=UTF-8") if include_body: - if model['format'] == 'base64': - b64_bytes = model['content'].encode('ascii') + if model["format"] == "base64": + b64_bytes = model["content"].encode("ascii") self.write(decodebytes(b64_bytes)) - elif model['format'] == 'json': - self.write(json.dumps(model['content'])) + elif model["format"] == "json": + self.write(json.dumps(model["content"])) else: - self.write(model['content']) + self.write(model["content"]) self.flush() +# The delay in ms at which we send the chunk of data +# to the client. 
+ARCHIVE_DOWNLOAD_FLUSH_DELAY = 100
+SUPPORTED_FORMAT = [
+    "zip",
+    "tgz",
+    "tar.gz",
+    "tbz",
+    "tbz2",
+    "tar.bz",
+    "tar.bz2",
+    "txz",
+    "tar.xz",
+]
+
+
+class ArchiveStream:
+    def __init__(self, handler):
+        self.handler = handler
+        self.position = 0
+
+    def write(self, data):
+        self.position += len(data)
+        self.handler.write(data)
+        del data
+
+    def tell(self):
+        return self.position
+
+    def flush(self):
+        # Note: Flushing is done elsewhere, in the main thread
+        # because `write()` is called in a background thread.
+        # self.handler.flush()
+        pass
+
+
+def make_writer(handler, archive_format="zip"):
+    fileobj = ArchiveStream(handler)
+
+    if archive_format == "zip":
+        archive_file = zipfile.ZipFile(
+            fileobj, mode="w", compression=zipfile.ZIP_DEFLATED
+        )
+        archive_file.add = archive_file.write
+    elif archive_format in ["tgz", "tar.gz"]:
+        archive_file = tarfile.open(fileobj=fileobj, mode="w|gz")
+    elif archive_format in ["tbz", "tbz2", "tar.bz", "tar.bz2"]:
+        archive_file = tarfile.open(fileobj=fileobj, mode="w|bz2")
+    elif archive_format in ["txz", "tar.xz"]:
+        archive_file = tarfile.open(fileobj=fileobj, mode="w|xz")
+    else:
+        raise ValueError("'{}' is not a valid archive format.".format(archive_format))
+    return archive_file
+
+
+def make_reader(archive_path):
+
+    archive_format = "".join(archive_path.suffixes)[1:]
+
+    if archive_format == "zip":
+        archive_file = zipfile.ZipFile(archive_path, mode="r")
+    elif archive_format in ["tgz", "tar.gz"]:
+        archive_file = tarfile.open(archive_path, mode="r|gz")
+    elif archive_format in ["tbz", "tbz2", "tar.bz", "tar.bz2"]:
+        archive_file = tarfile.open(archive_path, mode="r|bz2")
+    elif archive_format in ["txz", "tar.xz"]:
+        archive_file = tarfile.open(archive_path, mode="r|xz")
+    else:
+        raise ValueError("'{}' is not a valid archive format.".format(archive_format))
+    return archive_file
+
+
+class DirectoryHandler(JupyterHandler):
+    @web.authenticated
+    async def get(self, archive_path, include_body=False):
+
+        # /directories/ requests must originate from the same site
+        self.check_xsrf_cookie()
+        cm = self.contents_manager
+
+        if cm.is_hidden(archive_path) and not cm.allow_hidden:
+            self.log.info("Refusing to serve hidden file, via 404 Error")
+            raise web.HTTPError(404)
+
+        archive_token = self.get_argument("archiveToken")
+        archive_format = self.get_argument("archiveFormat", "zip")
+        if archive_format not in SUPPORTED_FORMAT:
+            self.log.error("Unsupported format {}.".format(archive_format))
+            raise web.HTTPError(404)
+
+        root_dir = pathlib.Path(cm.root_dir)
+        archive_path = root_dir / url2path(archive_path)
+        archive_filename = archive_path.with_suffix(".{}".format(archive_format)).name
+
+        self.log.info(
+            "Prepare {} for archiving and downloading.".format(archive_filename)
+        )
+        self.set_header("content-type", "application/octet-stream")
+        self.set_header("cache-control", "no-cache")
+        self.set_header(
+            "content-disposition", "attachment; filename={}".format(archive_filename)
+        )
+
+        self.canceled = False
+
+        # The original extension is using `ioloop.PeriodicCallback` and `ioloop.IOLoop.current().run_in_executor`
+        # but it does not seem to work here (500 internal HTTP error).
+        # This strategy is nice when compressing very large directories. Any idea how to integrate that is welcome.
+ + # self.flush_cb = ioloop.PeriodicCallback( + # self.flush, ARCHIVE_DOWNLOAD_FLUSH_DELAY + # ) + # self.flush_cb.start() + + self.archive_and_download(archive_path, archive_format, archive_token) + + # args = (archive_path, archive_format, archive_token) + # yield ioloop.IOLoop.current().run_in_executor( + # None, self.archive_and_download, *args + # ) + + if self.canceled: + self.log.info("Download canceled.") + else: + self.flush() + self.log.info("Finished downloading {}.".format(archive_filename)) + + self.set_cookie("archiveToken", archive_token) + # self.flush_cb.stop() + self.finish() + + def archive_and_download(self, archive_path, archive_format, archive_token): + + with make_writer(self, archive_format) as archive: + prefix = len(str(archive_path.parent)) + len(os.path.sep) + for root, _, files in os.walk(archive_path): + for file_ in files: + file_name = os.path.join(root, file_) + if not self.canceled: + self.log.debug("{}\n".format(file_name)) + archive.add(file_name, os.path.join(root[prefix:], file_)) + else: + break + + def on_connection_close(self): + super().on_connection_close() + self.canceled = True + # self.flush_cb.stop() + + +class ExtractDirectoryHandler(JupyterHandler): + @web.authenticated + def get(self, archive_path, include_body=False): + + # /extract-archive/ requests must originate from the same site + self.check_xsrf_cookie() + cm = self.contents_manager + + if cm.is_hidden(archive_path) and not cm.allow_hidden: + self.log.info("Refusing to serve hidden file, via 404 Error") + raise web.HTTPError(404) + + root_dir = pathlib.Path(cm.root_dir) + archive_path = root_dir / url2path(archive_path) + + # yield ioloop.IOLoop.current().run_in_executor( + # None, self.extract_archive, archive_path + # ) + self.extract_archive(archive_path) + + self.finish() + + def extract_archive(self, archive_path): + + archive_destination = archive_path.parent + self.log.info( + "Begin extraction of {} to {}.".format(archive_path, archive_destination) + ) + + archive_reader = make_reader(archive_path) + with archive_reader as archive: + archive.extractall(archive_destination) + + self.log.info( + "Finished extracting {} to {}.".format(archive_path, archive_destination) + ) + + default_handlers = [] diff --git a/jupyter_server/services/contents/manager.py b/jupyter_server/services/contents/manager.py index 28ac63fcce..a06e7a0412 100644 --- a/jupyter_server/services/contents/manager.py +++ b/jupyter_server/services/contents/manager.py @@ -12,6 +12,8 @@ from tornado.web import HTTPError, RequestHandler from ...files.handlers import FilesHandler +from ...files.handlers import DirectoryHandler +from ...files.handlers import ExtractDirectoryHandler from .checkpoints import Checkpoints from traitlets.config.configurable import LoggingConfigurable from nbformat import sign, validate as validate_nb, ValidationError @@ -34,7 +36,7 @@ from jupyter_server.transutils import _ -copy_pat = re.compile(r'\-Copy\d*\.') +copy_pat = re.compile(r"\-Copy\d*\.") class ContentsManager(LoggingConfigurable): @@ -56,34 +58,44 @@ class ContentsManager(LoggingConfigurable): """ - root_dir = Unicode('/', config=True) + root_dir = Unicode("/", config=True) allow_hidden = Bool(False, config=True, help="Allow access to hidden files") notary = Instance(sign.NotebookNotary) + def _notary_default(self): return sign.NotebookNotary(parent=self) - hide_globs = List(Unicode(), [ - u'__pycache__', '*.pyc', '*.pyo', - '.DS_Store', '*.so', '*.dylib', '*~', - ], config=True, help=""" + hide_globs = List( + Unicode(), + 
[u"__pycache__", "*.pyc", "*.pyo", ".DS_Store", "*.so", "*.dylib", "*~",], + config=True, + help=""" Glob patterns to hide in file and directory listings. - """) + """, + ) - untitled_notebook = Unicode(_("Untitled"), config=True, - help="The base name used when creating untitled notebooks." + untitled_notebook = Unicode( + _("Untitled"), + config=True, + help="The base name used when creating untitled notebooks.", ) - untitled_file = Unicode("untitled", config=True, - help="The base name used when creating untitled files." + untitled_file = Unicode( + "untitled", config=True, help="The base name used when creating untitled files." ) - untitled_directory = Unicode("Untitled Folder", config=True, - help="The base name used when creating untitled directories." + untitled_directory = Unicode( + "Untitled Folder", + config=True, + help="The base name used when creating untitled directories.", ) - pre_save_hook = Any(None, config=True, allow_none=True, + pre_save_hook = Any( + None, + config=True, + allow_none=True, help="""Python callable or importstring thereof To be called on a contents model prior to save. @@ -100,12 +112,12 @@ def _notary_default(self): Modifying this dict will affect the file that is stored. - path: the API path of the save destination - contents_manager: this ContentsManager instance - """ + """, ) - @validate('pre_save_hook') + @validate("pre_save_hook") def _validate_pre_save_hook(self, proposal): - value = proposal['value'] + value = proposal["value"] if isinstance(value, string_types): value = import_item(self.pre_save_hook) if not callable(value): @@ -117,7 +129,9 @@ def run_pre_save_hook(self, model, path, **kwargs): if self.pre_save_hook: try: self.log.debug("Running pre-save hook on %s", path) - self.pre_save_hook(model=model, path=path, contents_manager=self, **kwargs) + self.pre_save_hook( + model=model, path=path, contents_manager=self, **kwargs + ) except Exception: self.log.error("Pre-save hook failed on %s", path, exc_info=True) @@ -125,19 +139,19 @@ def run_pre_save_hook(self, model, path, **kwargs): checkpoints = Instance(Checkpoints, config=True) checkpoints_kwargs = Dict(config=True) - @default('checkpoints') + @default("checkpoints") def _default_checkpoints(self): return self.checkpoints_class(**self.checkpoints_kwargs) - @default('checkpoints_kwargs') + @default("checkpoints_kwargs") def _default_checkpoints_kwargs(self): - return dict( - parent=self, - log=self.log, - ) + return dict(parent=self, log=self.log,) files_handler_class = Type( - FilesHandler, klass=RequestHandler, allow_none=True, config=True, + FilesHandler, + klass=RequestHandler, + allow_none=True, + config=True, help="""handler class to use when serving raw file requests. Default is a fallback that talks to the ContentsManager API, @@ -147,7 +161,7 @@ def _default_checkpoints_kwargs(self): which will be much more efficient. Access to these files should be Authenticated. - """ + """, ) files_handler_params = Dict( @@ -156,7 +170,45 @@ def _default_checkpoints_kwargs(self): For example, StaticFileHandlers generally expect a `path` argument specifying the root directory from which to serve files. - """ + """, + ) + + directory_handler_class = Type( + DirectoryHandler, + klass=RequestHandler, + allow_none=True, + config=True, + help="""handler class to use when serving raw directory requests. + + TODO: complete + + Access to these files should be Authenticated. + """, + ) + + directory_handler_params = Dict( + config=True, + help="""Extra parameters to pass to directory_handler_class. 
+ """, + ) + + extract_directory_handler_class = Type( + ExtractDirectoryHandler, + klass=RequestHandler, + allow_none=True, + config=True, + help="""handler class to use when serving raw directory requests. + + TODO: complete + + Access to these files should be Authenticated. + """, + ) + + extract_directory_handler_params = Dict( + config=True, + help="""Extra parameters to pass to extract_directory_handler_class. + """, ) def get_extra_handlers(self): @@ -169,6 +221,20 @@ def get_extra_handlers(self): handlers.append( (r"/files/(.*)", self.files_handler_class, self.files_handler_params) ) + handlers.append( + ( + r"/directories/(.*)", + self.directory_handler_class, + self.directory_handler_params, + ) + ) + handlers.append( + ( + r"/extract-directories/(.*)", + self.extract_directory_handler_class, + self.extract_directory_handler_params, + ) + ) return handlers # ContentsManager API part 1: methods that must be @@ -210,7 +276,7 @@ def is_hidden(self, path): """ raise NotImplementedError - def file_exists(self, path=''): + def file_exists(self, path=""): """Does a file exist at the given path? Like os.path.isfile @@ -227,7 +293,7 @@ def file_exists(self, path=''): exists : bool Whether the file exists. """ - raise NotImplementedError('must be implemented in a subclass') + raise NotImplementedError("must be implemented in a subclass") def exists(self, path): """Does a file or directory exist at the given path? @@ -248,7 +314,7 @@ def exists(self, path): def get(self, path, content=True, type=None, format=None): """Get a file or directory model.""" - raise NotImplementedError('must be implemented in a subclass') + raise NotImplementedError("must be implemented in a subclass") def save(self, model, path): """ @@ -258,22 +324,22 @@ def save(self, model, path): should call self.run_pre_save_hook(model=model, path=path) prior to writing any data. """ - raise NotImplementedError('must be implemented in a subclass') + raise NotImplementedError("must be implemented in a subclass") def delete_file(self, path): """Delete the file or directory at path.""" - raise NotImplementedError('must be implemented in a subclass') + raise NotImplementedError("must be implemented in a subclass") def rename_file(self, old_path, new_path): """Rename a file or directory.""" - raise NotImplementedError('must be implemented in a subclass') + raise NotImplementedError("must be implemented in a subclass") # ContentsManager API part 2: methods that have useable default # implementations, but can be overridden in subclasses. def delete(self, path): """Delete a file/directory and any associated checkpoints.""" - path = path.strip('/') + path = path.strip("/") if not path: raise HTTPError(400, "Can't delete root") self.delete_file(path) @@ -290,8 +356,8 @@ def update(self, model, path): For use in PATCH requests, to enable renaming a file without re-uploading its contents. Only used for renaming at the moment. """ - path = path.strip('/') - new_path = model.get('path', path).strip('/') + path = path.strip("/") + new_path = model.get("path", path).strip("/") if path != new_path: self.rename(path, new_path) model = self.get(new_path, content=False) @@ -310,9 +376,9 @@ def get_kernel_path(self, path, model=None): notebook server. FileContentsManager overrides this to use the directory containing the notebook. """ - return '' + return "" - def increment_filename(self, filename, path='', insert=''): + def increment_filename(self, filename, path="", insert=""): """Increment a filename until it is unique. 
Parameters @@ -330,96 +396,98 @@ def increment_filename(self, filename, path='', insert=''): A filename that is unique, based on the input filename. """ # Extract the full suffix from the filename (e.g. .tar.gz) - path = path.strip('/') - basename, dot, ext = filename.rpartition('.') - if ext != 'ipynb': - basename, dot, ext = filename.partition('.') - + path = path.strip("/") + basename, dot, ext = filename.rpartition(".") + if ext != "ipynb": + basename, dot, ext = filename.partition(".") + suffix = dot + ext for i in itertools.count(): if i: - insert_i = '{}{}'.format(insert, i) + insert_i = "{}{}".format(insert, i) else: - insert_i = '' - name = u'{basename}{insert}{suffix}'.format(basename=basename, - insert=insert_i, suffix=suffix) - if not self.exists(u'{}/{}'.format(path, name)): + insert_i = "" + name = u"{basename}{insert}{suffix}".format( + basename=basename, insert=insert_i, suffix=suffix + ) + if not self.exists(u"{}/{}".format(path, name)): break return name def validate_notebook_model(self, model): """Add failed-validation message to model""" try: - validate_nb(model['content']) + validate_nb(model["content"]) except ValidationError as e: - model['message'] = u'Notebook validation failed: {}:\n{}'.format( - e.message, json.dumps(e.instance, indent=1, default=lambda obj: ''), + model["message"] = u"Notebook validation failed: {}:\n{}".format( + e.message, + json.dumps(e.instance, indent=1, default=lambda obj: ""), ) return model - - def new_untitled(self, path='', type='', ext=''): + + def new_untitled(self, path="", type="", ext=""): """Create a new untitled file or directory in path - + path must be a directory - + File extension can be specified. - + Use `new` to create files with a fully specified path (including filename). """ - path = path.strip('/') + path = path.strip("/") if not self.dir_exists(path): - raise HTTPError(404, 'No such directory: %s' % path) - + raise HTTPError(404, "No such directory: %s" % path) + model = {} if type: - model['type'] = type - - if ext == '.ipynb': - model.setdefault('type', 'notebook') + model["type"] = type + + if ext == ".ipynb": + model.setdefault("type", "notebook") else: - model.setdefault('type', 'file') - - insert = '' - if model['type'] == 'directory': + model.setdefault("type", "file") + + insert = "" + if model["type"] == "directory": untitled = self.untitled_directory - insert = ' ' - elif model['type'] == 'notebook': + insert = " " + elif model["type"] == "notebook": untitled = self.untitled_notebook - ext = '.ipynb' - elif model['type'] == 'file': + ext = ".ipynb" + elif model["type"] == "file": untitled = self.untitled_file else: - raise HTTPError(400, "Unexpected model type: %r" % model['type']) - + raise HTTPError(400, "Unexpected model type: %r" % model["type"]) + name = self.increment_filename(untitled + ext, path, insert=insert) - path = u'{0}/{1}'.format(path, name) + path = u"{0}/{1}".format(path, name) return self.new(model, path) - - def new(self, model=None, path=''): + + def new(self, model=None, path=""): """Create a new file or directory and return its model with no content. - + To create a new untitled entity in a directory, use `new_untitled`. 
""" - path = path.strip('/') + path = path.strip("/") if model is None: model = {} - - if path.endswith('.ipynb'): - model.setdefault('type', 'notebook') + + if path.endswith(".ipynb"): + model.setdefault("type", "notebook") else: - model.setdefault('type', 'file') - + model.setdefault("type", "file") + # no content, not a directory, so fill out new-file model - if 'content' not in model and model['type'] != 'directory': - if model['type'] == 'notebook': - model['content'] = new_notebook() - model['format'] = 'json' + if "content" not in model and model["type"] != "directory": + if model["type"] == "notebook": + model["content"] = new_notebook() + model["format"] = "json" else: - model['content'] = '' - model['type'] = 'file' - model['format'] = 'text' - + model["content"] = "" + model["type"] = "file" + model["format"] = "text" + model = self.save(model, path) return model @@ -429,33 +497,33 @@ def copy(self, from_path, to_path=None): If to_path not specified, it will be the parent directory of from_path. If to_path is a directory, filename will increment `from_path-Copy#.ext`. Considering multi-part extensions, the Copy# part will be placed before the first dot for all the extensions except `ipynb`. - For easier manual searching in case of notebooks, the Copy# part will be placed before the last dot. + For easier manual searching in case of notebooks, the Copy# part will be placed before the last dot. from_path must be a full path to a file. """ - path = from_path.strip('/') + path = from_path.strip("/") if to_path is not None: - to_path = to_path.strip('/') + to_path = to_path.strip("/") - if '/' in path: - from_dir, from_name = path.rsplit('/', 1) + if "/" in path: + from_dir, from_name = path.rsplit("/", 1) else: - from_dir = '' + from_dir = "" from_name = path - + model = self.get(path) - model.pop('path', None) - model.pop('name', None) - if model['type'] == 'directory': + model.pop("path", None) + model.pop("name", None) + if model["type"] == "directory": raise HTTPError(400, "Can't copy directories") - + if to_path is None: to_path = from_dir if self.dir_exists(to_path): - name = copy_pat.sub(u'.', from_name) - to_name = self.increment_filename(name, to_path, insert='-Copy') - to_path = u'{0}/{1}'.format(to_path, to_name) - + name = copy_pat.sub(u".", from_name) + to_name = self.increment_filename(name, to_path, insert="-Copy") + to_path = u"{0}/{1}".format(to_path, to_name) + model = self.save(model, to_path) return model @@ -471,12 +539,12 @@ def trust_notebook(self, path): The path of a notebook """ model = self.get(path) - nb = model['content'] + nb = model["content"] self.log.warning("Trusting notebook %s", path) self.notary.mark_cells(nb, True) self.check_and_sign(nb, path) - def check_and_sign(self, nb, path=''): + def check_and_sign(self, nb, path=""): """Check for trusted cells, and sign the notebook. Called as a part of saving notebooks. @@ -493,7 +561,7 @@ def check_and_sign(self, nb, path=''): else: self.log.warning("Notebook %s is not trusted", path) - def mark_trusted_cells(self, nb, path=''): + def mark_trusted_cells(self, nb, path=""): """Mark cells as trusted if the notebook signature matches. Called as a part of loading notebooks. 
diff --git a/tests/test_files.py b/tests/test_files.py index e65dc2da93..e34e5cf3ab 100644 --- a/tests/test_files.py +++ b/tests/test_files.py @@ -1,85 +1,68 @@ import os +import io +import zipfile +import tarfile +import pathlib +import shutil import pytest import tornado from .utils import expected_http_error from nbformat import writes -from nbformat.v4 import (new_notebook, - new_markdown_cell, new_code_cell, - new_output) +from nbformat.v4 import new_notebook, new_markdown_cell, new_code_cell, new_output async def test_hidden_files(fetch, serverapp, root_dir): not_hidden = [ - u'å b', - u'å b/ç. d', + u"å b", + u"å b/ç. d", ] hidden = [ - u'.å b', - u'å b/.ç d', + u".å b", + u"å b/.ç d", ] dirs = not_hidden + hidden for d in dirs: - path = root_dir / d.replace('/', os.sep) + path = root_dir / d.replace("/", os.sep) path.mkdir(parents=True, exist_ok=True) - path.joinpath('foo').write_text('foo') - path.joinpath('.foo').write_text('.foo') - + path.joinpath("foo").write_text("foo") + path.joinpath(".foo").write_text(".foo") for d in not_hidden: - path = root_dir / d.replace('/', os.sep) + path = root_dir / d.replace("/", os.sep) - r = await fetch( - 'files', d, 'foo', - method='GET' - ) - assert r.body.decode() == 'foo' + r = await fetch("files", d, "foo", method="GET") + assert r.body.decode() == "foo" with pytest.raises(tornado.httpclient.HTTPClientError) as e: - r = await fetch( - 'files', d, '.foo', - method='GET' - ) + r = await fetch("files", d, ".foo", method="GET") assert expected_http_error(e, 404) - for d in hidden: - path = root_dir / d.replace('/', os.sep) - for foo in ('foo', '.foo'): + path = root_dir / d.replace("/", os.sep) + for foo in ("foo", ".foo"): with pytest.raises(tornado.httpclient.HTTPClientError) as e: - r = await fetch( - 'files', d, foo, - method='GET' - ) + r = await fetch("files", d, foo, method="GET") assert expected_http_error(e, 404) serverapp.contents_manager.allow_hidden = True for d in not_hidden: - path = root_dir / d.replace('/', os.sep) + path = root_dir / d.replace("/", os.sep) - r = await fetch( - 'files', d, 'foo', - method='GET' - ) - assert r.body.decode() == 'foo' + r = await fetch("files", d, "foo", method="GET") + assert r.body.decode() == "foo" - r = await fetch( - 'files', d, '.foo', - method='GET' - ) - assert r.body.decode() == '.foo' + r = await fetch("files", d, ".foo", method="GET") + assert r.body.decode() == ".foo" for d in hidden: - path = root_dir / d.replace('/', os.sep) + path = root_dir / d.replace("/", os.sep) - for foo in ('foo', '.foo'): - r = await fetch( - 'files', d, foo, - method='GET' - ) + for foo in ("foo", ".foo"): + r = await fetch("files", d, foo, method="GET") assert r.body.decode() == foo @@ -87,73 +70,56 @@ async def test_contents_manager(fetch, serverapp, root_dir): "make sure ContentsManager returns right files (ipynb, bin, txt)." 
nb = new_notebook( cells=[ - new_markdown_cell(u'Created by test ³'), - new_code_cell("print(2*6)", outputs=[ - new_output("stream", text="12"), - ]) + new_markdown_cell(u"Created by test ³"), + new_code_cell("print(2*6)", outputs=[new_output("stream", text="12"),]), ] ) - root_dir.joinpath('testnb.ipynb').write_text(writes(nb, version=4), encoding='utf-8') - root_dir.joinpath('test.bin').write_bytes(b'\xff' + os.urandom(5)) - root_dir.joinpath('test.txt').write_text('foobar') - - r = await fetch( - 'files/testnb.ipynb', - method='GET' + root_dir.joinpath("testnb.ipynb").write_text( + writes(nb, version=4), encoding="utf-8" ) + root_dir.joinpath("test.bin").write_bytes(b"\xff" + os.urandom(5)) + root_dir.joinpath("test.txt").write_text("foobar") + + r = await fetch("files/testnb.ipynb", method="GET") assert r.code == 200 - assert 'print(2*6)' in r.body.decode('utf-8') + assert "print(2*6)" in r.body.decode("utf-8") - r = await fetch( - 'files/test.bin', - method='GET' - ) + r = await fetch("files/test.bin", method="GET") assert r.code == 200 - assert r.headers['content-type'] == 'application/octet-stream' - assert r.body[:1] == b'\xff' + assert r.headers["content-type"] == "application/octet-stream" + assert r.body[:1] == b"\xff" assert len(r.body) == 6 - r = await fetch( - 'files/test.txt', - method='GET' - ) + r = await fetch("files/test.txt", method="GET") assert r.code == 200 - assert r.headers['content-type'] == 'text/plain; charset=UTF-8' - assert r.body.decode() == 'foobar' + assert r.headers["content-type"] == "text/plain; charset=UTF-8" + assert r.body.decode() == "foobar" async def test_download(fetch, serverapp, root_dir): - text = 'hello' - root_dir.joinpath('test.txt').write_text(text) + text = "hello" + root_dir.joinpath("test.txt").write_text(text) - r = await fetch( - 'files', 'test.txt', - method='GET' - ) - disposition = r.headers.get('Content-Disposition', '') - assert 'attachment' not in disposition + r = await fetch("files", "test.txt", method="GET") + disposition = r.headers.get("Content-Disposition", "") + assert "attachment" not in disposition - r = await fetch( - 'files', 'test.txt', - method='GET', - params={'download': True} - ) - disposition = r.headers.get('Content-Disposition', '') - assert 'attachment' in disposition + r = await fetch("files", "test.txt", method="GET", params={"download": True}) + disposition = r.headers.get("Content-Disposition", "") + assert "attachment" in disposition assert "filename*=utf-8''test.txt" in disposition async def test_old_files_redirect(fetch, serverapp, root_dir): """pre-2.0 'files/' prefixed links are properly redirected""" - root_dir.joinpath('files').mkdir(parents=True, exist_ok=True) - root_dir.joinpath('sub', 'files').mkdir(parents=True, exist_ok=True) - + root_dir.joinpath("files").mkdir(parents=True, exist_ok=True) + root_dir.joinpath("sub", "files").mkdir(parents=True, exist_ok=True) - for prefix in ('', 'sub'): - root_dir.joinpath(prefix, 'files', 'f1.txt').write_text(prefix + '/files/f1') - root_dir.joinpath(prefix, 'files', 'f2.txt').write_text(prefix + '/files/f2') - root_dir.joinpath(prefix, 'f2.txt').write_text(prefix + '/f2') - root_dir.joinpath(prefix, 'f3.txt').write_text(prefix + '/f3') + for prefix in ("", "sub"): + root_dir.joinpath(prefix, "files", "f1.txt").write_text(prefix + "/files/f1") + root_dir.joinpath(prefix, "files", "f2.txt").write_text(prefix + "/files/f2") + root_dir.joinpath(prefix, "f2.txt").write_text(prefix + "/f2") + root_dir.joinpath(prefix, "f3.txt").write_text(prefix + "/f3") # These 
depend on the tree handlers # @@ -171,4 +137,110 @@ async def test_old_files_redirect(fetch, serverapp, root_dir): # r = self.request('GET', 'files/test.txt?download=1') # disposition = r.headers.get('Content-Disposition', '') # self.assertIn('attachment', disposition) - # self.assertIn("filename*=utf-8''test.txt", disposition) \ No newline at end of file + # self.assertIn("filename*=utf-8''test.txt", disposition) + + +async def test_download_directory(fetch, serverapp, root_dir): + # Create a dummy directory. + root_dir = pathlib.Path(root_dir) + archive_dir_path = root_dir / "download-archive-dir" + archive_dir_path.mkdir() + with open(archive_dir_path / "test1.txt", "w") as f: + f.write("hello1") + with open(archive_dir_path / "test2.txt", "w") as f: + f.write("hello2") + with open(archive_dir_path / "test3.md", "w") as f: + f.write("hello3") + + # Try to download the created folder. + file_lists = { + "download-archive-dir/test2.txt", + "download-archive-dir/test1.txt", + "download-archive-dir/test3.md", + } + + archive_formats = { + "zip": "r", + "tgz": "r|gz", + "tar.gz": "r|gz", + "tbz": "r|bz2", + "tbz2": "r|bz2", + "tar.bz": "r|bz2", + "tar.bz2": "r|bz2", + "txz": "r|xz", + "tar.xz": "r|xz", + } + + archive_token = 59487596 + + for archive_format, mode in archive_formats.items(): + params = dict(archiveToken=archive_token, archiveFormat=archive_format) + dir_path = str(archive_dir_path.relative_to(root_dir)) + r = await fetch("directories", dir_path, method="GET", params=params) + assert r.code == 200 + assert r.headers.get("content-type") == "application/octet-stream" + assert r.headers.get("cache-control") == "no-cache" + if archive_format == "zip": + with zipfile.ZipFile(io.BytesIO(r.body), mode=mode) as zf: + assert set(zf.namelist()) == file_lists + else: + with tarfile.open(fileobj=io.BytesIO(r.body), mode=mode) as tf: + assert set(map(lambda m: m.name, tf.getmembers())) == file_lists + + +async def test_extract_directory(fetch, serverapp, root_dir): + + format_mode = { + "zip": "w", + "tgz": "w|gz", + "tar.gz": "w|gz", + "tbz": "w|bz2", + "tbz2": "w|bz2", + "tar.bz": "w|bz2", + "tar.bz2": "w|bz2", + "txz": "w|xz", + "tar.xz": "w|xz", + } + + for archive_format, mode in format_mode.items(): + + # Create a dummy directory. + root_dir = pathlib.Path(root_dir) + archive_dir_path = root_dir / "extract-archive-dir" + archive_dir_path.mkdir() + with open(archive_dir_path / "extract-test1.txt", "w") as f: + f.write("hello1") + with open(archive_dir_path / "extract-test2.txt", "w") as f: + f.write("hello2") + with open(archive_dir_path / "extract-test3.md", "w") as f: + f.write("hello3") + + # Make an archive + archive_path = archive_dir_path.with_suffix("." 
+ archive_format)
+        if archive_format == "zip":
+            with zipfile.ZipFile(archive_path, mode=mode) as writer:
+                for file_path in archive_dir_path.rglob("*"):
+                    if file_path.is_file():
+                        writer.write(file_path, file_path.relative_to(root_dir))
+        else:
+            with tarfile.open(str(archive_path), mode=mode) as writer:
+                for file_path in archive_dir_path.rglob("*"):
+                    if file_path.is_file():
+                        writer.add(str(file_path), str(file_path.relative_to(root_dir)))
+
+        # Remove the directory
+        shutil.rmtree(archive_dir_path)
+
+        # REST call to extract the archive
+        relative_archive_path = str(archive_path.relative_to(root_dir))
+        print(archive_path)
+        r = await fetch("extract-directories", relative_archive_path, method="GET")
+
+        assert r.code == 200
+        assert archive_dir_path.is_dir()
+
+        n_files = len(list(archive_dir_path.glob("*")))
+        assert n_files == 3
+
+        # Remove the directory after extraction
+        shutil.rmtree(archive_dir_path)

From 8a94b41ad8e210c6efd366bf0ce4e0390881d471 Mon Sep 17 00:00:00 2001
From: Hadrien Mary
Date: Mon, 29 Jun 2020 17:01:32 -0400
Subject: [PATCH 02/11] fix py35 compat

---
 tests/test_files.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_files.py b/tests/test_files.py
index e34e5cf3ab..f148aca27c 100644
--- a/tests/test_files.py
+++ b/tests/test_files.py
@@ -142,7 +142,7 @@ async def test_old_files_redirect(fetch, serverapp, root_dir):
 
 async def test_download_directory(fetch, serverapp, root_dir):
     # Create a dummy directory.
-    root_dir = pathlib.Path(root_dir)
+    root_dir = pathlib.Path(str(root_dir))
     archive_dir_path = root_dir / "download-archive-dir"
     archive_dir_path.mkdir()
     with open(archive_dir_path / "test1.txt", "w") as f:
@@ -205,7 +205,7 @@ async def test_extract_directory(fetch, serverapp, root_dir):
     for archive_format, mode in format_mode.items():
 
         # Create a dummy directory.
-        root_dir = pathlib.Path(root_dir)
+        root_dir = pathlib.Path(str(root_dir))
         archive_dir_path = root_dir / "extract-archive-dir"
         archive_dir_path.mkdir()
         with open(archive_dir_path / "extract-test1.txt", "w") as f:

From 469557b49da9adb3c8093722c771afc93d0f9470 Mon Sep 17 00:00:00 2001
From: Hadrien Mary
Date: Tue, 7 Jul 2020 07:59:32 -0400
Subject: [PATCH 03/11] run_in_executor for DirectoryHandler

---
 jupyter_server/files/handlers.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/jupyter_server/files/handlers.py b/jupyter_server/files/handlers.py
index 1a6d3fb137..16e21923f9 100644
--- a/jupyter_server/files/handlers.py
+++ b/jupyter_server/files/handlers.py
@@ -192,17 +192,17 @@ async def get(self, archive_path, include_body=False):
         # but it does not seem to work here (500 internal HTTP error).
         # This strategy is nice when compressing very large directories. Any idea how to integrate that is welcome.
- # self.flush_cb = ioloop.PeriodicCallback( - # self.flush, ARCHIVE_DOWNLOAD_FLUSH_DELAY - # ) - # self.flush_cb.start() + self.flush_cb = ioloop.PeriodicCallback( + self.flush, ARCHIVE_DOWNLOAD_FLUSH_DELAY + ) + self.flush_cb.start() - self.archive_and_download(archive_path, archive_format, archive_token) + # self.archive_and_download(archive_path, archive_format, archive_token) - # args = (archive_path, archive_format, archive_token) - # yield ioloop.IOLoop.current().run_in_executor( - # None, self.archive_and_download, *args - # ) + args = (archive_path, archive_format, archive_token) + yield ioloop.IOLoop.current().run_in_executor( + None, self.archive_and_download, *args + ) if self.canceled: self.log.info("Download canceled.") @@ -211,7 +211,7 @@ async def get(self, archive_path, include_body=False): self.log.info("Finished downloading {}.".format(archive_filename)) self.set_cookie("archiveToken", archive_token) - # self.flush_cb.stop() + self.flush_cb.stop() self.finish() def archive_and_download(self, archive_path, archive_format, archive_token): @@ -230,7 +230,7 @@ def archive_and_download(self, archive_path, archive_format, archive_token): def on_connection_close(self): super().on_connection_close() self.canceled = True - # self.flush_cb.stop() + self.flush_cb.stop() class ExtractDirectoryHandler(JupyterHandler): From 4282e4cc82d638744d095bab226949c10ab942b7 Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Tue, 7 Jul 2020 08:05:24 -0400 Subject: [PATCH 04/11] replace yield by await --- jupyter_server/files/handlers.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/jupyter_server/files/handlers.py b/jupyter_server/files/handlers.py index 16e21923f9..be20bcd46a 100644 --- a/jupyter_server/files/handlers.py +++ b/jupyter_server/files/handlers.py @@ -200,7 +200,7 @@ async def get(self, archive_path, include_body=False): # self.archive_and_download(archive_path, archive_format, archive_token) args = (archive_path, archive_format, archive_token) - yield ioloop.IOLoop.current().run_in_executor( + await ioloop.IOLoop.current().run_in_executor( None, self.archive_and_download, *args ) @@ -235,7 +235,7 @@ def on_connection_close(self): class ExtractDirectoryHandler(JupyterHandler): @web.authenticated - def get(self, archive_path, include_body=False): + async def get(self, archive_path, include_body=False): # /extract-archive/ requests must originate from the same site self.check_xsrf_cookie() @@ -248,10 +248,9 @@ def get(self, archive_path, include_body=False): root_dir = pathlib.Path(cm.root_dir) archive_path = root_dir / url2path(archive_path) - # yield ioloop.IOLoop.current().run_in_executor( - # None, self.extract_archive, archive_path - # ) - self.extract_archive(archive_path) + await ioloop.IOLoop.current().run_in_executor( + None, self.extract_archive, archive_path + ) self.finish() From e85b9044f66ae9c5f642542a652a45c312cfd7d8 Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Tue, 7 Jul 2020 08:07:54 -0400 Subject: [PATCH 05/11] doc --- jupyter_server/files/handlers.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/jupyter_server/files/handlers.py b/jupyter_server/files/handlers.py index be20bcd46a..b5addc643f 100644 --- a/jupyter_server/files/handlers.py +++ b/jupyter_server/files/handlers.py @@ -187,11 +187,6 @@ async def get(self, archive_path, include_body=False): ) self.canceled = False - - # The original extension is using `ioloop.PeriodicCallback` and `ioloop.IOLoop.current().run_in_executor` - # but it does not seem to work here (500 
internal HTTP error).
-        # This strategy is nice when compressing very large directories. Any idea how to integrate that is welcome.
-
         self.flush_cb = ioloop.PeriodicCallback(
             self.flush, ARCHIVE_DOWNLOAD_FLUSH_DELAY
         )

From 07692729c568cd23e67eaf3f0d1a96d807595001 Mon Sep 17 00:00:00 2001
From: Hadrien Mary
Date: Tue, 1 Sep 2020 09:51:52 -0400
Subject: [PATCH 06/11] docstrings

---
 jupyter_server/files/handlers.py            | 22 +++++++++++++++++++--
 jupyter_server/services/contents/manager.py | 13 ++++++------
 2 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/jupyter_server/files/handlers.py b/jupyter_server/files/handlers.py
index b5addc643f..efa040c35f 100644
--- a/jupyter_server/files/handlers.py
+++ b/jupyter_server/files/handlers.py
@@ -100,6 +100,10 @@ async def get(self, path, include_body=True):
 
 
 class ArchiveStream:
+    """ArchiveStream is an abstraction layer over a Python archive that
+    allows streaming archive files.
+    """
+
     def __init__(self, handler):
         self.handler = handler
         self.position = 0
@@ -120,6 +124,9 @@ def flush(self):
 
 
 def make_writer(handler, archive_format="zip"):
+    """Given a handler object, create an `ArchiveStream` instance
+    and set up an archive file object using the specified archive format.
+    """
     fileobj = ArchiveStream(handler)
 
     if archive_format == "zip":
@@ -139,6 +146,9 @@ def make_writer(handler, archive_format="zip"):
 
 
 def make_reader(archive_path):
+    """Return the appropriate archive file instance given
+    the extension of `archive_path`.
+    """
 
     archive_format = "".join(archive_path.suffixes)[1:]
 
@@ -156,6 +166,11 @@ def make_reader(archive_path):
 
 
 class DirectoryHandler(JupyterHandler):
+    """Download a directory. Since a folder can't be downloaded directly,
+    it is first archived, with or without compression, before being downloaded by the client.
+    The archive format (zip, tar.gz, etc.) can be configured via the request.
+    """
+
     @web.authenticated
     async def get(self, archive_path, include_body=False):
 
@@ -192,8 +207,6 @@ async def get(self, archive_path, include_body=False):
         )
         self.flush_cb.start()
 
-        # self.archive_and_download(archive_path, archive_format, archive_token)
-
         args = (archive_path, archive_format, archive_token)
         await ioloop.IOLoop.current().run_in_executor(
             None, self.archive_and_download, *args
@@ -229,6 +242,11 @@ def on_connection_close(self):
 
 
 class ExtractDirectoryHandler(JupyterHandler):
+    """Extract the content of an archive on the server side. Given an archive
+    already present on the server, this handler extracts its content on request.
+    The archive format is detected from the extension of the archive.
+    """
+
     @web.authenticated
     async def get(self, archive_path, include_body=False):
 
diff --git a/jupyter_server/services/contents/manager.py b/jupyter_server/services/contents/manager.py
index a06e7a0412..88ff3abd98 100644
--- a/jupyter_server/services/contents/manager.py
+++ b/jupyter_server/services/contents/manager.py
@@ -178,11 +178,12 @@ def _default_checkpoints_kwargs(self):
         klass=RequestHandler,
         allow_none=True,
         config=True,
-        help="""handler class to use when serving raw directory requests.
+        help="""Handler class to use when serving raw directory requests.
 
-        TODO: complete
+        This handler archives a folder on the server side before
+        it is downloaded by the client.
 
-        Access to these files should be Authenticated.
+        Access to these directories should be Authenticated.
""", ) @@ -197,11 +198,11 @@ def _default_checkpoints_kwargs(self): klass=RequestHandler, allow_none=True, config=True, - help="""handler class to use when serving raw directory requests. + help=""""Handler class to use when serving raw directory requests. - TODO: complete + This handler allows to extract the content of a folder on the server side. - Access to these files should be Authenticated. + Access to these directories should be Authenticated. """, ) From 626908c542e51b225f8838afb6ead2210560161b Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Tue, 1 Sep 2020 12:10:39 -0400 Subject: [PATCH 07/11] add size limit + tests --- jupyter_server/files/handlers.py | 40 +++++++++++++++++++++++++- tests/test_files.py | 49 ++++++++++++++++++++++++++++++-- 2 files changed, 86 insertions(+), 3 deletions(-) diff --git a/jupyter_server/files/handlers.py b/jupyter_server/files/handlers.py index efa040c35f..ff43a4c304 100644 --- a/jupyter_server/files/handlers.py +++ b/jupyter_server/files/handlers.py @@ -86,6 +86,8 @@ async def get(self, path, include_body=True): # The delay in ms at which we send the chunk of data # to the client. ARCHIVE_DOWNLOAD_FLUSH_DELAY = 100 + +# Supported archive format SUPPORTED_FORMAT = [ "zip", "tgz", @@ -99,6 +101,10 @@ async def get(self, path, include_body=True): ] +DEFAULT_DIRECTORY_SIZE_LIMIT = 1073741824 # 1GB +DEFAULT_ARCHIVE_FORMAT = "zip" + + class ArchiveStream: """ArchiveStream is an abstraction layer to a Python archive allowing to stream archive files. @@ -145,6 +151,13 @@ def make_writer(handler, archive_format="zip"): return archive_file +def get_folder_size(dir_path): + """Return the size in bytes of a given directory. + """ + dir_path = pathlib.Path(dir_path) + return sum(f.stat().st_size for f in dir_path.glob("**/*") if f.is_file()) + + def make_reader(archive_path): """Return the appropriate archive file instance given the extension's path of `archive_path`. @@ -183,15 +196,40 @@ async def get(self, archive_path, include_body=False): raise web.HTTPError(404) archive_token = self.get_argument("archiveToken") - archive_format = self.get_argument("archiveFormat", "zip") + archive_format = self.get_argument("archiveFormat", DEFAULT_ARCHIVE_FORMAT) + folder_size_limit = self.get_argument("folderSizeLimit", None) + + # Check whether the specified archive format is supported. if archive_format not in SUPPORTED_FORMAT: self.log.error("Unsupported format {}.".format(archive_format)) raise web.HTTPError(404) + # If the folder size limit is not specified in the request, a + # default size limit is used. + try: + folder_size_limit_num = int(folder_size_limit) + + except (ValueError, TypeError): + self.log.warning( + "folderSizeLimit is a not a valid number: {}.".format(folder_size_limit) + ) + folder_size_limit_num = DEFAULT_DIRECTORY_SIZE_LIMIT + root_dir = pathlib.Path(cm.root_dir) archive_path = root_dir / url2path(archive_path) archive_filename = archive_path.with_suffix(".{}".format(archive_format)).name + # Check whether the archive folder is not larger than the size limit. 
+ folder_size = get_folder_size(archive_path) + print(folder_size) + if folder_size > folder_size_limit_num: + self.log.error( + "Archive folder size is larger than the size limit: {} bytes with a size limit of {}.".format( + folder_size, folder_size_limit_num + ) + ) + raise web.HTTPError(413) + self.log.info( "Prepare {} for archiving and downloading.".format(archive_filename) ) diff --git a/tests/test_files.py b/tests/test_files.py index f148aca27c..73f83e8038 100644 --- a/tests/test_files.py +++ b/tests/test_files.py @@ -140,7 +140,7 @@ async def test_old_files_redirect(fetch, serverapp, root_dir): # self.assertIn("filename*=utf-8''test.txt", disposition) -async def test_download_directory(fetch, serverapp, root_dir): +def make_fake_folder(root_dir): # Create a dummy directory. root_dir = pathlib.Path(str(root_dir)) archive_dir_path = root_dir / "download-archive-dir" @@ -159,6 +159,14 @@ async def test_download_directory(fetch, serverapp, root_dir): "download-archive-dir/test3.md", } + dir_path = str(archive_dir_path.relative_to(root_dir)) + + return dir_path, file_lists + + +async def test_download_directory(fetch, serverapp, root_dir): + dir_path, file_lists = make_fake_folder(root_dir) + archive_formats = { "zip": "r", "tgz": "r|gz", @@ -175,8 +183,8 @@ async def test_download_directory(fetch, serverapp, root_dir): for archive_format, mode in archive_formats.items(): params = dict(archiveToken=archive_token, archiveFormat=archive_format) - dir_path = str(archive_dir_path.relative_to(root_dir)) r = await fetch("directories", dir_path, method="GET", params=params) + assert r.code == 200 assert r.headers.get("content-type") == "application/octet-stream" assert r.headers.get("cache-control") == "no-cache" @@ -188,6 +196,43 @@ async def test_download_directory(fetch, serverapp, root_dir): assert set(map(lambda m: m.name, tf.getmembers())) == file_lists +async def test_download_directory_size_limit(fetch, serverapp, root_dir): + + dir_path, file_lists = make_fake_folder(root_dir) + archive_token = 59487596 + archive_format = "zip" + mode = "r" + + # The above fake folder has a size of 18 + folder_size_limit = 10 + + params = dict( + archiveToken=archive_token, + archiveFormat=archive_format, + folderSizeLimit=folder_size_limit, + ) + + with pytest.raises(tornado.httpclient.HTTPClientError) as e: + await fetch("directories", dir_path, method="GET", params=params) + assert expected_http_error(e, 413) + +async def test_download_wrong_archive_format(fetch, serverapp, root_dir): + + dir_path, file_lists = make_fake_folder(root_dir) + archive_token = 59487596 + archive_format = "a_format_that_does_not_exist" + mode = "r" + + + params = dict( + archiveToken=archive_token, + archiveFormat=archive_format, + ) + + with pytest.raises(tornado.httpclient.HTTPClientError) as e: + await fetch("directories", dir_path, method="GET", params=params) + assert expected_http_error(e, 404) + async def test_extract_directory(fetch, serverapp, root_dir): format_mode = { From c1c62fbc008c68f5104cfb0beb57d24d88f685b7 Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Tue, 1 Sep 2020 12:11:29 -0400 Subject: [PATCH 08/11] formatting --- tests/test_files.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/test_files.py b/tests/test_files.py index 73f83e8038..60d859995d 100644 --- a/tests/test_files.py +++ b/tests/test_files.py @@ -216,6 +216,7 @@ async def test_download_directory_size_limit(fetch, serverapp, root_dir): await fetch("directories", dir_path, method="GET", params=params) 
assert expected_http_error(e, 413) + async def test_download_wrong_archive_format(fetch, serverapp, root_dir): dir_path, file_lists = make_fake_folder(root_dir) @@ -223,16 +224,13 @@ async def test_download_wrong_archive_format(fetch, serverapp, root_dir): archive_format = "a_format_that_does_not_exist" mode = "r" - - params = dict( - archiveToken=archive_token, - archiveFormat=archive_format, - ) + params = dict(archiveToken=archive_token, archiveFormat=archive_format) with pytest.raises(tornado.httpclient.HTTPClientError) as e: await fetch("directories", dir_path, method="GET", params=params) assert expected_http_error(e, 404) + async def test_extract_directory(fetch, serverapp, root_dir): format_mode = { From af58ff3f02e9fe93ac7a324e64bfb6d11b4abc1f Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Wed, 2 Sep 2020 08:29:15 -0400 Subject: [PATCH 09/11] remove print --- jupyter_server/files/handlers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/jupyter_server/files/handlers.py b/jupyter_server/files/handlers.py index ff43a4c304..a065669bbb 100644 --- a/jupyter_server/files/handlers.py +++ b/jupyter_server/files/handlers.py @@ -221,7 +221,6 @@ async def get(self, archive_path, include_body=False): # Check whether the archive folder is not larger than the size limit. folder_size = get_folder_size(archive_path) - print(folder_size) if folder_size > folder_size_limit_num: self.log.error( "Archive folder size is larger than the size limit: {} bytes with a size limit of {}.".format( From 6e136520e896bc1b420c8ad1878533ec7a652410 Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Thu, 3 Sep 2020 12:07:08 -0400 Subject: [PATCH 10/11] make ExtractDirectoryHandler a PUT method --- jupyter_server/files/handlers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter_server/files/handlers.py b/jupyter_server/files/handlers.py index a065669bbb..fcc8392e4a 100644 --- a/jupyter_server/files/handlers.py +++ b/jupyter_server/files/handlers.py @@ -285,7 +285,7 @@ class ExtractDirectoryHandler(JupyterHandler): """ @web.authenticated - async def get(self, archive_path, include_body=False): + async def put(self, archive_path, include_body=False): # /extract-archive/ requests must originate from the same site self.check_xsrf_cookie() From 7099c20834e89e8cca96a0cd9d594b92f39b0787 Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Thu, 3 Sep 2020 12:11:01 -0400 Subject: [PATCH 11/11] add check for directory_handler_class --- jupyter_server/files/handlers.py | 4 ++++ tests/test_files.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/jupyter_server/files/handlers.py b/jupyter_server/files/handlers.py index fcc8392e4a..932eaa5534 100644 --- a/jupyter_server/files/handlers.py +++ b/jupyter_server/files/handlers.py @@ -195,6 +195,10 @@ async def get(self, archive_path, include_body=False): self.log.info("Refusing to serve hidden file, via 404 Error") raise web.HTTPError(404) + if not hasattr(cm, "directory_handler_class"): + self.log.info("Method not allowed") + raise web.HTTPError(405) + archive_token = self.get_argument("archiveToken") archive_format = self.get_argument("archiveFormat", DEFAULT_ARCHIVE_FORMAT) folder_size_limit = self.get_argument("folderSizeLimit", None) diff --git a/tests/test_files.py b/tests/test_files.py index 60d859995d..c24bf350e8 100644 --- a/tests/test_files.py +++ b/tests/test_files.py @@ -277,7 +277,7 @@ async def test_extract_directory(fetch, serverapp, root_dir): # REST call to extract the archive relative_archive_path = 
str(archive_path.relative_to(root_dir)) print(archive_path) - r = await fetch("extract-directories", relative_archive_path, method="GET") + r = await fetch("extract-directories", relative_archive_path, method="PUT") assert r.code == 200 assert archive_dir_path.is_dir()
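A quick usage sketch of the endpoints added by this series (not part of the
patches themselves): it assumes a server running at http://localhost:8888 with
token authentication satisfying the XSRF check, and the `requests` library on
the client side; the base URL, token value, and folder names are illustrative
only.

import requests

BASE_URL = "http://localhost:8888"  # assumed server address
HEADERS = {"Authorization": "token <your-token>"}  # assumed auth token

# Download the folder "my-folder" as a tar.gz archive. archiveToken is
# required and is echoed back as a cookie by DirectoryHandler; the optional
# folderSizeLimit from patch 07 caps the size of the folder being archived
# (the server answers 413 when the folder is larger).
params = {
    "archiveToken": "42",
    "archiveFormat": "tar.gz",
    "folderSizeLimit": str(2 * 1024 ** 3),  # 2GB
}
r = requests.get(BASE_URL + "/directories/my-folder", params=params, headers=HEADERS)
with open("my-folder.tar.gz", "wb") as f:
    f.write(r.content)

# Ask the server to extract an archive that already lives on the server,
# next to itself (a PUT request since patch 10).
r = requests.put(BASE_URL + "/extract-directories/my-folder.tar.gz", headers=HEADERS)
assert r.status_code == 200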