From bc828c5cdec9aad240b47cbc6ea7300fb27ed323 Mon Sep 17 00:00:00 2001 From: Moses Mbugua Date: Fri, 24 Feb 2023 05:47:35 -0800 Subject: [PATCH] Enable users to copy both files and directories (#1190) Co-authored-by: mmbugua Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: foo Co-authored-by: Steven Silvester --- .../services/contents/filemanager.py | 251 +++++++++++++++++- jupyter_server/services/contents/manager.py | 13 +- tests/services/contents/test_api.py | 33 ++- tests/services/contents/test_manager.py | 83 ++++++ 4 files changed, 365 insertions(+), 15 deletions(-) diff --git a/jupyter_server/services/contents/filemanager.py b/jupyter_server/services/contents/filemanager.py index 53b3c34d62..83eefce397 100644 --- a/jupyter_server/services/contents/filemanager.py +++ b/jupyter_server/services/contents/filemanager.py @@ -2,10 +2,13 @@ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. import errno +import math import mimetypes import os +import platform import shutil import stat +import subprocess import sys import warnings from datetime import datetime @@ -16,7 +19,7 @@ from jupyter_core.paths import exists, is_file_hidden, is_hidden from send2trash import send2trash from tornado import web -from traitlets import Bool, TraitError, Unicode, default, validate +from traitlets import Bool, Int, TraitError, Unicode, default, validate from jupyter_server import _tz as tz from jupyter_server.base.handlers import AuthenticatedFileHandler @@ -25,7 +28,7 @@ from .filecheckpoints import AsyncFileCheckpoints, FileCheckpoints from .fileio import AsyncFileManagerMixin, FileManagerMixin -from .manager import AsyncContentsManager, ContentsManager +from .manager import AsyncContentsManager, ContentsManager, copy_pat try: from os.path import samefile @@ -41,6 +44,8 @@ class FileContentsManager(FileManagerMixin, ContentsManager): root_dir = Unicode(config=True) + max_copy_folder_size_mb = Int(500, config=True, help="The max folder size that can be copied") + @default("root_dir") def _default_root_dir(self): try: @@ -600,6 +605,126 @@ def get_kernel_path(self, path, model=None): parent_dir = path.rsplit("/", 1)[0] if "/" in path else "" return parent_dir + def copy(self, from_path, to_path=None): + """ + Copy an existing file or directory and return its new model. + If to_path not specified, it will be the parent directory of from_path. + If copying a file and to_path is a directory, filename/directoryname will increment `from_path-Copy#.ext`. + Considering multi-part extensions, the Copy# part will be placed before the first dot for all the extensions except `ipynb`. + For easier manual searching in case of notebooks, the Copy# part will be placed before the last dot. + from_path must be a full path to a file or directory. + """ + to_path_original = str(to_path) + path = from_path.strip("/") + if to_path is not None: + to_path = to_path.strip("/") + + if "/" in path: + from_dir, from_name = path.rsplit("/", 1) + else: + from_dir = "" + from_name = path + + model = self.get(path) + # limit the size of folders being copied to prevent a timeout error + if model["type"] == "directory": + self.check_folder_size(path) + else: + # let the super class handle copying files + return super().copy(from_path=from_path, to_path=to_path) + + is_destination_specified = to_path is not None + to_name = copy_pat.sub(".", from_name) + if not is_destination_specified: + to_path = from_dir + if self.dir_exists(to_path): + name = copy_pat.sub(".", from_name) + to_name = super().increment_filename(name, to_path, insert="-Copy") + to_path = f"{to_path}/{to_name}" + + return self._copy_dir( + from_path=from_path, + to_path_original=to_path_original, + to_name=to_name, + to_path=to_path, + ) + + def _copy_dir(self, from_path, to_path_original, to_name, to_path): + """ + handles copying directories + returns the model for the copied directory + """ + try: + os_from_path = self._get_os_path(from_path.strip("/")) + os_to_path = f'{self._get_os_path(to_path_original.strip("/"))}/{to_name}' + shutil.copytree(os_from_path, os_to_path) + model = self.get(to_path, content=False) + except OSError as err: + self.log.error(f"OSError in _copy_dir: {err}") + raise web.HTTPError( + 400, + f"Can't copy '{from_path}' into Folder '{to_path}'", + ) from err + + return model + + def check_folder_size(self, path): + """ + limit the size of folders being copied to be no more than the + trait max_copy_folder_size_mb to prevent a timeout error + """ + limit_bytes = self.max_copy_folder_size_mb * 1024 * 1024 + size = int(self._get_dir_size(self._get_os_path(path))) + # convert from KB to Bytes for macOS + size = size * 1024 if platform.system() == "Darwin" else size + + if size > limit_bytes: + raise web.HTTPError( + 400, + f""" + Can't copy folders larger than {self.max_copy_folder_size_mb}MB, + "{path}" is {self._human_readable_size(size)} + """, + ) + + def _get_dir_size(self, path="."): + """ + calls the command line program du to get the directory size + """ + try: + if platform.system() == "Darwin": + # retuns the size of the folder in KB + result = subprocess.run(["du", "-sk", path], capture_output=True).stdout.split() + else: + result = subprocess.run( + ["du", "-s", "--block-size=1", path], capture_output=True + ).stdout.split() + + self.log.info(f"current status of du command {result}") + size = result[0].decode("utf-8") + except Exception as err: + self.log.error(f"Error during directory copy: {err}") + raise web.HTTPError( + 400, + f""" + Unexpected error during copy operation, + not able to get the size of the {path} directory + """, + ) from err + return size + + def _human_readable_size(self, size): + """ + returns folder size in a human readable format + """ + if size == 0: + return "0 Bytes" + + units = ["Bytes", "KB", "MB", "GB", "TB", "PB"] + order = int(math.log2(size) / 10) if size else 0 + + return "{:.4g} {}".format(size / (1 << (order * 10)), units[order]) + class AsyncFileContentsManager(FileContentsManager, AsyncFileManagerMixin, AsyncContentsManager): """An async file contents manager.""" @@ -955,3 +1080,125 @@ async def get_kernel_path(self, path, model=None): return path parent_dir = path.rsplit("/", 1)[0] if "/" in path else "" return parent_dir + + async def copy(self, from_path, to_path=None): + """ + Copy an existing file or directory and return its new model. + If to_path not specified, it will be the parent directory of from_path. + If copying a file and to_path is a directory, filename/directoryname will increment `from_path-Copy#.ext`. + Considering multi-part extensions, the Copy# part will be placed before the first dot for all the extensions except `ipynb`. + For easier manual searching in case of notebooks, the Copy# part will be placed before the last dot. + from_path must be a full path to a file or directory. + """ + to_path_original = str(to_path) + path = from_path.strip("/") + if to_path is not None: + to_path = to_path.strip("/") + + if "/" in path: + from_dir, from_name = path.rsplit("/", 1) + else: + from_dir = "" + from_name = path + + model = await self.get(path) + # limit the size of folders being copied to prevent a timeout error + if model["type"] == "directory": + await self.check_folder_size(path) + else: + # let the super class handle copying files + return await AsyncContentsManager.copy(self, from_path=from_path, to_path=to_path) + + is_destination_specified = to_path is not None + to_name = copy_pat.sub(".", from_name) + if not is_destination_specified: + to_path = from_dir + if await self.dir_exists(to_path): + name = copy_pat.sub(".", from_name) + to_name = await super().increment_filename(name, to_path, insert="-Copy") + to_path = f"{to_path}/{to_name}" + + return await self._copy_dir( + from_path=from_path, + to_path_original=to_path_original, + to_name=to_name, + to_path=to_path, + ) + + async def _copy_dir( + self, from_path: str, to_path_original: str, to_name: str, to_path: str + ) -> dict: + """ + handles copying directories + returns the model for the copied directory + """ + try: + os_from_path = self._get_os_path(from_path.strip("/")) + os_to_path = f'{self._get_os_path(to_path_original.strip("/"))}/{to_name}' + shutil.copytree(os_from_path, os_to_path) + model = await self.get(to_path, content=False) + except OSError as err: + self.log.error(f"OSError in _copy_dir: {err}") + raise web.HTTPError( + 400, + f"Can't copy '{from_path}' into read-only Folder '{to_path}'", + ) from err + + return model + + async def check_folder_size(self, path: str) -> None: + """ + limit the size of folders being copied to be no more than the + trait max_copy_folder_size_mb to prevent a timeout error + """ + limit_bytes = self.max_copy_folder_size_mb * 1024 * 1024 + + size = int(await self._get_dir_size(self._get_os_path(path))) + # convert from KB to Bytes for macOS + size = size * 1024 if platform.system() == "Darwin" else size + if size > limit_bytes: + raise web.HTTPError( + 400, + f""" + Can't copy folders larger than {self.max_copy_folder_size_mb}MB, + "{path}" is {await self._human_readable_size(size)} + """, + ) + + async def _get_dir_size(self, path: str = ".") -> str: + """ + calls the command line program du to get the directory size + """ + try: + if platform.system() == "Darwin": + # retuns the size of the folder in KB + result = subprocess.run(["du", "-sk", path], capture_output=True).stdout.split() + else: + result = subprocess.run( + ["du", "-s", "--block-size=1", path], capture_output=True + ).stdout.split() + + self.log.info(f"current status of du command {result}") + size = result[0].decode("utf-8") + except Exception as err: + self.log.error(f"Error during directory copy: {err}") + raise web.HTTPError( + 400, + f""" + Unexpected error during copy operation, + not able to get the size of the {path} directory + """, + ) from err + return size + + async def _human_readable_size(self, size: int) -> str: + """ + returns folder size in a human readable format + """ + if size == 0: + return "0 Bytes" + + units = ["Bytes", "KB", "MB", "GB", "TB", "PB"] + order = int(math.log2(size) / 10) if size else 0 + + return "{:.4g} {}".format(size / (1 << (order * 10)), units[order]) diff --git a/jupyter_server/services/contents/manager.py b/jupyter_server/services/contents/manager.py index c1e38cc914..b6ef67e1a5 100644 --- a/jupyter_server/services/contents/manager.py +++ b/jupyter_server/services/contents/manager.py @@ -15,7 +15,18 @@ from nbformat import validate as validate_nb from nbformat.v4 import new_notebook from tornado.web import HTTPError, RequestHandler -from traitlets import Any, Bool, Dict, Instance, List, TraitError, Type, Unicode, default, validate +from traitlets import ( + Any, + Bool, + Dict, + Instance, + List, + TraitError, + Type, + Unicode, + default, + validate, +) from traitlets.config.configurable import LoggingConfigurable from jupyter_server import DEFAULT_EVENTS_SCHEMA_PATH, JUPYTER_SERVER_EVENTS_URI diff --git a/tests/services/contents/test_api.py b/tests/services/contents/test_api.py index e95ae11255..252f368985 100644 --- a/tests/services/contents/test_api.py +++ b/tests/services/contents/test_api.py @@ -494,6 +494,27 @@ async def test_copy(jp_fetch, contents, contents_dir, _check_created): _check_created(r, str(contents_dir), path, copy3, type="notebook") +async def test_copy_dir(jp_fetch, contents, contents_dir, _check_created): + # created a nest copy of a the original folder + dest_dir = "foo" + path = "parent" + response = await jp_fetch( + "api", "contents", path, method="POST", body=json.dumps({"copy_from": dest_dir}) + ) + + _check_created(response, str(contents_dir), path, dest_dir, type="directory") + + # copy to a folder where a similar name exists + dest_dir = "foo" + path = "parent" + copy_dir = f"{dest_dir}-Copy1" + response = await jp_fetch( + "api", "contents", path, method="POST", body=json.dumps({"copy_from": dest_dir}) + ) + + _check_created(response, str(contents_dir), path, copy_dir, type="directory") + + async def test_copy_path(jp_fetch, contents, contents_dir, _check_created): path1 = "foo" path2 = "å b" @@ -577,18 +598,6 @@ async def test_copy_put_400_hidden( assert expected_http_error(e, 400) -async def test_copy_dir_400(jp_fetch, contents, contents_dir, _check_created): - with pytest.raises(tornado.httpclient.HTTPClientError) as e: - await jp_fetch( - "api", - "contents", - "foo", - method="POST", - body=json.dumps({"copy_from": "å b"}), - ) - assert expected_http_error(e, 400) - - @pytest.mark.skipif(sys.platform == "win32", reason="Disabled copying hidden files on Windows") async def test_copy_400_hidden( jp_fetch, diff --git a/tests/services/contents/test_manager.py b/tests/services/contents/test_manager.py index 03d51f6770..184f21cc75 100644 --- a/tests/services/contents/test_manager.py +++ b/tests/services/contents/test_manager.py @@ -1,4 +1,5 @@ import os +import shutil import sys import time from itertools import combinations @@ -52,6 +53,39 @@ def _make_dir(jp_contents_manager, api_path): print("Directory already exists: %r" % os_path) +def _make_big_dir(contents_manager, api_path): + # make a directory that is over 100 MB in size + os_path = contents_manager._get_os_path(api_path) + try: + os.makedirs(os_path) + + with open(f"{os_path}/demofile.txt", "a") as textFile: + textFile.write( + """ + Lorem ipsum dolor sit amet, consectetur adipiscing elit, + sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. + Ut enim ad minim veniam, quis nostrud exercitation ullamco + laboris nisi ut aliquip ex ea commodo consequat. + Duis aute irure dolor in reprehenderit in voluptate + velit esse cillum dolore eu fugiat nulla pariatur. + Excepteur sint occaecat cupidatat non proident, + sunt in culpa qui officia deserunt mollit anim id est laborum. + """ + ) + + num_sub_folders = contents_manager.max_copy_folder_size_mb * 10 + for i in range(num_sub_folders): + os.makedirs(f"{os_path}/subfolder-{i}") + for j in range(200): + shutil.copy( + f"{os_path}/demofile.txt", + f"{os_path}/subfolder-{i}/testfile{j}.txt", + ) + + except OSError as err: + print("Directory already exists", err) + + def symlink(jp_contents_manager, src, dst): """Make a symlink to src from dst @@ -818,6 +852,55 @@ async def test_copy(jp_contents_manager): assert copy3["path"] == "copy 3.ipynb" +async def test_copy_dir(jp_contents_manager): + cm = jp_contents_manager + destDir = "Untitled Folder 1" + sourceDir = "Morningstar Notebooks" + nonExistantDir = "FolderDoesNotExist" + + _make_dir(cm, destDir) + _make_dir(cm, sourceDir) + + nestedDir = f"{destDir}/{sourceDir}" + + # copy one folder insider another folder + copy = await ensure_async(cm.copy(from_path=sourceDir, to_path=destDir)) + assert copy["path"] == nestedDir + + # need to test when copying in a directory where the another folder with the same name exists + _make_dir(cm, nestedDir) + copy = await ensure_async(cm.copy(from_path=sourceDir, to_path=destDir)) + assert copy["path"] == f"{nestedDir}-Copy1" + + # need to test for when copying in the same path as the sourceDir + copy = await ensure_async(cm.copy(from_path=sourceDir, to_path="")) + assert copy["path"] == f"{sourceDir}-Copy1" + + # ensure its still possible to copy a folder to another folder that doesn't exist + copy = await ensure_async( + cm.copy( + from_path=sourceDir, + to_path=nonExistantDir, + ) + ) + assert copy["path"] == f"{nonExistantDir}/{sourceDir}" + + +async def test_copy_big_dir(jp_contents_manager): + # this tests how the Content API limits preventing copying folders that are more than + # the size limit specified in max_copy_folder_size_mb trait + cm = jp_contents_manager + destDir = "Untitled Folder 1" + sourceDir = "Morningstar Notebooks" + cm.max_copy_folder_size_mb = 5 + _make_dir(cm, destDir) + _make_big_dir(contents_manager=cm, api_path=sourceDir) + with pytest.raises(HTTPError) as exc_info: + await ensure_async(cm.copy(from_path=sourceDir, to_path=destDir)) + + assert exc_info.type is HTTPError + + async def test_mark_trusted_cells(jp_contents_manager): cm = jp_contents_manager nb, name, path = await new_notebook(cm)