Skip to content

Commit

Permalink
Enable users to copy both files and directories (#1190)
Browse files Browse the repository at this point in the history
Co-authored-by: mmbugua <moses.mbugua@morningstar.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: foo <foo@bar.com>
Co-authored-by: Steven Silvester <steven.silvester@ieee.org>
  • Loading branch information
5 people authored Feb 24, 2023
1 parent 903942c commit bc828c5
Show file tree
Hide file tree
Showing 4 changed files with 365 additions and 15 deletions.
251 changes: 249 additions & 2 deletions jupyter_server/services/contents/filemanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.
import errno
import math
import mimetypes
import os
import platform
import shutil
import stat
import subprocess
import sys
import warnings
from datetime import datetime
Expand All @@ -16,7 +19,7 @@
from jupyter_core.paths import exists, is_file_hidden, is_hidden
from send2trash import send2trash
from tornado import web
from traitlets import Bool, TraitError, Unicode, default, validate
from traitlets import Bool, Int, TraitError, Unicode, default, validate

from jupyter_server import _tz as tz
from jupyter_server.base.handlers import AuthenticatedFileHandler
Expand All @@ -25,7 +28,7 @@

from .filecheckpoints import AsyncFileCheckpoints, FileCheckpoints
from .fileio import AsyncFileManagerMixin, FileManagerMixin
from .manager import AsyncContentsManager, ContentsManager
from .manager import AsyncContentsManager, ContentsManager, copy_pat

try:
from os.path import samefile
Expand All @@ -41,6 +44,8 @@ class FileContentsManager(FileManagerMixin, ContentsManager):

root_dir = Unicode(config=True)

# Upper bound, in megabytes, on the size of a directory that may be copied.
# check_folder_size() multiplies this by 1024 * 1024 and raises an HTTP 400
# error when the measured directory size exceeds the result.
max_copy_folder_size_mb = Int(500, config=True, help="The max folder size that can be copied")

@default("root_dir")
def _default_root_dir(self):
try:
Expand Down Expand Up @@ -600,6 +605,126 @@ def get_kernel_path(self, path, model=None):
parent_dir = path.rsplit("/", 1)[0] if "/" in path else ""
return parent_dir

def copy(self, from_path, to_path=None):
    """
    Copy an existing file or directory and return its new model.

    Files are delegated to the parent class implementation. Directories are
    size-checked with ``check_folder_size`` and then copied by ``_copy_dir``.

    If to_path is not specified, the copy is created in the parent directory
    of from_path. If the destination directory exists, the new name is chosen
    by ``increment_filename`` with the ``-Copy`` insert so that existing
    entries are not overwritten.
    Considering multi-part extensions, the Copy# part will be placed before the
    first dot for all the extensions except `ipynb`.
    For easier manual searching in case of notebooks, the Copy# part will be
    placed before the last dot.

    from_path must be a full path to a file or directory.
    """
    path = from_path.strip("/")
    if to_path is not None:
        to_path = to_path.strip("/")

    # "a/b/c" -> ("a/b", "c"); a bare name -> ("", name)
    from_dir, _, from_name = path.rpartition("/")

    # Only the model's type is inspected here, so the content is not requested.
    model = self.get(path, content=False)
    if model["type"] != "directory":
        # let the super class handle copying files
        return super().copy(from_path=from_path, to_path=to_path)

    # limit the size of folders being copied to prevent a timeout error
    self.check_folder_size(path)

    # Directory that will contain the copy. Resolving it here (instead of
    # forwarding str(to_path)) keeps a missing destination from turning into
    # a folder literally named "None".
    dest_dir = from_dir if to_path is None else to_path

    to_name = copy_pat.sub(".", from_name)
    if self.dir_exists(dest_dir):
        to_name = super().increment_filename(to_name, dest_dir, insert="-Copy")

    return self._copy_dir(
        from_path=from_path,
        to_path_original=dest_dir,
        to_name=to_name,
        to_path=f"{dest_dir}/{to_name}",
    )

def _copy_dir(self, from_path, to_path_original, to_name, to_path):
    """
    handles copying directories

    from_path: API path of the directory to copy.
    to_path_original: API path of the directory that will contain the copy.
    to_name: name of the new directory inside ``to_path_original``.
    to_path: API path of the new directory, used to build the returned model.

    returns the model (without content) for the copied directory
    raises web.HTTPError(400) when the copy fails with an OSError
    """
    try:
        os_from_path = self._get_os_path(from_path.strip("/"))
        # Join with the platform separator instead of a hard-coded "/".
        os_to_path = os.path.join(self._get_os_path(to_path_original.strip("/")), to_name)
        shutil.copytree(os_from_path, os_to_path)
        model = self.get(to_path, content=False)
    except OSError as err:
        self.log.error("OSError in _copy_dir: %s", err)
        raise web.HTTPError(
            400,
            f"Can't copy '{from_path}' into Folder '{to_path}'",
        ) from err

    return model

def check_folder_size(self, path):
    """
    limit the size of folders being copied to be no more than the
    trait max_copy_folder_size_mb to prevent a timeout error

    path: API path of the directory about to be copied.

    raises web.HTTPError(400) when the directory is larger than the limit,
    or when the size reported by ``_get_dir_size`` is not an integer.
    """
    limit_bytes = self.max_copy_folder_size_mb * 1024 * 1024

    reported = self._get_dir_size(self._get_os_path(path))
    try:
        size = int(reported)
    except (TypeError, ValueError) as err:
        # Report unusable size output as a client error rather than letting
        # the ValueError escape unhandled.
        raise web.HTTPError(
            400,
            "Unexpected error during copy operation, "
            f"not able to get the size of the {path} directory",
        ) from err

    # convert from KB to Bytes for macOS
    if platform.system() == "Darwin":
        size *= 1024

    if size > limit_bytes:
        # Single-line message: a triple-quoted literal would embed newlines
        # and source indentation in the error text sent to the client.
        raise web.HTTPError(
            400,
            f"Can't copy folders larger than {self.max_copy_folder_size_mb}MB, "
            f'"{path}" is {self._human_readable_size(size)}',
        )

def _get_dir_size(self, path="."):
    """
    calls the command line program du to get the directory size

    path: OS path of the directory to measure.

    returns the first field of du's output as a string; the value is in
    kilobytes on macOS (``du -sk``) and in bytes elsewhere
    (``du -s --block-size=1``).
    raises web.HTTPError(400) when du cannot be run or prints no size.
    """
    if platform.system() == "Darwin":
        # returns the size of the folder in KB
        command = ["du", "-sk", path]
    else:
        command = ["du", "-s", "--block-size=1", path]

    try:
        completed = subprocess.run(command, capture_output=True)
        result = completed.stdout.split()
        self.log.info("current status of du command %s", result)
        if not result:
            # Surface du's own diagnostics instead of an opaque IndexError.
            stderr = completed.stderr.decode("utf-8", errors="replace").strip()
            raise RuntimeError(f"du exited with status {completed.returncode}: {stderr}")
        size = result[0].decode("utf-8")
    except Exception as err:
        # Boundary handler: any failure to measure the directory is reported
        # to the client as a 400, with the cause kept in the log and chain.
        self.log.error("Error during directory copy: %s", err)
        raise web.HTTPError(
            400,
            "Unexpected error during copy operation, "
            f"not able to get the size of the {path} directory",
        ) from err
    return size

def _human_readable_size(self, size):
"""
returns folder size in a human readable format
"""
if size == 0:
return "0 Bytes"

units = ["Bytes", "KB", "MB", "GB", "TB", "PB"]
order = int(math.log2(size) / 10) if size else 0

return "{:.4g} {}".format(size / (1 << (order * 10)), units[order])


class AsyncFileContentsManager(FileContentsManager, AsyncFileManagerMixin, AsyncContentsManager):
"""An async file contents manager."""
Expand Down Expand Up @@ -955,3 +1080,125 @@ async def get_kernel_path(self, path, model=None):
return path
parent_dir = path.rsplit("/", 1)[0] if "/" in path else ""
return parent_dir

async def copy(self, from_path, to_path=None):
    """
    Copy an existing file or directory and return its new model.

    Files are delegated to ``AsyncContentsManager.copy``. Directories are
    size-checked with ``check_folder_size`` and then copied by ``_copy_dir``.

    If to_path is not specified, the copy is created in the parent directory
    of from_path. If the destination directory exists, the new name is chosen
    by ``increment_filename`` with the ``-Copy`` insert so that existing
    entries are not overwritten.
    Considering multi-part extensions, the Copy# part will be placed before the
    first dot for all the extensions except `ipynb`.
    For easier manual searching in case of notebooks, the Copy# part will be
    placed before the last dot.

    from_path must be a full path to a file or directory.
    """
    path = from_path.strip("/")
    if to_path is not None:
        to_path = to_path.strip("/")

    # "a/b/c" -> ("a/b", "c"); a bare name -> ("", name)
    from_dir, _, from_name = path.rpartition("/")

    # Only the model's type is inspected here, so the content is not requested.
    model = await self.get(path, content=False)
    if model["type"] != "directory":
        # let the super class handle copying files
        return await AsyncContentsManager.copy(self, from_path=from_path, to_path=to_path)

    # limit the size of folders being copied to prevent a timeout error
    await self.check_folder_size(path)

    # Directory that will contain the copy. Resolving it here (instead of
    # forwarding str(to_path)) keeps a missing destination from turning into
    # a folder literally named "None".
    dest_dir = from_dir if to_path is None else to_path

    to_name = copy_pat.sub(".", from_name)
    if await self.dir_exists(dest_dir):
        to_name = await super().increment_filename(to_name, dest_dir, insert="-Copy")

    return await self._copy_dir(
        from_path=from_path,
        to_path_original=dest_dir,
        to_name=to_name,
        to_path=f"{dest_dir}/{to_name}",
    )

async def _copy_dir(
    self, from_path: str, to_path_original: str, to_name: str, to_path: str
) -> dict:
    """
    handles copying directories

    from_path: API path of the directory to copy.
    to_path_original: API path of the directory that will contain the copy.
    to_name: name of the new directory inside ``to_path_original``.
    to_path: API path of the new directory, used to build the returned model.

    returns the model (without content) for the copied directory
    raises web.HTTPError(400) when the copy fails with an OSError
    """
    import asyncio

    try:
        os_from_path = self._get_os_path(from_path.strip("/"))
        # Join with the platform separator instead of a hard-coded "/".
        os_to_path = os.path.join(self._get_os_path(to_path_original.strip("/")), to_name)
        # copytree is blocking; run it in the default executor so the event
        # loop stays responsive while the directory tree is copied.
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, shutil.copytree, os_from_path, os_to_path)
        model = await self.get(to_path, content=False)
    except OSError as err:
        self.log.error("OSError in _copy_dir: %s", err)
        raise web.HTTPError(
            400,
            f"Can't copy '{from_path}' into read-only Folder '{to_path}'",
        ) from err

    return model

async def check_folder_size(self, path: str) -> None:
    """
    limit the size of folders being copied to be no more than the
    trait max_copy_folder_size_mb to prevent a timeout error

    path: API path of the directory about to be copied.

    raises web.HTTPError(400) when the directory is larger than the limit,
    or when the size reported by ``_get_dir_size`` is not an integer.
    """
    limit_bytes = self.max_copy_folder_size_mb * 1024 * 1024

    reported = await self._get_dir_size(self._get_os_path(path))
    try:
        size = int(reported)
    except (TypeError, ValueError) as err:
        # Report unusable size output as a client error rather than letting
        # the ValueError escape unhandled.
        raise web.HTTPError(
            400,
            "Unexpected error during copy operation, "
            f"not able to get the size of the {path} directory",
        ) from err

    # convert from KB to Bytes for macOS
    if platform.system() == "Darwin":
        size *= 1024

    if size > limit_bytes:
        readable = await self._human_readable_size(size)
        # Single-line message: a triple-quoted literal would embed newlines
        # and source indentation in the error text sent to the client.
        raise web.HTTPError(
            400,
            f"Can't copy folders larger than {self.max_copy_folder_size_mb}MB, "
            f'"{path}" is {readable}',
        )

async def _get_dir_size(self, path: str = ".") -> str:
    """
    calls the command line program du to get the directory size

    path: OS path of the directory to measure.

    returns the first field of du's output as a string; the value is in
    kilobytes on macOS (``du -sk``) and in bytes elsewhere
    (``du -s --block-size=1``).
    raises web.HTTPError(400) when du cannot be run or prints no size.
    """
    import asyncio

    if platform.system() == "Darwin":
        # returns the size of the folder in KB
        command = ["du", "-sk", path]
    else:
        command = ["du", "-s", "--block-size=1", path]

    def _run_du():
        return subprocess.run(command, capture_output=True)

    try:
        # subprocess.run blocks until du exits; run it in the default
        # executor so the event loop is not stalled on large directories.
        loop = asyncio.get_running_loop()
        completed = await loop.run_in_executor(None, _run_du)
        result = completed.stdout.split()
        self.log.info("current status of du command %s", result)
        if not result:
            # Surface du's own diagnostics instead of an opaque IndexError.
            stderr = completed.stderr.decode("utf-8", errors="replace").strip()
            raise RuntimeError(f"du exited with status {completed.returncode}: {stderr}")
        size = result[0].decode("utf-8")
    except Exception as err:
        # Boundary handler: any failure to measure the directory is reported
        # to the client as a 400, with the cause kept in the log and chain.
        self.log.error("Error during directory copy: %s", err)
        raise web.HTTPError(
            400,
            "Unexpected error during copy operation, "
            f"not able to get the size of the {path} directory",
        ) from err
    return size

async def _human_readable_size(self, size: int) -> str:
"""
returns folder size in a human readable format
"""
if size == 0:
return "0 Bytes"

units = ["Bytes", "KB", "MB", "GB", "TB", "PB"]
order = int(math.log2(size) / 10) if size else 0

return "{:.4g} {}".format(size / (1 << (order * 10)), units[order])
13 changes: 12 additions & 1 deletion jupyter_server/services/contents/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,18 @@
from nbformat import validate as validate_nb
from nbformat.v4 import new_notebook
from tornado.web import HTTPError, RequestHandler
from traitlets import Any, Bool, Dict, Instance, List, TraitError, Type, Unicode, default, validate
from traitlets import (
Any,
Bool,
Dict,
Instance,
List,
TraitError,
Type,
Unicode,
default,
validate,
)
from traitlets.config.configurable import LoggingConfigurable

from jupyter_server import DEFAULT_EVENTS_SCHEMA_PATH, JUPYTER_SERVER_EVENTS_URI
Expand Down
33 changes: 21 additions & 12 deletions tests/services/contents/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,27 @@ async def test_copy(jp_fetch, contents, contents_dir, _check_created):
_check_created(r, str(contents_dir), path, copy3, type="notebook")


async def test_copy_dir(jp_fetch, contents, contents_dir, _check_created):
    """Copy the "foo" directory into "parent" twice through the contents API.

    The first request is expected to create "foo" inside "parent"; the second,
    now that the name is taken, is expected to create "foo-Copy1".
    """
    source_dir = "foo"
    target_dir = "parent"
    request_body = json.dumps({"copy_from": source_dir})

    # first pass: nested copy of the original folder
    # second pass: a folder with the same name already exists in the target
    for expected_name in (source_dir, f"{source_dir}-Copy1"):
        response = await jp_fetch(
            "api", "contents", target_dir, method="POST", body=request_body
        )
        _check_created(
            response, str(contents_dir), target_dir, expected_name, type="directory"
        )


async def test_copy_path(jp_fetch, contents, contents_dir, _check_created):
path1 = "foo"
path2 = "å b"
Expand Down Expand Up @@ -577,18 +598,6 @@ async def test_copy_put_400_hidden(
assert expected_http_error(e, 400)


async def test_copy_dir_400(jp_fetch, contents, contents_dir, _check_created):
    """A POST to "foo" with copy_from "å b" must be rejected with HTTP 400."""
    request_body = json.dumps({"copy_from": "å b"})

    with pytest.raises(tornado.httpclient.HTTPClientError) as exc_info:
        await jp_fetch("api", "contents", "foo", method="POST", body=request_body)

    assert expected_http_error(exc_info, 400)

@pytest.mark.skipif(sys.platform == "win32", reason="Disabled copying hidden files on Windows")
async def test_copy_400_hidden(
jp_fetch,
Expand Down
Loading

0 comments on commit bc828c5

Please sign in to comment.