Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable users to copy both files and directories #1190

Merged
merged 13 commits into from
Feb 24, 2023
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
247 changes: 246 additions & 1 deletion jupyter_server/services/contents/filemanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.
import errno
import math
import mimetypes
import os
import platform
import shutil
import stat
import subprocess
import sys
import warnings
from datetime import datetime
Expand All @@ -25,7 +28,7 @@

from .filecheckpoints import AsyncFileCheckpoints, FileCheckpoints
from .fileio import AsyncFileManagerMixin, FileManagerMixin
from .manager import AsyncContentsManager, ContentsManager
from .manager import AsyncContentsManager, ContentsManager, copy_pat

try:
from os.path import samefile
Expand Down Expand Up @@ -600,6 +603,126 @@ def get_kernel_path(self, path, model=None):
parent_dir = path.rsplit("/", 1)[0] if "/" in path else ""
return parent_dir

def copy(self, from_path, to_path=None):
    """
    Copy an existing file or directory and return its new model.

    If to_path is not specified, the copy is created in the parent
    directory of from_path.
    Files are handed to the super class unchanged. For the naming rules of
    copied files (`from_path-Copy#.ext`): considering multi-part
    extensions, the Copy# part is placed before the first dot for all
    extensions except `ipynb`; for easier manual searching in case of
    notebooks, the Copy# part is placed before the last dot.
    Directories are size-checked with `check_folder_size` and copied with
    `_copy_dir`; when the destination directory exists the new directory
    name is produced by `increment_filename` with a `-Copy` insert.
    from_path must be a full path to a file or directory.

    Raises a 400 HTTPError (from `check_folder_size` / `_copy_dir`) when
    the folder is too large or the copy fails with an OSError.
    """
    path = from_path.strip("/")
    if to_path is not None:
        to_path = to_path.strip("/")

    if "/" in path:
        from_dir, from_name = path.rsplit("/", 1)
    else:
        from_dir, from_name = "", path

    model = self.get(path)
    if model["type"] != "directory":
        # let the super class handle copying files
        return super().copy(from_path=from_path, to_path=to_path)

    # limit the size of folders being copied to prevent a timeout error
    self.check_folder_size(path)

    # Resolve the destination directory exactly once. The previous code
    # forwarded str(to_path) to _copy_dir, which turned an omitted to_path
    # into the literal directory name "None".
    to_dir = from_dir if to_path is None else to_path
    to_name = copy_pat.sub(".", from_name)
    if self.dir_exists(to_dir):
        to_name = super().increment_filename(to_name, to_dir, insert="-Copy")

    return self._copy_dir(
        from_path=from_path,
        to_path_original=to_dir,
        to_name=to_name,
        to_path=f"{to_dir}/{to_name}",
    )

def _copy_dir(self, from_path, to_path_original, to_name, to_path):
    """
    Copy a directory tree on disk and return the model of the copy.

    from_path is the API path of the source directory, to_path_original the
    API path of the directory receiving the copy, to_name the name of the
    new directory inside it, and to_path the API path used to look up the
    model of the result.

    An OSError raised along the way is logged and re-raised as a
    400 HTTPError.
    """
    try:
        source = self._get_os_path(from_path.strip("/"))
        target_parent = self._get_os_path(to_path_original.strip("/"))
        shutil.copytree(source, f"{target_parent}/{to_name}")
        copied = self.get(to_path, content=False)
    except OSError as err:
        self.log.error(f"OSError in _copy_dir: {err}")
        message = f"Can't copy '{from_path}' into Folder '{to_path}'"
        raise web.HTTPError(400, message) from err
    else:
        return copied

def check_folder_size(self, path):
    """
    Refuse to copy folders bigger than the max_copy_folder_size_mb trait.

    The size is obtained from `_get_dir_size`; a folder above the limit
    results in a 400 HTTPError so that the copy cannot run into a timeout.
    """
    max_bytes = self.max_copy_folder_size_mb * 1024 * 1024
    folder_bytes = int(self._get_dir_size(self._get_os_path(path)))
    if platform.system() == "Darwin":
        # on macOS _get_dir_size reports KB, so convert to Bytes
        folder_bytes = folder_bytes * 1024

    if folder_bytes <= max_bytes:
        return

    raise web.HTTPError(
        400,
        f"""
Can't copy folders larger than {self.max_copy_folder_size_mb}MB,
"{path}" is {self._human_readable_size(folder_bytes)}
""",
    )

def _get_dir_size(self, path="."):
    """
    Ask the command line program du for the size of a directory.

    Returns the first field of du's output as a string: KB on macOS
    (`du -sk`), Bytes elsewhere (`du -s --block-size=1`).
    Any failure is logged and reported as a 400 HTTPError.
    """
    try:
        if platform.system() == "Darwin":
            # returns the size of the folder in KB
            du_command = ["du", "-sk", path]
        else:
            du_command = ["du", "-s", "--block-size=1", path]
        result = subprocess.run(du_command, capture_output=True).stdout.split()

        self.log.info(f"current status of du command {result}")
        size = result[0].decode("utf-8")
    except Exception as err:
        self.log.error(f"Error during directory copy: {err}")
        raise web.HTTPError(
            400,
            f"""
Unexpected error during copy operation,
not able to get the size of the {path} directory
""",
        ) from err
    return size

def _human_readable_size(self, size):
"""
returns folder size in a human readable format
"""
if size == 0:
return "0 Bytes"

units = ["Bytes", "KB", "MB", "GB", "TB", "PB"]
order = int(math.log2(size) / 10) if size else 0

return "{:.4g} {}".format(size / (1 << (order * 10)), units[order])


class AsyncFileContentsManager(FileContentsManager, AsyncFileManagerMixin, AsyncContentsManager):
"""An async file contents manager."""
Expand Down Expand Up @@ -955,3 +1078,125 @@ async def get_kernel_path(self, path, model=None):
return path
parent_dir = path.rsplit("/", 1)[0] if "/" in path else ""
return parent_dir

async def copy(self, from_path, to_path=None):
    """
    Copy an existing file or directory and return its new model.

    If to_path is not specified, the copy is created in the parent
    directory of from_path.
    Files are handed to AsyncContentsManager.copy unchanged. For the naming
    rules of copied files (`from_path-Copy#.ext`): considering multi-part
    extensions, the Copy# part is placed before the first dot for all
    extensions except `ipynb`; for easier manual searching in case of
    notebooks, the Copy# part is placed before the last dot.
    Directories are size-checked with `check_folder_size` and copied with
    `_copy_dir`; when the destination directory exists the new directory
    name is produced by `increment_filename` with a `-Copy` insert.
    from_path must be a full path to a file or directory.

    Raises a 400 HTTPError (from `check_folder_size` / `_copy_dir`) when
    the folder is too large or the copy fails with an OSError.
    """
    path = from_path.strip("/")
    if to_path is not None:
        to_path = to_path.strip("/")

    if "/" in path:
        from_dir, from_name = path.rsplit("/", 1)
    else:
        from_dir, from_name = "", path

    model = await self.get(path)
    if model["type"] != "directory":
        # let the super class handle copying files
        return await AsyncContentsManager.copy(self, from_path=from_path, to_path=to_path)

    # limit the size of folders being copied to prevent a timeout error
    await self.check_folder_size(path)

    # Resolve the destination directory exactly once. The previous code
    # forwarded str(to_path) to _copy_dir, which turned an omitted to_path
    # into the literal directory name "None".
    to_dir = from_dir if to_path is None else to_path
    to_name = copy_pat.sub(".", from_name)
    if await self.dir_exists(to_dir):
        to_name = await super().increment_filename(to_name, to_dir, insert="-Copy")

    return await self._copy_dir(
        from_path=from_path,
        to_path_original=to_dir,
        to_name=to_name,
        to_path=f"{to_dir}/{to_name}",
    )

async def _copy_dir(
    self, from_path: str, to_path_original: str, to_name: str, to_path: str
) -> dict:
    """
    Copy a directory tree on disk and return the model of the copy.

    from_path is the API path of the source directory, to_path_original the
    API path of the directory receiving the copy, to_name the name of the
    new directory inside it, and to_path the API path used to look up the
    model of the result.

    An OSError raised along the way is logged and re-raised as a
    400 HTTPError.
    """
    try:
        source = self._get_os_path(from_path.strip("/"))
        target_parent = self._get_os_path(to_path_original.strip("/"))
        shutil.copytree(source, f"{target_parent}/{to_name}")
        copied = await self.get(to_path, content=False)
    except OSError as err:
        self.log.error(f"OSError in _copy_dir: {err}")
        message = f"Can't copy '{from_path}' into read-only Folder '{to_path}'"
        raise web.HTTPError(400, message) from err
    else:
        return copied

async def check_folder_size(self, path: str) -> None:
    """
    Refuse to copy folders bigger than the max_copy_folder_size_mb trait.

    The size is obtained from `_get_dir_size`; a folder above the limit
    results in a 400 HTTPError so that the copy cannot run into a timeout.
    """
    max_bytes = self.max_copy_folder_size_mb * 1024 * 1024

    folder_bytes = int(await self._get_dir_size(self._get_os_path(path)))
    if platform.system() == "Darwin":
        # on macOS _get_dir_size reports KB, so convert to Bytes
        folder_bytes = folder_bytes * 1024

    if folder_bytes <= max_bytes:
        return

    raise web.HTTPError(
        400,
        f"""
Can't copy folders larger than {self.max_copy_folder_size_mb}MB,
"{path}" is {await self._human_readable_size(folder_bytes)}
""",
    )

async def _get_dir_size(self, path: str = ".") -> str:
    """
    Ask the command line program du for the size of a directory.

    Returns the first field of du's output as a string: KB on macOS
    (`du -sk`), Bytes elsewhere (`du -s --block-size=1`).
    Any failure is logged and reported as a 400 HTTPError.
    """
    try:
        if platform.system() == "Darwin":
            # returns the size of the folder in KB
            du_command = ["du", "-sk", path]
        else:
            du_command = ["du", "-s", "--block-size=1", path]
        result = subprocess.run(du_command, capture_output=True).stdout.split()

        self.log.info(f"current status of du command {result}")
        size = result[0].decode("utf-8")
    except Exception as err:
        self.log.error(f"Error during directory copy: {err}")
        raise web.HTTPError(
            400,
            f"""
Unexpected error during copy operation,
not able to get the size of the {path} directory
""",
        ) from err
    return size

async def _human_readable_size(self, size: int) -> str:
"""
returns folder size in a human readable format
"""
if size == 0:
return "0 Bytes"

units = ["Bytes", "KB", "MB", "GB", "TB", "PB"]
order = int(math.log2(size) / 10) if size else 0

return "{:.4g} {}".format(size / (1 << (order * 10)), units[order])
16 changes: 15 additions & 1 deletion jupyter_server/services/contents/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,19 @@
from nbformat import validate as validate_nb
from nbformat.v4 import new_notebook
from tornado.web import HTTPError, RequestHandler
from traitlets import Any, Bool, Dict, Instance, List, TraitError, Type, Unicode, default, validate
from traitlets import (
Any,
Bool,
Dict,
Instance,
Int,
List,
TraitError,
Type,
Unicode,
default,
validate,
)
from traitlets.config.configurable import LoggingConfigurable

from jupyter_server import DEFAULT_EVENTS_SCHEMA_PATH, JUPYTER_SERVER_EVENTS_URI
Expand Down Expand Up @@ -121,6 +133,8 @@ def _notary_default(self):
""",
)

# Upper bound, in MB, on the size of a folder that may be copied; defaults to
# 500 and is configurable. Read by check_folder_size in filemanager.py.
max_copy_folder_size_mb = Int(500, config=True, help="The max folder size that can be copied")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, perhaps this trait should be on FileManager, since it isn't implemented in the more generic ContentsManager. Anyone have any thoughts?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes - I completely agree - good catch.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will make sure to make that change @blink1073.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@blink1073, when you say FileManager do you mean the FileContentsManager or the LargeFileManager?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @kenyaachon - yes, FileContentsManager is the appropriate location for the trait since it's the "most base" class for the file-based contents managers. Sorry for the confusion.


untitled_notebook = Unicode(
_i18n("Untitled"),
config=True,
Expand Down
33 changes: 21 additions & 12 deletions tests/services/contents/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,27 @@ async def test_copy(jp_fetch, contents, contents_dir, _check_created):
_check_created(r, str(contents_dir), path, copy3, type="notebook")


async def test_copy_dir(jp_fetch, contents, contents_dir, _check_created):
    """Copy the folder "foo" into "parent" twice and check both created entries."""
    source_dir = "foo"
    target_dir = "parent"
    request_body = json.dumps({"copy_from": source_dir})

    # create a nested copy of the original folder
    response = await jp_fetch("api", "contents", target_dir, method="POST", body=request_body)
    _check_created(response, str(contents_dir), target_dir, source_dir, type="directory")

    # copy again into the folder where that name now exists: expect "-Copy1"
    response = await jp_fetch("api", "contents", target_dir, method="POST", body=request_body)
    _check_created(
        response, str(contents_dir), target_dir, f"{source_dir}-Copy1", type="directory"
    )


async def test_copy_path(jp_fetch, contents, contents_dir, _check_created):
path1 = "foo"
path2 = "å b"
Expand Down Expand Up @@ -577,18 +598,6 @@ async def test_copy_put_400_hidden(
assert expected_http_error(e, 400)


async def test_copy_dir_400(jp_fetch, contents, contents_dir, _check_created):
    """A POST to "foo" with copy_from "å b" is expected to fail with HTTP 400."""
    request_body = json.dumps({"copy_from": "å b"})
    with pytest.raises(tornado.httpclient.HTTPClientError) as e:
        await jp_fetch("api", "contents", "foo", method="POST", body=request_body)
    assert expected_http_error(e, 400)


@pytest.mark.skipif(sys.platform == "win32", reason="Disabled copying hidden files on Windows")
async def test_copy_400_hidden(
jp_fetch,
Expand Down
Loading