Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v 0.1.3 #84

Merged
merged 12 commits into from
Sep 18, 2024
2 changes: 0 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ def load_requirements(filename: str) -> list[str]:
return []


REQUIRED = []
TEST_REQUIRED = load_requirements(REQUIREMENTS_TEST_FILENAME)
DEV_REQUIRED = load_requirements(REQUIREMENTS_DEV_FILENAME)

Expand All @@ -27,6 +26,5 @@ def load_requirements(filename: str) -> list[str]:


setup(
install_requires=REQUIRED,
extras_require=EXTRAS,
)
93 changes: 36 additions & 57 deletions src/nbmetaclean/app_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,53 +58,33 @@
)


def check_ec(nb_files: list[Path], strict: bool, no_exec: bool) -> list[Path]:
    """Collect the notebooks that fail the execution_count check.

    Args:
        nb_files: Paths of the notebooks to inspect.
        strict: Passed through unchanged to `check_nb_ec`.
        no_exec: Passed through unchanged to `check_nb_ec`.

    Returns:
        The paths, in input order, for which `check_nb_ec` gave a falsy result.
    """
    return [
        nb_path
        for nb_path in nb_files
        if not check_nb_ec(read_nb(nb_path), strict, no_exec)
    ]


def check_errors(nb_files: list[Path]) -> list[Path]:
    """Collect the notebooks whose outputs fail the error check.

    Args:
        nb_files: Paths of the notebooks to inspect.

    Returns:
        The paths, in input order, for which `check_nb_errors` gave a falsy result.
    """
    return [
        nb_path for nb_path in nb_files if not check_nb_errors(read_nb(nb_path))
    ]


def check_warnings(nb_files: list[Path]) -> list[Path]:
    """Collect the notebooks whose outputs fail the warning check.

    Args:
        nb_files: Paths of the notebooks to inspect.

    Returns:
        The paths, in input order, for which `check_nb_warnings` gave a falsy result.
    """
    return [
        nb_path for nb_path in nb_files if not check_nb_warnings(read_nb(nb_path))
    ]


def print_error(
    nbs: list[Path],
    message: str,
) -> None:
    """Print a count header followed by one bullet line per notebook.

    Args:
        nbs: Notebooks to list under the header.
        message: Short description of the problem, embedded in the header line.
    """
    header = f"{len(nbs)} notebooks with {message}:"
    print(header)
    for nb_path in nbs:
        print("- ", nb_path)


def print_results(
    wrong_ec: list[Path],
    nb_errors: list[Path],
    nb_warnings: list[Path],
    read_error: list[Path],
) -> None:
    """Report every non-empty category of problem notebooks via `print_error`.

    Args:
        wrong_ec: Notebooks reported under "wrong execution_count".
        nb_errors: Notebooks reported under "errors in outputs".
        nb_warnings: Notebooks reported under "warnings in outputs".
        read_error: Notebooks reported under "read error".
    """
    # Order matters: it fixes the order of the sections in the output.
    categories = (
        (wrong_ec, "wrong execution_count"),
        (nb_errors, "errors in outputs"),
        (nb_warnings, "warnings in outputs"),
        (read_error, "read error"),
    )
    for notebooks, label in categories:
        if notebooks:
            print_error(notebooks, label)


def app_check() -> None:
"""Check notebooks for correct sequence of execution_count and errors in outputs."""
cfg = parser.parse_args()
Expand All @@ -123,32 +103,31 @@ def app_check() -> None:
sys.exit(1)

nb_files = get_nb_names_from_list(cfg.path)
read_error: list[Path] = []
if cfg.verbose:
print(f"Checking {len(nb_files)} notebooks.")

check_passed = True
if cfg.ec:
wrong_ec = check_ec(nb_files, not cfg.not_strict, cfg.no_exec)

if wrong_ec:
print_results(wrong_ec, "wrong execution_count")
check_passed = False
wrong_ec: list[Path] = []
nb_errors: list[Path] = []
nb_warnings: list[Path] = []
for nb_name in nb_files:
nb = read_nb(nb_name)
if nb is None:
read_error.append(nb_name)
continue

if cfg.err:
nb_errors = check_errors(nb_files)
if cfg.ec and not check_nb_ec(nb, not cfg.not_strict, cfg.no_exec):
wrong_ec.append(nb_name)

if nb_errors:
print_results(nb_errors, "errors in outputs")
check_passed = False
if cfg.err and not check_nb_errors(nb):
nb_errors.append(nb_name)

if cfg.warn:
nb_warnings = check_warnings(nb_files)
if cfg.warn and not check_nb_warnings(nb):
nb_warnings.append(nb_name)

if nb_warnings:
print_results(nb_warnings, "warnings in outputs")
check_passed = False
print_results(wrong_ec, nb_errors, nb_warnings, read_error)

if not check_passed:
if wrong_ec or nb_errors or nb_warnings or read_error:
sys.exit(1)


Expand Down
17 changes: 12 additions & 5 deletions src/nbmetaclean/app_clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,9 @@ def process_mask(mask: Union[list[str], None]) -> Union[tuple[TupleStr, ...], No

def print_result(
cleaned: list[Path],
errors: list[tuple[Path, Exception]],
errors: list[Path],
clean_config: CleanConfig,
path: list[Path],
path: list[str],
num_nbs: int,
) -> None:
if clean_config.verbose:
Expand All @@ -118,8 +118,8 @@ def print_result(
print("- ", nb)
if errors:
print(f"with errors: {len(errors)}")
for nb, exc in errors:
print(f"{nb}: {exc}")
for nb in errors:
print("- ", nb)


def app_clean() -> None:
Expand All @@ -143,13 +143,20 @@ def app_clean() -> None:
dry_run=cfg.dry_run,
verbose=cfg.verbose if not cfg.silent else False,
)
path_list = cfg.path if isinstance(cfg.path, list) else [cfg.path]
path_list: list[str] = cfg.path if isinstance(cfg.path, list) else [cfg.path]
nb_files = get_nb_names_from_list(path_list, hidden=cfg.clean_hidden_nbs)

cleaned, errors = clean_nb_file(
nb_files,
clean_config,
)
# print(cfg)
if cfg.path == ".": # if running without arguments add some info.
if not nb_files:
print("No notebooks found at current directory.")
sys.exit(0)
elif not cfg.silent and not cleaned and not errors:
print(f"Checked: {len(nb_files)} notebooks. All notebooks are clean.")

if not cfg.silent:
print_result(cleaned, errors, clean_config, path_list, len(nb_files))
Expand Down
16 changes: 8 additions & 8 deletions src/nbmetaclean/clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,12 +162,13 @@ def clean_nb(
changed = False
if cfg.clear_nb_metadata and (metadata := nb.get("metadata")):
old_metadata = copy.deepcopy(metadata)
masks = NB_METADATA_PRESERVE_MASKS
if cfg.nb_metadata_preserve_mask:
if not cfg.mask_merge:
masks = cfg.nb_metadata_preserve_mask
else:
masks = cfg.nb_metadata_preserve_mask + masks
masks = cfg.nb_metadata_preserve_mask + NB_METADATA_PRESERVE_MASKS
else:
masks = NB_METADATA_PRESERVE_MASKS
nb["metadata"] = filter_metadata(metadata, masks=masks)
if nb["metadata"] != old_metadata:
changed = True
Expand All @@ -186,7 +187,7 @@ def clean_nb(
def clean_nb_file(
path: Union[Path, list[Path]],
cfg: Optional[CleanConfig] = None,
) -> tuple[list[Path], list[tuple[Path, Exception]]]:
) -> tuple[list[Path], list[Path]]:
"""Clean metadata and execution count from notebook.

Args:
Expand All @@ -200,12 +201,11 @@ def clean_nb_file(
if not isinstance(path, list):
path = [path]
cleaned: list[Path] = []
errors: list[tuple[Path, Exception]] = []
errors: list[Path] = []
for filename in path:
try:
nb = read_nb(filename)
except Exception as ex:
errors.append((filename, ex))
nb = read_nb(filename)
if nb is None:
errors.append(filename)
continue
result = clean_nb(
nb,
Expand Down
15 changes: 11 additions & 4 deletions src/nbmetaclean/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,23 @@
]


def read_nb(path: PathOrStr) -> Nb | None:
    """Read notebook from filename.

    If file does not exist or is not a valid notebook, return None.

    Args:
        path (Union[str, PosixPath]): Notebook filename.

    Returns:
        Union[None, Notebook]: Jupyter Notebook as dict, or None if the path
            is not an existing file or its content cannot be read and parsed
            as JSON.
    """
    nb_path = Path(path)
    # `is_file()` is already False for a missing path, so no `exists()` check.
    if not nb_path.is_file():
        return None
    try:
        # The context manager closes the handle even when parsing fails.
        with open(nb_path, "r", encoding="utf-8") as fh:
            return json.load(fh)
    except Exception:
        # Deliberate best effort: any read or parse failure means "no notebook".
        return None


def write_nb(
Expand Down
2 changes: 1 addition & 1 deletion src/nbmetaclean/version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
__version__ = "0.1.2" # pragma: no cover
__version__ = "0.1.3" # pragma: no cover

__all__ = ["__version__"] # pragma: no cover
20 changes: 19 additions & 1 deletion tests/test_app_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from pathlib import Path
import subprocess

import pytest

from nbmetaclean.helpers import read_nb, write_nb
from nbmetaclean.version import __version__

Expand Down Expand Up @@ -150,8 +152,11 @@ def test_check_nb_ec(tmp_path: Path):

def test_check_nb_errors(tmp_path: Path):
"""test check `--err` option."""
test_nb_path = tmp_path / nb_name
nb_name = "test_nb_3_ec.ipynb"
test_nb = read_nb(example_nbs_path / nb_name)
assert test_nb is not None

test_nb_path = tmp_path / nb_name
write_nb(test_nb, test_nb_path)
res_out, res_err = run_app(test_nb_path, ["--err"])
assert not res_out
Expand Down Expand Up @@ -199,3 +204,16 @@ def test_check_app_version():
res_out, res_err = run_app("-v")
assert res_out == f"nbcheck from nbmetaclean, version: {__version__}\n"
assert not res_err


@pytest.mark.parametrize("arg", ["--ec", "--err", "--warn"])
def test_check_app_read_error(tmp_path: Path, arg: str):
    """Check that an empty notebook file is reported as a read error."""
    # An empty file is not valid JSON, so it cannot be loaded as a notebook.
    broken_nb = tmp_path / "test_nb.ipynb"
    broken_nb.write_text("")

    stdout, stderr = run_app(broken_nb, [arg])

    assert stdout.startswith("1 notebooks with read error:\n")
    assert stdout.endswith("test_nb.ipynb\n")
    assert not stderr
47 changes: 39 additions & 8 deletions tests/test_app_clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,57 @@
import subprocess

from nbmetaclean.helpers import read_nb, write_nb
from nbmetaclean.version import __version__


def run_app(
nb_path: Path,
nb_path: Path | list[Path] | None = None,
args: list[str] = [],
cwd: Path | None = None,
) -> tuple[str, str]:
"""run app"""
if isinstance(nb_path, Path):
args.insert(0, str(nb_path))
elif isinstance(nb_path, list):
args = [str(nb) for nb in nb_path] + args

run_result = subprocess.run(
["python", "-m", "nbmetaclean.app_clean", str(nb_path), *args],
["python", "-m", "nbmetaclean.app_clean", *args],
capture_output=True,
check=False,
cwd=cwd,
)
return run_result.stdout.decode("utf-8"), run_result.stderr.decode("utf-8")


example_nbs_path = Path("tests/test_nbs")

# This test conflicts with coverage and needs to be fixed.
# def test_app_clean_no_args(tmp_path: Path) -> None:
# """test app_clean with no args"""
# res_out, res_err = run_app(cwd=tmp_path)
# assert res_out == "No notebooks found at current directory.\n"
# assert not res_err

# # prepare test clean notebook
# nb_name_clean = "test_nb_2_clean.ipynb"
# test_nb = read_nb(example_nbs_path / nb_name_clean)
# test_nb_path = tmp_path / nb_name_clean
# write_nb(test_nb, test_nb_path)

# res_out, res_err = run_app(cwd=tmp_path)
# assert res_out == "Checked: 1 notebooks. All notebooks are clean.\n"
# assert not res_err

# # add metadata
# test_nb["metadata"]["some key"] = "some value"
# write_nb(test_nb, test_nb_path)

# res_out, res_err = run_app(cwd=tmp_path)
# assert res_out == "cleaned: test_nb_2_clean.ipynb\n"
# assert not res_err


def test_clean_nb_metadata(tmp_path: Path):
def test_clean_nb_metadata(tmp_path: Path) -> None:
"""test clean_nb_metadata"""
nb_name_clean = "test_nb_2_clean.ipynb"
test_nb = read_nb(example_nbs_path / nb_name_clean)
Expand Down Expand Up @@ -178,10 +209,10 @@ def test_clean_nb_wrong_file(tmp_path: Path):

def test_app_clean_version():
    """Check that `--version` and `-v` both print the version banner."""
    for flag in ("--version", "-v"):
        stdout, stderr = run_app(args=[flag])
        assert stdout.startswith("nbmetaclean version: ")
        assert not stderr
Loading