From 93ddaee8d9702b2ac5f26bd60fcaad95054aa48c Mon Sep 17 00:00:00 2001 From: ayasyrev Date: Mon, 16 Sep 2024 13:41:34 +0300 Subject: [PATCH 1/7] bump 0.1.3_dev --- src/nbmetaclean/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nbmetaclean/version.py b/src/nbmetaclean/version.py index 8c437c6..d36cb6b 100644 --- a/src/nbmetaclean/version.py +++ b/src/nbmetaclean/version.py @@ -1,3 +1,3 @@ -__version__ = "0.1.2" # pragma: no cover +__version__ = "0.1.0_dev" # pragma: no cover __all__ = ["__version__"] # pragma: no cover From 82052621662037aabf14d3ad9c8c3a326ada18e5 Mon Sep 17 00:00:00 2001 From: ayasyrev Date: Mon, 16 Sep 2024 16:34:04 +0300 Subject: [PATCH 2/7] refactor read_nb - return None if error. Refactor app_check, add tests. --- src/nbmetaclean/app_check.py | 93 ++++++++++++++---------------------- src/nbmetaclean/app_clean.py | 4 +- src/nbmetaclean/clean.py | 9 ++-- src/nbmetaclean/helpers.py | 15 ++++-- tests/test_app_check.py | 20 +++++++- tests/test_clean.py | 9 ++-- tests/test_read_write.py | 14 ++++++ 7 files changed, 92 insertions(+), 72 deletions(-) diff --git a/src/nbmetaclean/app_check.py b/src/nbmetaclean/app_check.py index 3751aa7..07e2db9 100644 --- a/src/nbmetaclean/app_check.py +++ b/src/nbmetaclean/app_check.py @@ -58,53 +58,33 @@ ) -def check_ec(nb_files: list[Path], strict: bool, no_exec: bool) -> list[Path]: - """Check notebooks for correct sequence of execution_count and errors in outputs.""" - wrong_ec = [] - for nb in nb_files: - result = check_nb_ec( - read_nb(nb), - strict, - no_exec, - ) - if not result: - wrong_ec.append(nb) - - return wrong_ec - - -def check_errors(nb_files: list[Path]) -> list[Path]: - """Check notebooks for errors in outputs.""" - nb_errors = [] - for nb in nb_files: - result = check_nb_errors(read_nb(nb)) - if not result: - nb_errors.append(nb) - - return nb_errors - - -def check_warnings(nb_files: list[Path]) -> list[Path]: - """Check notebooks for warnings in outputs.""" - nb_warnings = [] - for nb in nb_files: - result = check_nb_warnings(read_nb(nb)) - if not result: - nb_warnings.append(nb) - - return nb_warnings - - -def print_results( +def print_error( nbs: list[Path], message: str, ) -> None: - """Print results.""" + """Print error message.""" print(f"{len(nbs)} notebooks with {message}:") for nb in nbs: print("- ", nb) +def print_results( + wrong_ec: list[Path], + nb_errors: list[Path], + nb_warnings: list[Path], + read_error: list[Path], +) -> None: + """Print results.""" + if wrong_ec: + print_error(wrong_ec, "wrong execution_count") + if nb_errors: + print_error(nb_errors, "errors in outputs") + if nb_warnings: + print_error(nb_warnings, "warnings in outputs") + if read_error: + print_error(read_error, "read error") + + def app_check() -> None: """Check notebooks for correct sequence of execution_count and errors in outputs.""" cfg = parser.parse_args() @@ -123,32 +103,31 @@ def app_check() -> None: sys.exit(1) nb_files = get_nb_names_from_list(cfg.path) + read_error: list[Path] = [] if cfg.verbose: print(f"Checking {len(nb_files)} notebooks.") - check_passed = True - if cfg.ec: - wrong_ec = check_ec(nb_files, not cfg.not_strict, cfg.no_exec) - - if wrong_ec: - print_results(wrong_ec, "wrong execution_count") - check_passed = False + wrong_ec: list[Path] = [] + nb_errors: list[Path] = [] + nb_warnings: list[Path] = [] + for nb_name in nb_files: + nb = read_nb(nb_name) + if nb is None: + read_error.append(nb_name) + continue - if cfg.err: - nb_errors = check_errors(nb_files) + if cfg.ec and not check_nb_ec(nb, not cfg.not_strict, cfg.no_exec): + wrong_ec.append(nb_name) - if nb_errors: - print_results(nb_errors, "errors in outputs") - check_passed = False + if cfg.err and not check_nb_errors(nb): + nb_errors.append(nb_name) - if cfg.warn: - nb_warnings = check_warnings(nb_files) + if cfg.warn and not check_nb_warnings(nb): + nb_warnings.append(nb_name) - if nb_warnings: - print_results(nb_warnings, "warnings in outputs") - check_passed = False + print_results(wrong_ec, nb_errors, nb_warnings, read_error) - if not check_passed: + if wrong_ec or nb_errors or nb_warnings or read_error: sys.exit(1) diff --git a/src/nbmetaclean/app_clean.py b/src/nbmetaclean/app_clean.py index fc57f83..4c9d2c9 100644 --- a/src/nbmetaclean/app_clean.py +++ b/src/nbmetaclean/app_clean.py @@ -118,8 +118,8 @@ def print_result( print("- ", nb) if errors: print(f"with errors: {len(errors)}") - for nb, exc in errors: - print(f"{nb}: {exc}") + for nb in errors: + print("- ", nb) def app_clean() -> None: diff --git a/src/nbmetaclean/clean.py b/src/nbmetaclean/clean.py index 7de6f15..d3939a6 100644 --- a/src/nbmetaclean/clean.py +++ b/src/nbmetaclean/clean.py @@ -200,12 +200,11 @@ def clean_nb_file( if not isinstance(path, list): path = [path] cleaned: list[Path] = [] - errors: list[tuple[Path, Exception]] = [] + errors: list[Path] = [] for filename in path: - try: - nb = read_nb(filename) - except Exception as ex: - errors.append((filename, ex)) + nb = read_nb(filename) + if nb is None: + errors.append(filename) continue result = clean_nb( nb, diff --git a/src/nbmetaclean/helpers.py b/src/nbmetaclean/helpers.py index 0290096..4db19ae 100644 --- a/src/nbmetaclean/helpers.py +++ b/src/nbmetaclean/helpers.py @@ -16,16 +16,23 @@ ] -def read_nb(path: PathOrStr) -> Nb: +def read_nb(path: PathOrStr) -> Nb | None: """Read notebook from filename. - + If file does not exist or is not a valid notebook, return None. Args: path (Union[str, PosixPath): Notebook filename. Returns: - Notebook: Jupyter Notebook as dict. + Notebook Union[None, Notebook]: Jupyter Notebook as dict or None if not valid or does not exist. """ - return json.load(open(path, "r", encoding="utf-8")) + path = Path(path) + if not path.exists() or not path.is_file(): + return None + try: + nb = json.load(open(path, "r", encoding="utf-8")) + return nb + except Exception: + return None def write_nb( diff --git a/tests/test_app_check.py b/tests/test_app_check.py index 81c4eb4..080b43b 100644 --- a/tests/test_app_check.py +++ b/tests/test_app_check.py @@ -3,6 +3,8 @@ from pathlib import Path import subprocess +import pytest + from nbmetaclean.helpers import read_nb, write_nb from nbmetaclean.version import __version__ @@ -150,8 +152,11 @@ def test_check_nb_ec(tmp_path: Path): def test_check_nb_errors(tmp_path: Path): """test check `--err` option.""" - test_nb_path = tmp_path / nb_name + nb_name = "test_nb_3_ec.ipynb" test_nb = read_nb(example_nbs_path / nb_name) + assert test_nb is not None + + test_nb_path = tmp_path / nb_name write_nb(test_nb, test_nb_path) res_out, res_err = run_app(test_nb_path, ["--err"]) assert not res_out @@ -199,3 +204,16 @@ def test_check_app_version(): res_out, res_err = run_app("-v") assert res_out == f"nbcheck from nbmetaclean, version: {__version__}\n" assert not res_err + + +@pytest.mark.parametrize("arg", ["--ec", "--err", "--warn"]) +def test_check_app_read_error(tmp_path: Path, arg: str): + """test check_app with wrong nb file.""" + test_nb_path = tmp_path / "test_nb.ipynb" + with open(test_nb_path, "w") as fh: + fh.write("") + + res_out, res_err = run_app(test_nb_path, [arg]) + assert res_out.startswith("1 notebooks with read error:\n") + assert res_out.endswith("test_nb.ipynb\n") + assert not res_err diff --git a/tests/test_clean.py b/tests/test_clean.py index 465df66..c924cd3 100644 --- a/tests/test_clean.py +++ b/tests/test_clean.py @@ -301,21 +301,24 @@ def test_clean_nb_file(tmp_path: Path, capsys: CaptureFixture[str]): def test_clean_nb_file_errors(capsys: CaptureFixture[str], tmp_path: Path): """test clean_nb_file, errors""" + # not existing nb path = tmp_path / "wrong_name" cleaned, errors = clean_nb_file(path) assert len(cleaned) == 0 assert len(errors) == 1 - assert errors[0][0] == path - assert "No such file or directory" in str(errors[0][1]) + assert errors[0] == path captured = capsys.readouterr() assert not captured.out assert not captured.err + + # not valid nb with path.open("w", encoding="utf-8") as fh: fh.write("wrong nb") cleaned, errors = clean_nb_file(path) - assert "wrong_name" in str(errors[0]) assert len(cleaned) == 0 assert len(errors) == 1 + assert errors[0].name == "wrong_name" + captured = capsys.readouterr() assert not captured.out assert not captured.err diff --git a/tests/test_read_write.py b/tests/test_read_write.py index b2b94b5..87ac75f 100644 --- a/tests/test_read_write.py +++ b/tests/test_read_write.py @@ -48,3 +48,17 @@ def test_write_nb(tmp_path: Path): result = write_nb(nb, tmp_path / "test_nb_1", timestamp=timestamp) res_stat = result.stat() assert timestamp == (res_stat.st_atime, res_stat.st_mtime) + + +def test_read_nb_errors(tmp_path: Path): + """test read notebook not exist or invalid""" + # not valid + with open(tmp_path / "test.ipynb", "w", encoding="utf-8") as fh: + fh.write("invalid") + assert read_nb(tmp_path / "test.ipynb") is None + + # not exist + assert read_nb(tmp_path / "test_nb_1.ipynb") is None + + # not file + assert read_nb(tmp_path) is None From 46a7c00598d7a14ccf4657b4bf8d3465dd39dc58 Mon Sep 17 00:00:00 2001 From: ayasyrev Date: Tue, 17 Sep 2024 11:11:17 +0300 Subject: [PATCH 3/7] app_clean refactor output when no args --- src/nbmetaclean/app_clean.py | 9 ++++++- src/nbmetaclean/clean.py | 2 +- tests/test_app_clean.py | 49 ++++++++++++++++++++++++++++++------ 3 files changed, 50 insertions(+), 10 deletions(-) diff --git a/src/nbmetaclean/app_clean.py b/src/nbmetaclean/app_clean.py index 4c9d2c9..1e32763 100644 --- a/src/nbmetaclean/app_clean.py +++ b/src/nbmetaclean/app_clean.py @@ -99,7 +99,7 @@ def process_mask(mask: Union[list[str], None]) -> Union[tuple[TupleStr, ...], No def print_result( cleaned: list[Path], - errors: list[tuple[Path, Exception]], + errors: list[Path], clean_config: CleanConfig, path: list[Path], num_nbs: int, @@ -150,6 +150,13 @@ def app_clean() -> None: nb_files, clean_config, ) + # print(cfg) + if cfg.path == ".": # if running without arguments add some info. + if not nb_files: + print("No notebooks found at current directory.") + sys.exit(0) + elif not cfg.silent and not cleaned and not errors: + print(f"Checked: {len(nb_files)} notebooks. All notebooks are clean.") if not cfg.silent: print_result(cleaned, errors, clean_config, path_list, len(nb_files)) diff --git a/src/nbmetaclean/clean.py b/src/nbmetaclean/clean.py index d3939a6..9feda61 100644 --- a/src/nbmetaclean/clean.py +++ b/src/nbmetaclean/clean.py @@ -186,7 +186,7 @@ def clean_nb( def clean_nb_file( path: Union[Path, list[Path]], cfg: Optional[CleanConfig] = None, -) -> tuple[list[Path], list[tuple[Path, Exception]]]: +) -> tuple[list[Path], list[Path]]: """Clean metadata and execution count from notebook. Args: diff --git a/tests/test_app_clean.py b/tests/test_app_clean.py index b32f3cf..d5761dc 100644 --- a/tests/test_app_clean.py +++ b/tests/test_app_clean.py @@ -4,19 +4,27 @@ import subprocess +from pytest import CaptureFixture + from nbmetaclean.helpers import read_nb, write_nb -from nbmetaclean.version import __version__ def run_app( - nb_path: Path, + nb_path: Path | list[Path] | None = None, args: list[str] = [], + cwd: Path | None = None, ) -> tuple[str, str]: """run app""" + if isinstance(nb_path, Path): + args.insert(0, str(nb_path)) + elif isinstance(nb_path, list): + args = [str(nb) for nb in nb_path] + args + run_result = subprocess.run( - ["python", "-m", "nbmetaclean.app_clean", str(nb_path), *args], + ["python", "-m", "nbmetaclean.app_clean", *args], capture_output=True, check=False, + cwd=cwd, ) return run_result.stdout.decode("utf-8"), run_result.stderr.decode("utf-8") @@ -24,7 +32,32 @@ def run_app( example_nbs_path = Path("tests/test_nbs") -def test_clean_nb_metadata(tmp_path: Path): +def test_app_clean_no_args(tmp_path: Path) -> None: + """test app_clean with no args""" + res_out, res_err = run_app(cwd=tmp_path) + assert res_out == "No notebooks found at current directory.\n" + assert not res_err + + # prepare test clean notebook + nb_name_clean = "test_nb_2_clean.ipynb" + test_nb = read_nb(example_nbs_path / nb_name_clean) + test_nb_path = tmp_path / nb_name_clean + write_nb(test_nb, test_nb_path) + + res_out, res_err = run_app(cwd=tmp_path) + assert res_out == "Checked: 1 notebooks. All notebooks are clean.\n" + assert not res_err + + # add metadata + test_nb["metadata"]["some key"] = "some value" + write_nb(test_nb, test_nb_path) + + res_out, res_err = run_app(cwd=tmp_path) + assert res_out == "cleaned: test_nb_2_clean.ipynb\n" + assert not res_err + + +def test_clean_nb_metadata(tmp_path: Path, capsys: CaptureFixture) -> None: """test clean_nb_metadata""" nb_name_clean = "test_nb_2_clean.ipynb" test_nb = read_nb(example_nbs_path / nb_name_clean) @@ -178,10 +211,10 @@ def test_clean_nb_wrong_file(tmp_path: Path): def test_app_clean_version(): """test check `--version` option.""" - res_out, res_err = run_app("--version") - assert res_out == f"nbmetaclean version: {__version__}\n" + res_out, res_err = run_app(args=["--version"]) + assert res_out.startswith("nbmetaclean version: ") assert not res_err - res_out, res_err = run_app("-v") - assert res_out == f"nbmetaclean version: {__version__}\n" + res_out, res_err = run_app(args=["-v"]) + assert res_out.startswith("nbmetaclean version: ") assert not res_err From 69b965a95a74187d711322aaa50eaa8aa070fbfe Mon Sep 17 00:00:00 2001 From: ayasyrev Date: Tue, 17 Sep 2024 12:58:59 +0300 Subject: [PATCH 4/7] mute tests for no args - coverage fail --- tests/test_app_clean.py | 44 ++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/tests/test_app_clean.py b/tests/test_app_clean.py index d5761dc..fba727e 100644 --- a/tests/test_app_clean.py +++ b/tests/test_app_clean.py @@ -4,8 +4,6 @@ import subprocess -from pytest import CaptureFixture - from nbmetaclean.helpers import read_nb, write_nb @@ -31,33 +29,33 @@ def run_app( example_nbs_path = Path("tests/test_nbs") +# this test conflict with coverage - need to be fixed +# def test_app_clean_no_args(tmp_path: Path) -> None: +# """test app_clean with no args""" +# res_out, res_err = run_app(cwd=tmp_path) +# assert res_out == "No notebooks found at current directory.\n" +# assert not res_err -def test_app_clean_no_args(tmp_path: Path) -> None: - """test app_clean with no args""" - res_out, res_err = run_app(cwd=tmp_path) - assert res_out == "No notebooks found at current directory.\n" - assert not res_err +# # prepare test clean notebook +# nb_name_clean = "test_nb_2_clean.ipynb" +# test_nb = read_nb(example_nbs_path / nb_name_clean) +# test_nb_path = tmp_path / nb_name_clean +# write_nb(test_nb, test_nb_path) - # prepare test clean notebook - nb_name_clean = "test_nb_2_clean.ipynb" - test_nb = read_nb(example_nbs_path / nb_name_clean) - test_nb_path = tmp_path / nb_name_clean - write_nb(test_nb, test_nb_path) +# res_out, res_err = run_app(cwd=tmp_path) +# assert res_out == "Checked: 1 notebooks. All notebooks are clean.\n" +# assert not res_err - res_out, res_err = run_app(cwd=tmp_path) - assert res_out == "Checked: 1 notebooks. All notebooks are clean.\n" - assert not res_err +# # add metadata +# test_nb["metadata"]["some key"] = "some value" +# write_nb(test_nb, test_nb_path) - # add metadata - test_nb["metadata"]["some key"] = "some value" - write_nb(test_nb, test_nb_path) - - res_out, res_err = run_app(cwd=tmp_path) - assert res_out == "cleaned: test_nb_2_clean.ipynb\n" - assert not res_err +# res_out, res_err = run_app(cwd=tmp_path) +# assert res_out == "cleaned: test_nb_2_clean.ipynb\n" +# assert not res_err -def test_clean_nb_metadata(tmp_path: Path, capsys: CaptureFixture) -> None: +def test_clean_nb_metadata(tmp_path: Path) -> None: """test clean_nb_metadata""" nb_name_clean = "test_nb_2_clean.ipynb" test_nb = read_nb(example_nbs_path / nb_name_clean) From 2a1dc83720e05bcd29d232711a79c11cdb4b1bb8 Mon Sep 17 00:00:00 2001 From: ayasyrev Date: Tue, 17 Sep 2024 16:57:21 +0300 Subject: [PATCH 5/7] Remove empty REQUIRED in setup.py, update version to 0.1.3_dev in version.py --- setup.py | 2 -- src/nbmetaclean/version.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/setup.py b/setup.py index f2d7ae3..3dfbad2 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,6 @@ def load_requirements(filename: str) -> list[str]: return [] -REQUIRED = [] TEST_REQUIRED = load_requirements(REQUIREMENTS_TEST_FILENAME) DEV_REQUIRED = load_requirements(REQUIREMENTS_DEV_FILENAME) @@ -27,6 +26,5 @@ def load_requirements(filename: str) -> list[str]: setup( - install_requires=REQUIRED, extras_require=EXTRAS, ) diff --git a/src/nbmetaclean/version.py b/src/nbmetaclean/version.py index d36cb6b..6e4e34b 100644 --- a/src/nbmetaclean/version.py +++ b/src/nbmetaclean/version.py @@ -1,3 +1,3 @@ -__version__ = "0.1.0_dev" # pragma: no cover +__version__ = "0.1.3_dev" # pragma: no cover __all__ = ["__version__"] # pragma: no cover From 0796e98854f02c8a78fcdc10793a2084ba0d4de9 Mon Sep 17 00:00:00 2001 From: ayasyrev Date: Wed, 18 Sep 2024 08:50:39 +0300 Subject: [PATCH 6/7] Refactor type hints and improve metadata mask handling in multiple functions. --- src/nbmetaclean/app_clean.py | 4 ++-- src/nbmetaclean/clean.py | 5 +++-- src/nbmetaclean/helpers.py | 6 +++--- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/nbmetaclean/app_clean.py b/src/nbmetaclean/app_clean.py index 1e32763..7b7ad3a 100644 --- a/src/nbmetaclean/app_clean.py +++ b/src/nbmetaclean/app_clean.py @@ -101,7 +101,7 @@ def print_result( cleaned: list[Path], errors: list[Path], clean_config: CleanConfig, - path: list[Path], + path: list[str], num_nbs: int, ) -> None: if clean_config.verbose: @@ -143,7 +143,7 @@ def app_clean() -> None: dry_run=cfg.dry_run, verbose=cfg.verbose if not cfg.silent else False, ) - path_list = cfg.path if isinstance(cfg.path, list) else [cfg.path] + path_list: list[str] = cfg.path if isinstance(cfg.path, list) else [cfg.path] nb_files = get_nb_names_from_list(path_list, hidden=cfg.clean_hidden_nbs) cleaned, errors = clean_nb_file( diff --git a/src/nbmetaclean/clean.py b/src/nbmetaclean/clean.py index 9feda61..b7b3c51 100644 --- a/src/nbmetaclean/clean.py +++ b/src/nbmetaclean/clean.py @@ -162,12 +162,13 @@ def clean_nb( changed = False if cfg.clear_nb_metadata and (metadata := nb.get("metadata")): old_metadata = copy.deepcopy(metadata) - masks = NB_METADATA_PRESERVE_MASKS if cfg.nb_metadata_preserve_mask: if not cfg.mask_merge: masks = cfg.nb_metadata_preserve_mask else: - masks = cfg.nb_metadata_preserve_mask + masks + masks = cfg.nb_metadata_preserve_mask + NB_METADATA_PRESERVE_MASKS + else: + masks = NB_METADATA_PRESERVE_MASKS nb["metadata"] = filter_metadata(metadata, masks=masks) if nb["metadata"] != old_metadata: changed = True diff --git a/src/nbmetaclean/helpers.py b/src/nbmetaclean/helpers.py index 4db19ae..78b3ea9 100644 --- a/src/nbmetaclean/helpers.py +++ b/src/nbmetaclean/helpers.py @@ -25,11 +25,11 @@ def read_nb(path: PathOrStr) -> Nb | None: Returns: Notebook Union[None, Notebook]: Jupyter Notebook as dict or None if not valid or does not exist. """ - path = Path(path) - if not path.exists() or not path.is_file(): + nb_path = Path(path) + if not nb_path.exists() or not nb_path.is_file(): return None try: - nb = json.load(open(path, "r", encoding="utf-8")) + nb = json.load(open(nb_path, "r", encoding="utf-8")) return nb except Exception: return None From b7f0466b4bca1b9fbe7d9d0e8404bd1f66022dba Mon Sep 17 00:00:00 2001 From: ayasyrev Date: Wed, 18 Sep 2024 09:01:12 +0300 Subject: [PATCH 7/7] bump 0.1.3 --- src/nbmetaclean/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nbmetaclean/version.py b/src/nbmetaclean/version.py index 6e4e34b..316d35f 100644 --- a/src/nbmetaclean/version.py +++ b/src/nbmetaclean/version.py @@ -1,3 +1,3 @@ -__version__ = "0.1.3_dev" # pragma: no cover +__version__ = "0.1.3" # pragma: no cover __all__ = ["__version__"] # pragma: no cover