From 21204f17c701e32269897d9154c6807cbd9deb0a Mon Sep 17 00:00:00 2001
From: Adriane Boyd
Date: Fri, 25 Nov 2022 13:00:57 +0100
Subject: [PATCH] Add smart_open requirement, update deprecated options (#11864)

* Switch from deprecated `ignore_ext` to `compression`

* Add upload/download test for local files
---
 requirements.txt        |  1 +
 setup.cfg               |  3 ++-
 spacy/cli/_util.py      |  2 +-
 spacy/tests/test_cli.py | 18 +++++++++++++++++-
 4 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index ca4099be5bc..96523041bfe 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,6 +12,7 @@ srsly>=2.4.1,<3.0.0
 catalogue>=2.0.6,<2.1.0
 typer>=0.3.0,<0.5.0
 pathy>=0.3.5
+smart-open>=5.2.1,<7.0.0
 # Third party dependencies
 numpy>=1.15.0
 requests>=2.13.0,<3.0.0
diff --git a/setup.cfg b/setup.cfg
index 586a044fff0..8a5e054b538 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -51,9 +51,10 @@ install_requires =
     wasabi>=0.8.1,<1.1.0
     srsly>=2.4.1,<3.0.0
     catalogue>=2.0.6,<2.1.0
+    # Third-party dependencies
     typer>=0.3.0,<0.5.0
     pathy>=0.3.5
-    # Third-party dependencies
+    smart-open>=5.2.1,<7.0.0
     tqdm>=4.38.0,<5.0.0
     numpy>=1.15.0
     requests>=2.13.0,<3.0.0
diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py
index fb680d888a2..1b9740d0bdb 100644
--- a/spacy/cli/_util.py
+++ b/spacy/cli/_util.py
@@ -358,7 +358,7 @@ def download_file(src: Union[str, "Pathy"], dest: Path, *, force: bool = False)
     if dest.exists() and not force:
         return None
     src = str(src)
-    with smart_open.open(src, mode="rb", ignore_ext=True) as input_file:
+    with smart_open.open(src, mode="rb", compression="disable") as input_file:
         with dest.open(mode="wb") as output_file:
             output_file.write(input_file.read())
 
diff --git a/spacy/tests/test_cli.py b/spacy/tests/test_cli.py
index 9d3f1ee71dd..e64d273c504 100644
--- a/spacy/tests/test_cli.py
+++ b/spacy/tests/test_cli.py
@@ -12,6 +12,7 @@
 from spacy.cli._util import parse_config_overrides, string_to_list
 from spacy.cli._util import substitute_project_variables
 from spacy.cli._util import validate_project_commands
+from spacy.cli._util import upload_file, download_file
 from spacy.cli.debug_data import _compile_gold, _get_labels_from_model
 from spacy.cli.debug_data import _get_labels_from_spancat
 from spacy.cli.download import get_compatibility, get_version
@@ -719,4 +720,19 @@ def test_debug_data_compile_gold():
     ref = Doc(nlp.vocab, words=["Token", ".", "New York City"], sent_starts=[True, False, True], ents=["O", "B-ENT", "I-ENT"])
     eg = Example(pred, ref)
     data = _compile_gold([eg], ["ner"], nlp, True)
-    assert data["boundary_cross_ents"] == 1
\ No newline at end of file
+    assert data["boundary_cross_ents"] == 1
+
+
+def test_upload_download_local_file():
+    with make_tempdir() as d1, make_tempdir() as d2:
+        filename = "f.txt"
+        content = "content"
+        local_file = d1 / filename
+        remote_file = d2 / filename
+        with local_file.open(mode="w") as file_:
+            file_.write(content)
+        upload_file(local_file, remote_file)
+        local_file.unlink()
+        download_file(remote_file, local_file)
+        with local_file.open(mode="r") as file_:
+            assert file_.read() == content