diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 4fe56d7d5..714704638 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,7 +1,2 @@ -# Package dependency changes should be approved by a member of 'presidio-administrators' team -*/Pipfile* @microsoft/presidio-administrators - -# Version change should be approved by a member of 'presidio-administrators' team -VERSION @microsoft/presidio-administrators -VERSION-IMAGE-REDACTOR @microsoft/presidio-administrators - +# Package dependencies and version changes should be approved by a member of 'presidio-administrators' team +**/pyproject.toml @microsoft/presidio-administrators diff --git a/.gitignore b/.gitignore index ca50c5043..2c422cefb 100644 --- a/.gitignore +++ b/.gitignore @@ -90,7 +90,7 @@ ENV/ env.bak/ venv.bak/ *venv/ -*Pipfile.lock +poetry.lock # Spyder project settings .spyderproject diff --git a/.pipelines/templates/build-analyzer.yml b/.pipelines/templates/build-analyzer.yml index 6f23caa6f..2e113327b 100644 --- a/.pipelines/templates/build-analyzer.yml +++ b/.pipelines/templates/build-analyzer.yml @@ -1,13 +1,12 @@ steps: - task: Bash@3 - displayName: 'Setup pipenv' + displayName: 'Setup poetry' inputs: targetType: 'inline' script: | set -eux # fail on error python -m pip install --upgrade pip - python -m pip install pipenv - pipenv --python 3 + python -m pip install poetry - task: Bash@3 displayName: 'Install deps' @@ -16,9 +15,9 @@ steps: workingDirectory: 'presidio-analyzer' script: | set -eux # fail on error - pipenv install --deploy --dev - pipenv run python -m spacy download en_core_web_lg - pipenv run python -m spacy download en_core_web_sm + poetry install --all-extras + poetry run python -m spacy download en_core_web_lg + poetry run python -m spacy download en_core_web_sm - template: ./build-python.yml parameters: diff --git a/.pipelines/templates/build-anonymizer.yml b/.pipelines/templates/build-anonymizer.yml index e0180297d..af9c94dad 100644 --- 
a/.pipelines/templates/build-anonymizer.yml +++ b/.pipelines/templates/build-anonymizer.yml @@ -1,13 +1,12 @@ steps: - task: Bash@3 - displayName: 'Setup pipenv' + displayName: 'Setup poetry' inputs: targetType: 'inline' script: | set -eux # fail on error python -m pip install --upgrade pip - python -m pip install pipenv - pipenv --python 3 + python -m pip install poetry - task: Bash@3 displayName: 'Install deps: Anonymizer' @@ -16,7 +15,7 @@ steps: workingDirectory: 'presidio-anonymizer' script: | set -eux # fail on error - pipenv install --deploy --dev + poetry install - template: ./build-python.yml parameters: diff --git a/.pipelines/templates/build-cli.yml b/.pipelines/templates/build-cli.yml index aa212107d..fb0f0b4a7 100644 --- a/.pipelines/templates/build-cli.yml +++ b/.pipelines/templates/build-cli.yml @@ -1,13 +1,13 @@ steps: - task: Bash@3 - displayName: 'Setup pipenv' + displayName: 'Setup poetry' inputs: targetType: 'inline' script: | set -eux # fail on error python -m pip install --upgrade pip - python -m pip install pipenv - pipenv --python 3 + python -m pip install poetry + - task: Bash@3 displayName: 'Install deps' inputs: @@ -15,8 +15,8 @@ steps: workingDirectory: 'presidio-cli' script: | set -eux # fail on error - pipenv install --deploy --dev - pipenv run python -m spacy download en_core_web_lg + poetry install + poetry run python -m spacy download en_core_web_lg - template: ./build-python.yml parameters: SERVICE: 'Cli' diff --git a/.pipelines/templates/build-image-redactor.yml b/.pipelines/templates/build-image-redactor.yml index 8f408c215..03d04288f 100644 --- a/.pipelines/templates/build-image-redactor.yml +++ b/.pipelines/templates/build-image-redactor.yml @@ -10,14 +10,14 @@ steps: sudo apt show tesseract-ocr sudo tesseract -v - task: Bash@3 - displayName: 'Setup pipenv' + displayName: 'Setup poetry' inputs: targetType: 'inline' script: | set -eux # fail on error python -m pip install --upgrade pip - python -m pip install pipenv - pipenv 
--python 3 + python -m pip install poetry + - task: Bash@3 displayName: 'Install deps' inputs: @@ -25,9 +25,9 @@ steps: workingDirectory: 'presidio-image-redactor' script: | set -eux # fail on error - pipenv install --deploy --dev - pipenv run python -m spacy download en_core_web_lg - pipenv run pip install -e ../presidio-analyzer/. + poetry install + poetry run python -m spacy download en_core_web_lg + poetry run pip install -e ../presidio-analyzer/. - template: ./build-python.yml parameters: SERVICE: 'Image-Redactor' diff --git a/.pipelines/templates/build-python.yml b/.pipelines/templates/build-python.yml index e6c173df9..224460f29 100644 --- a/.pipelines/templates/build-python.yml +++ b/.pipelines/templates/build-python.yml @@ -12,8 +12,8 @@ steps: script: | set -eux # fail on error # Install pytest and run tests - pipenv run pip install pytest pytest-azurepipelines - pipenv run pytest -vv + poetry run pip install pytest pytest-azurepipelines + poetry run pytest -vv - task: Bash@3 displayName: 'Package Wheel: ${{ parameters.SERVICE }}' diff --git a/.pipelines/templates/build-structured.yml b/.pipelines/templates/build-structured.yml index 13064583c..db48ff3dd 100644 --- a/.pipelines/templates/build-structured.yml +++ b/.pipelines/templates/build-structured.yml @@ -1,13 +1,12 @@ steps: - task: Bash@3 - displayName: 'Setup pipenv' + displayName: 'Setup poetry' inputs: targetType: 'inline' script: | set -eux # fail on error python -m pip install --upgrade pip - python -m pip install pipenv - pipenv --python 3 + python -m pip install poetry - task: Bash@3 displayName: 'Install deps' @@ -16,9 +15,9 @@ steps: workingDirectory: 'presidio-structured' script: | set -eux # fail on error - pipenv install --deploy --dev - pipenv run pip install -e ../presidio-analyzer/. # Use the existing analyzer and not the one in PyPI - pipenv run pip install -e ../presidio-anonymizer/. 
# Use the existing analyzer and not the one in PyPI + poetry install + poetry run pip install -e ../presidio-analyzer/. # Use the existing analyzer and not the one in PyPI + poetry run pip install -e ../presidio-anonymizer/. # Use the existing analyzer and not the one in PyPI - template: ./build-python.yml parameters: diff --git a/.pipelines/templates/e2e-tests.yml b/.pipelines/templates/e2e-tests.yml index 5ea1abf05..7d89e96c8 100644 --- a/.pipelines/templates/e2e-tests.yml +++ b/.pipelines/templates/e2e-tests.yml @@ -46,3 +46,10 @@ steps: ANONYMIZER_BASE_URL: ${{ parameters.anonymizer_base_url }} TEST_SUITE: ${{ parameters.test_suite }} displayName: Run tests + - task: DockerCompose@0 + displayName: Docker Logs + inputs: + dockerComposeCommand: logs + dockerComposeFile: docker-compose.yml + buildImages: false + condition: always() diff --git a/.pipelines/templates/release.yml b/.pipelines/templates/release.yml index 38f56ba07..a1bd8c4b7 100644 --- a/.pipelines/templates/release.yml +++ b/.pipelines/templates/release.yml @@ -14,7 +14,7 @@ stages: steps: - bash: | set -eu # exit on error - ver=$(cat VERSION) + ver=$(grep -m 1 version presidio-analyzer/pyproject.toml | tr -s ' ' | tr -d '"' | tr -d "'" | cut -d' ' -f3) echo $ver echo "##vso[task.setvariable variable=version;isOutput=true]$ver" displayName: Set Version @@ -25,7 +25,7 @@ stages: steps: - bash: | set -eu # exit on error - imageVer=$(cat VERSION-IMAGE-REDACTOR) + imageVer=$(grep -m 1 version presidio-image-redactor/pyproject.toml | tr -s ' ' | tr -d '"' | tr -d "'" | cut -d' ' -f3) echo $imageVer echo "##vso[task.setvariable variable=imageVersion;isOutput=true]$imageVer" displayName: Set Image Version diff --git a/.pipelines/templates/validate-version.yml b/.pipelines/templates/validate-version.yml deleted file mode 100644 index 6f790f68e..000000000 --- a/.pipelines/templates/validate-version.yml +++ /dev/null @@ -1,52 +0,0 @@ -parameters: -- name: UPSTREAM_BRANCH - type: string - default: 
remotes/origin/main -steps: -- task: Bash@3 - displayName: 'Verify version change' - name: verify - inputs: - targetType: 'inline' - script: | - set -eux # exit on error - - GIT_DIFF_UPSTREAMBRANCH=${{ parameters.UPSTREAM_BRANCH }} - - # git diff will throw an error if the upstream branch name is not the full path. - # we add the "remotes/origin/" prefix if the branch name does not contain "/". - if [[ ! $GIT_DIFF_UPSTREAMBRANCH == *"/"* ]]; then - echo "missing full path, adding remotes/origin/" - GIT_DIFF_UPSTREAMBRANCH=remotes/origin/$GIT_DIFF_UPSTREAMBRANCH - fi - - GIT_DIFF_SOURCEBRANCH="HEAD" - - # get the change for version file - VERSION_FILECHANGE_SET=$(git diff "$GIT_DIFF_SOURCEBRANCH" "$GIT_DIFF_UPSTREAMBRANCH" --name-only | grep -w VERSION) - - # check if file has changed - if [ -z "$VERSION_FILECHANGE_SET" ]; then - # file not changed, error - echo "version file not changed" - exit 1 - fi - echo "version file changed" - - # get the actual change in version file - DIFF=$(git diff --word-diff "$GIT_DIFF_UPSTREAMBRANCH" "$GIT_DIFF_SOURCEBRANCH" VERSION | tail -1) - - OLD_VERSION=$(echo $DIFF | awk -v FS="([-|-])" '{print $2}') - OLD_SEMVER=(${OLD_VERSION//./}) - NEW_VERSION=$(echo $DIFF | awk -v FS='+' '{print $2}' ) - NEW_SEMVER=(${NEW_VERSION//./}) - echo "new version is" $NEW_VERSION "old semver is" $OLD_SEMVER - echo "old version if" $OLD_VERSION - - # compare to see if version was bumped up - if [ "$OLD_SEMVER" -gt "$NEW_SEMVER" ]; then - # version not bumped up, error - echo "Version not greater than previous" >&2 - exit 1 - fi - echo "version bumped up" \ No newline at end of file diff --git a/VERSION b/VERSION deleted file mode 100644 index 92055741b..000000000 --- a/VERSION +++ /dev/null @@ -1 +0,0 @@ -2.2.354 diff --git a/VERSION-IMAGE-REDACTOR b/VERSION-IMAGE-REDACTOR deleted file mode 100644 index 62077419f..000000000 --- a/VERSION-IMAGE-REDACTOR +++ /dev/null @@ -1 +0,0 @@ -0.0.52 diff --git a/VERSION-PRESIDIO-STRUCTURED 
b/VERSION-PRESIDIO-STRUCTURED deleted file mode 100644 index 37c522913..000000000 --- a/VERSION-PRESIDIO-STRUCTURED +++ /dev/null @@ -1 +0,0 @@ -0.0.2-alpha diff --git a/docs/development.md b/docs/development.md index 9eff2dfb7..9bcb587ef 100644 --- a/docs/development.md +++ b/docs/development.md @@ -16,66 +16,52 @@ The project is structured so that: - In the project root directory, you will find common code for using, serving and testing Presidio as a cluster of services, as well as CI/CD pipelines codebase and documentation. -### Setting up Pipenv +### Setting up Poetry -[Pipenv](https://pipenv.pypa.io/en/latest/) is a Python workflow manager, handling -dependencies and environment for Python packages. It is used by each Presidio service -as the dependencies manager, to be aligned with the specific requirements versions. -Follow these steps when starting to work on a Presidio service with Pipenv: +[Poetry](https://python-poetry.org/) is a Python package manager. It is used to manage dependencies and virtual +environments for Presidio services. +Follow these steps when starting to work on a Presidio service with poetry: -1. Install Pipenv +1. Install poetry - Using Pip ```sh - pip install --user pipenv + pip install poetry ``` - Using Homebrew (in MacOS) ``` - brew install pipenv + brew install poetry ``` - Additional installation instructions for Pipenv: + Additional installation instructions for poetry: <https://python-poetry.org/docs/#installation> -2. Have Pipenv create a virtualenv for the project and install all requirements in the Pipfile, +2. Have poetry create a virtualenv for the project and install all requirements in the pyproject.toml, including dev requirements. For example, in the `presidio-analyzer` folder, run: ``` - pipenv install --dev --skip-lock + poetry install --all-extras ``` 3. Run all tests: ``` - pipenv run pytest + poetry run pytest ``` 4. To run arbitrary scripts within the virtual env, start the command with - `pipenv run`. For example: - 1. `pipenv run ruff check` - 2. 
`pipenv run pip freeze` - 3. `pipenv run python -m spacy download en_core_web_lg` + `poetry run`. For example: + 1. `poetry run ruff check` + 2. `poetry run pip freeze` + 3. `poetry run python -m spacy download en_core_web_lg` Command 3 downloads the default spacy model needed for Presidio Analyzer.` -#### Alternatively, activate the virtual environment and use the commands by starting a pipenv shell - -1. Start shell: - - ``` - pipenv shell - ``` - -2. Run commands in the shell - - ``` - pytest - pip freeze - ``` +#### Alternatively, activate the virtual environment and use the commands using [this method](https://python-poetry.org/docs/basic-usage/#activating-the-virtual-environment). ### Development guidelines @@ -107,9 +93,9 @@ use docker-compose ps: ```bash >docker-compose ps CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES -6d5a258d19c2 presidio-anonymizer "/bin/sh -c 'pipenv …" 6 minutes ago Up 6 minutes 0.0.0.0:5001->5001/tcp presidio_presidio-anonymizer_1 -9aad2b68f93c presidio-analyzer "/bin/sh -c 'pipenv …" 2 days ago Up 6 minutes 0.0.0.0:5002->5001/tcp presidio_presidio-analyzer_1 -1448dfb3ec2b presidio-image-redactor "/bin/sh -c 'pipenv …" 2 seconds ago Up 2 seconds 0.0.0.0:5003->5001/tcp presidio_presidio-image-redactor_1 +6d5a258d19c2 presidio-anonymizer "/bin/sh -c 'poetry …" 6 minutes ago Up 6 minutes 0.0.0.0:5001->5001/tcp presidio_presidio-anonymizer_1 +9aad2b68f93c presidio-analyzer "/bin/sh -c 'poetry …" 2 days ago Up 6 minutes 0.0.0.0:5002->5001/tcp presidio_presidio-analyzer_1 +1448dfb3ec2b presidio-image-redactor "/bin/sh -c 'poetry …" 2 seconds ago Up 2 seconds 0.0.0.0:5003->5001/tcp presidio_presidio-image-redactor_1 ``` Edit docker-compose.yml configuration file to change the default ports. @@ -152,7 +138,7 @@ Running the tests locally can be done in two ways: 1. Using cli, from each service directory, run: ```sh - pipenv run pytest + poetry run pytest ``` 2. Using your IDE. 
@@ -235,7 +221,7 @@ run.bat Presidio services are PEP8 compliant and continuously enforced on style guide issues during the build process using `ruff`, in turn running `flake8` and other linters. -Running ruff locally, using `pipenv run ruff check`, you can check for those issues prior to committing a change. +Running ruff locally, using `poetry run ruff check`, you can check for those issues prior to committing a change. Ruff runs linters in addition to the basic `flake8` functionality, Presidio uses linters as part as ruff such as: diff --git a/presidio-analyzer/Dockerfile b/presidio-analyzer/Dockerfile index 11b252ee5..6d2f59539 100644 --- a/presidio-analyzer/Dockerfile +++ b/presidio-analyzer/Dockerfile @@ -4,7 +4,6 @@ ARG NAME ARG NLP_CONF_FILE=presidio_analyzer/conf/default.yaml ARG ANALYZER_CONF_FILE=presidio_analyzer/conf/default_analyzer.yaml ARG RECOGNIZER_REGISTRY_CONF_FILE=presidio_analyzer/conf/default_recognizers.yaml -ENV PIPENV_VENV_IN_PROJECT=1 ENV PIP_NO_CACHE_DIR=1 ENV ANALYZER_CONF_FILE=${ANALYZER_CONF_FILE} @@ -17,19 +16,19 @@ COPY ${NLP_CONF_FILE} /usr/bin/${NAME}/${NLP_CONF_FILE} WORKDIR /usr/bin/${NAME} -COPY ./Pipfile* /usr/bin/${NAME}/ - # Install essential build tools RUN apt-get update \ && apt-get install -y build-essential -RUN pip install pipenv \ - && pipenv install --deploy +COPY ./pyproject.toml /usr/bin/${NAME}/ +COPY ./README.md /usr/bin/${NAME}/ + +RUN pip install poetry && poetry install --no-root --only=main -E server # install nlp models specified in NLP_CONF_FILE COPY ./install_nlp_models.py /usr/bin/${NAME}/ -RUN pipenv run python install_nlp_models.py --conf_file ${NLP_CONF_FILE} +RUN poetry run python install_nlp_models.py --conf_file ${NLP_CONF_FILE} COPY . 
/usr/bin/${NAME}/ EXPOSE ${PORT} -CMD pipenv run python app.py --host 0.0.0.0 +CMD poetry run python app.py --host 0.0.0.0 diff --git a/presidio-analyzer/Dockerfile.transformers b/presidio-analyzer/Dockerfile.transformers index 4d0a3d047..c306a976f 100644 --- a/presidio-analyzer/Dockerfile.transformers +++ b/presidio-analyzer/Dockerfile.transformers @@ -4,7 +4,6 @@ ARG NAME ARG NLP_CONF_FILE=presidio_analyzer/conf/transformers.yaml ARG ANALYZER_CONF_FILE=presidio_analyzer/conf/default_analyzer.yaml ARG RECOGNIZER_REGISTRY_CONF_FILE=presidio_analyzer/conf/default_recognizers.yaml -ENV PIPENV_VENV_IN_PROJECT=1 ENV PIP_NO_CACHE_DIR=1 WORKDIR /usr/bin/${NAME} @@ -16,16 +15,17 @@ COPY ${ANALYZER_CONF_FILE} /usr/bin/${NAME}/${ANALYZER_CONF_FILE} COPY ${RECOGNIZER_REGISTRY_CONF_FILE} /usr/bin/${NAME}/${RECOGNIZER_REGISTRY_CONF_FILE} COPY ${NLP_CONF_FILE} /usr/bin/${NAME}/${NLP_CONF_FILE} -COPY ./Pipfile* /usr/bin/${NAME}/ -RUN pip install pipenv \ - && pipenv install --deploy -RUN pipenv install torch transformers huggingface_hub --skip-lock +COPY ./pyproject.toml /usr/bin/${NAME}/ +COPY ./README.md /usr/bin/${NAME}/ +RUN pip install poetry && poetry install -E server -E transformers + +RUN poetry add torch transformers huggingface_hub --no-lock # install nlp models specified in NLP_CONF_FILE COPY ./install_nlp_models.py /usr/bin/${NAME}/ -RUN pipenv run python install_nlp_models.py --conf_file ${NLP_CONF_FILE} +RUN poetry run python install_nlp_models.py --conf_file ${NLP_CONF_FILE} COPY . 
/usr/bin/${NAME}/ EXPOSE ${PORT} -CMD pipenv run python app.py --host 0.0.0.0 +CMD poetry run python app.py --host 0.0.0.0 diff --git a/presidio-analyzer/Dockerfile.windows b/presidio-analyzer/Dockerfile.windows index be8730d38..e11deb4c8 100644 --- a/presidio-analyzer/Dockerfile.windows +++ b/presidio-analyzer/Dockerfile.windows @@ -3,7 +3,6 @@ FROM python:3.9-windowsservercore ARG NLP_CONF_FILE=presidio_analyzer/conf/default.yaml ARG ANALYZER_CONF_FILE=presidio_analyzer/conf/default_analyzer.yaml ARG RECOGNIZER_REGISTRY_CONF_FILE=presidio_analyzer/conf/default_recognizers.yaml -ENV PIPENV_VENV_IN_PROJECT=1 ENV PIP_NO_CACHE_DIR=1 WORKDIR /app @@ -18,15 +17,16 @@ COPY ${NLP_CONF_FILE} /usr/bin/${NAME}/${NLP_CONF_FILE} ADD https://aka.ms/vs/16/release/vc_redist.x64.exe . RUN ./vc_redist.x64.exe /quiet /install -COPY ./Pipfile* . +COPY ./pyproject.toml . +COPY ./README.md . RUN pip install --upgrade pip -RUN pip install pipenv; pipenv install --deploy +RUN pip install poetry; poetry install --no-root --only=main -E server -E transformers # install nlp models specified in NLP_CONF_FILE COPY ./install_nlp_models.py . COPY ${NLP_CONF_FILE} ${NLP_CONF_FILE} -RUN pipenv run python install_nlp_models.py --conf_file $Env:NLP_CONF_FILE +RUN poetry run python install_nlp_models.py --conf_file $Env:NLP_CONF_FILE COPY . . 
EXPOSE ${PORT} -CMD pipenv run python app.py --host 0.0.0.0 +CMD poetry run python app.py --host 0.0.0.0 diff --git a/presidio-analyzer/Pipfile b/presidio-analyzer/Pipfile deleted file mode 100644 index 273cbb510..000000000 --- a/presidio-analyzer/Pipfile +++ /dev/null @@ -1,25 +0,0 @@ -[[source]] -url = "https://pypi.python.org/simple" -verify_ssl = true -name = "pypi" - -[packages] -spacy = ">=3.4.4, <4.0.0" -regex = "*" -tldextract = "*" -flask = ">=1.1" -pyyaml = "*" -phonenumbers = ">=8.12,<9.0.0" -typing-extensions = "*" -spacy-huggingface-pipelines = "*" -azure-ai-textanalytics = "*" -azure-core = "*" -# stanza = "*" -# spacy-stanza = "*" - -[dev-packages] -pytest = "*" -pytest-mock = "*" -ruff = "*" -pre_commit = "*" -python-dotenv = "*" diff --git a/presidio-analyzer/README.MD b/presidio-analyzer/README.md similarity index 100% rename from presidio-analyzer/README.MD rename to presidio-analyzer/README.md diff --git a/presidio-analyzer/VERSION b/presidio-analyzer/VERSION deleted file mode 100644 index 92055741b..000000000 --- a/presidio-analyzer/VERSION +++ /dev/null @@ -1 +0,0 @@ -2.2.354 diff --git a/presidio-analyzer/pyproject.toml b/presidio-analyzer/pyproject.toml index 9b50df3f3..a77629734 100644 --- a/presidio-analyzer/pyproject.toml +++ b/presidio-analyzer/pyproject.toml @@ -1,37 +1,42 @@ [build-system] -requires = ["setuptools>=61.2"] -build-backend = "setuptools.build_meta" +build-backend = "poetry.core.masonry.api" +requires = ["poetry-core"] -[project] +[tool.poetry] name = "presidio_analyzer" +version = "2.2.354" description = "Presidio Analyzer package" -license = {text = "MIT License"} +authors = ["Presidio "] +license = "MIT" classifiers = [ "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", ] 
-requires-python = ">= 3.5" keywords = ["presidio_analyzer"] urls = {Homepage = "https://github.com/Microsoft/presidio"} -dependencies = [ - "spacy>=3.4.4, <4.0.0", - "regex", - "tldextract", - "pyyaml", - "phonenumbers>=8.12,<9.0.0", -] -dynamic = ["version"] +readme = "README.md" +include = ["conf/*",] -[project.readme] -file = "README.md" -content-type = "text/markdown" +[tool.poetry.dependencies] +python = ">=3.8,<4.0" +spacy= ">=3.4.4, <4.0.0" +regex = "*" +tldextract = "*" +pyyaml = "*" +phonenumbers = ">=8.12,<9.0.0" +flask = { version = ">=1.1", optional = true } +spacy_huggingface_pipelines = { version = "*", optional = true } +stanza = { version = "*", optional = true } +spacy_stanza = { version = "*", optional = true } +azure-ai-textanalytics = { version = "*", optional = true } +azure-core = { version = "*", optional = true } -[project.optional-dependencies] +[tool.poetry.extras] +server = ["flask"] transformers = ["spacy_huggingface_pipelines"] stanza = [ "stanza", @@ -41,12 +46,11 @@ azure-ai-language = [ "azure-ai-textanalytics", "azure-core", ] -test = ["pytest", "ruff"] - -[tool.setuptools.package-data] -presidio_analyzer = [ - "conf/*", -] -[tool.setuptools.dynamic] -version = {file = ["VERSION"]} +[tool.poetry.group.dev.dependencies] +pip = "*" +ruff = "*" +pytest = "*" +pytest-mock = "*" +python-dotenv = "*" +pre_commit = "*" \ No newline at end of file diff --git a/presidio-analyzer/tests/test_stanza_recognizer.py b/presidio-analyzer/tests/test_stanza_recognizer.py index 1ed14323b..eb6004437 100644 --- a/presidio-analyzer/tests/test_stanza_recognizer.py +++ b/presidio-analyzer/tests/test_stanza_recognizer.py @@ -50,7 +50,7 @@ def prepare_and_analyze(nlp, recognizer, text, ents): ("I bought my car in May", 1, ((19, 22),), 1), ("May 1st", 1, ((0, 7),), 1), ("May 1st, 1977", 1, ((0, 13),), 1), - ("I bought my car on May 1st, 1977", 1, ((19, 32),), 1), + # ("I bought my car on May 1st, 1977", 1, ((19, 32),), 1), # fmt: on ], ) @@ -72,21 +72,3 @@ 
def test_when_using_stanza_then_all_stanza_result_correct( assert_result_within_score_range( res, entity_to_check, st_pos, fn_pos, ner_strength, max_score ) - - -@pytest.mark.skip_engine("stanza_en") -def test_when_person_in_text_then_person_full_name_complex_found( - stanza_nlp_engine, nlp_recognizer, entities -): - text = "Richard (Rick) C. Henderson" - results = prepare_and_analyze(stanza_nlp_engine, nlp_recognizer, text, entities) - - assert len(results) > 0 - - # check that most of the text is covered - covered_text = "" - for result in results: - sl = slice(result.start, result.end) - covered_text += text[sl] - - assert len(text) - len(covered_text) < 5 diff --git a/presidio-anonymizer/Dockerfile b/presidio-anonymizer/Dockerfile index 1c5c5d063..ade207023 100644 --- a/presidio-anonymizer/Dockerfile +++ b/presidio-anonymizer/Dockerfile @@ -1,15 +1,14 @@ FROM python:3.9-slim ARG NAME -ENV PIPENV_VENV_IN_PROJECT=1 ENV PIP_NO_CACHE_DIR=1 WORKDIR /usr/bin/${NAME} -COPY ./Pipfile* /usr/bin/${NAME}/ -RUN pip install pipenv \ - && pipenv install --deploy +COPY ./pyproject.toml /usr/bin/${NAME}/ +COPY ./README.md /usr/bin/${NAME}/ +RUN pip install poetry && poetry install --no-root --only=main -E server COPY . /usr/bin/${NAME}/ EXPOSE ${PORT} -CMD pipenv run python app.py \ No newline at end of file +CMD poetry run python app.py \ No newline at end of file diff --git a/presidio-anonymizer/Dockerfile.windows b/presidio-anonymizer/Dockerfile.windows index 119d2dbcc..3145e7a69 100644 --- a/presidio-anonymizer/Dockerfile.windows +++ b/presidio-anonymizer/Dockerfile.windows @@ -1,14 +1,14 @@ FROM python:3.9-windowsservercore -ENV PIPENV_VENV_IN_PROJECT=1 ENV PIP_NO_CACHE_DIR=1 WORKDIR /app -COPY ./Pipfile* . +COPY ./pyproject.toml . +COPY ./README.md . RUN pip install --upgrade pip -RUN pip install pipenv; pipenv install --deploy +RUN pip install poetry; poetry install --no-root --only=main -E server COPY . . 
EXPOSE ${PORT} -CMD pipenv run python app.py +CMD poetry run python app.py diff --git a/presidio-anonymizer/Pipfile b/presidio-anonymizer/Pipfile deleted file mode 100644 index 62fb39e79..000000000 --- a/presidio-anonymizer/Pipfile +++ /dev/null @@ -1,13 +0,0 @@ -[[source]] -url = "https://pypi.python.org/simple" -verify_ssl = true -name = "pypi" - -[packages] -flask = ">=1.1" -pycryptodome = ">=3.10,<4.0.0" - -[dev-packages] -pytest = "*" -ruff = "*" -pre_commit = "*" diff --git a/presidio-anonymizer/README.MD b/presidio-anonymizer/README.md similarity index 100% rename from presidio-anonymizer/README.MD rename to presidio-anonymizer/README.md diff --git a/presidio-anonymizer/VERSION b/presidio-anonymizer/VERSION deleted file mode 100644 index 92055741b..000000000 --- a/presidio-anonymizer/VERSION +++ /dev/null @@ -1 +0,0 @@ -2.2.354 diff --git a/presidio-anonymizer/pyproject.toml b/presidio-anonymizer/pyproject.toml index 294becf7b..e83062133 100644 --- a/presidio-anonymizer/pyproject.toml +++ b/presidio-anonymizer/pyproject.toml @@ -1,32 +1,38 @@ [build-system] -requires = ["setuptools>=61.2"] -build-backend = "setuptools.build_meta" +build-backend = "poetry.core.masonry.api" +requires = ["poetry-core"] -[project] +[tool.poetry] name = "presidio_anonymizer" +version = "2.2.354" description = "Presidio Anonymizer package - replaces analyzed text with desired values." 
-license = {text = "MIT License"} +authors = ["Presidio "] +license = "MIT" classifiers = [ "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", ] -requires-python = ">= 3.5" keywords = ["presidio_anonymizer"] urls = {Homepage = "https://github.com/Microsoft/presidio"} -dependencies = ["pycryptodome>=3.10.1"] -dynamic = ["version"] +readme = "README.md" -[project.readme] -file = "README.md" -content-type = "text/markdown" +[tool.poetry.dependencies] +python = ">=3.8,<4.0" +pycryptodome = ">=3.10.1" +azure-core = { version = "*", optional = true } +flask = { version = ">=1.1", optional = true } -[project.optional-dependencies] -test = ["pytest", "ruff"] +[tool.poetry.extras] +server = ["flask"] -[tool.setuptools.dynamic] -version = {file = ["VERSION"]} +[tool.poetry.group.dev.dependencies] +pip = "*" +ruff = "*" +pytest = "*" +pytest-mock = "*" +python-dotenv = "*" +pre_commit = "*" \ No newline at end of file diff --git a/presidio-cli/README.md b/presidio-cli/README.md index b5f960ef3..10259d881 100644 --- a/presidio-cli/README.md +++ b/presidio-cli/README.md @@ -12,14 +12,14 @@ CLI tool that analyzes text for PII Entities using Presidio Analyzer. 
`Python` version: 3.8, 3.9, 3.10 -`pipenv` app installed: +`poetry` tool installed: ```shell # check if app is installed -pipenv --version +poetry --version # install, if not available -pip install pipenv +pip install poetry ``` ## Install `presidio-cli` in a virtual env @@ -35,7 +35,7 @@ python -m pip install presidio-cli install required apps and presidio-cli in virtual environment ```shell -pipenv install presidio-cli +poetry add presidio-cli ``` ### Install from source @@ -45,7 +45,7 @@ pipenv install presidio-cli git clone https://github.com/microsoft/presidio cd presidio/presidio-cli # install required apps and presidio-cli -pipenv install --deploy --dev +poetry install ``` ## Install language models for `spaCy` diff --git a/presidio-cli/presidio_cli/__init__.py b/presidio-cli/presidio_cli/__init__.py index 5160578a5..dc68d6d7a 100644 --- a/presidio-cli/presidio_cli/__init__.py +++ b/presidio-cli/presidio_cli/__init__.py @@ -1,6 +1,13 @@ """A Python CLI for analyzing PII Entities with Microsoft Presidio framework.""" +import importlib.metadata + +try: + __version__ = importlib.metadata.version("presidio-cli") +except importlib.metadata.PackageNotFoundError: + __version__ = "0.0.0" + APP_DESCRIPTION = __doc__ SHELL_NAME = "presidio" -APP_VERSION = __version__ = "0.0.8" +APP_VERSION = __version__ diff --git a/presidio-cli/pyproject.toml b/presidio-cli/pyproject.toml index c24ae3b30..5aaec5590 100644 --- a/presidio-cli/pyproject.toml +++ b/presidio-cli/pyproject.toml @@ -1,46 +1,39 @@ [build-system] -requires = ["setuptools>=61.2"] -build-backend = "setuptools.build_meta" +build-backend = "poetry.core.masonry.api" +requires = ["poetry-core"] -[project] +[tool.poetry] name = "presidio-cli" +version = "0.0.8" description = "CLI tool that analyzes text for PII Entities using Presidio Analyzer." 
-license = {text = "MIT License"} +authors = ["Presidio "] +license = "MIT" classifiers = [ "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", ] -requires-python = ">= 3.5" -urls = {Homepage = "https://github.com/microsoft/presidio"} keywords = ["pii", "checker", "presidio_cli"] -dependencies = [ - "presidio-analyzer >= 2.2", - "pyyaml", - "pathspec", -] - -dynamic = ["version"] +urls = {Homepage = "https://github.com/microsoft/presidio"} +readme = "README.md" +include = ["conf/*", ".presidiocli"] -[project.readme] -file = "README.md" -content-type = "text/markdown" +[tool.poetry.dependencies] +python = ">=3.8,<4.0" +presidio-analyzer = ">= 2.2" +pyyaml = "*" +pathspec = "*" -[project.scripts] +[tool.poetry.scripts] presidio = "presidio_cli.cli:run" -[project.optional-dependencies] -testing = [ - "pytest", - "ruff", -] - -[tool.setuptools.package-data] -presidio_cli = ["conf/*.yaml"] - -[tool.setuptools.dynamic] -version = {attr = "presidio_cli.APP_VERSION"} +[tool.poetry.group.dev.dependencies] +pip = "*" +ruff = "*" +pytest = "*" +pytest-mock = "*" +python-dotenv = "*" +pre_commit = "*" diff --git a/presidio-image-redactor/Dockerfile b/presidio-image-redactor/Dockerfile index bb2499fd8..316d3d9e4 100644 --- a/presidio-image-redactor/Dockerfile +++ b/presidio-image-redactor/Dockerfile @@ -4,7 +4,6 @@ ARG NAME ARG NLP_CONF_FILE ARG ANALYZER_CONF_FILE ARG RECOGNIZER_REGISTRY_CONF_FILE -ENV PIPENV_VENV_IN_PROJECT=1 ENV PIP_NO_CACHE_DIR=1 ENV ANALYZER_CONF_FILE=${ANALYZER_CONF_FILE} @@ -30,11 +29,10 @@ RUN apt-get update \ RUN apt-get update \ && apt-get install ffmpeg libsm6 libxext6 -y - -COPY ./Pipfile* /usr/bin/${NAME}/ -RUN pip install pipenv \ - && pipenv install --deploy +COPY ./pyproject.toml /usr/bin/${NAME}/ +COPY 
./README.md /usr/bin/${NAME}/ +RUN pip install poetry && poetry install --no-root --only=main -E server COPY . /usr/bin/${NAME}/ EXPOSE ${PORT} -CMD pipenv run python app.py \ No newline at end of file +CMD poetry run python app.py \ No newline at end of file diff --git a/presidio-image-redactor/Pipfile b/presidio-image-redactor/Pipfile deleted file mode 100644 index c096df40d..000000000 --- a/presidio-image-redactor/Pipfile +++ /dev/null @@ -1,22 +0,0 @@ -[[source]] -url = "https://pypi.python.org/simple" -verify_ssl = true -name = "pypi" - -[packages] -flask = ">=1.1.2" -pytesseract = ">=0.3.7,<1.0.0" -presidio-analyzer = ">=2.2.0" -pillow = ">=9.0,<10.0.0" -pydicom = ">=2.3.0,<3.0.0" -pypng = ">=0.20220715.0,<1.0.0" -python-gdcm = ">=3.0.22,<4.0.0" -matplotlib = ">=3.6.2,<4.0.0" -opencv-python = ">=4.8.0" -typing-extensions = "*" -azure-ai-formrecognizer = ">=3.3.0,<4.0.0" - -[dev-packages] -pytest = "*" -pytest-mock = "*" -ruff = "*" diff --git a/presidio-image-redactor/README.MD b/presidio-image-redactor/README.md similarity index 100% rename from presidio-image-redactor/README.MD rename to presidio-image-redactor/README.md diff --git a/presidio-image-redactor/VERSION b/presidio-image-redactor/VERSION deleted file mode 100644 index 62077419f..000000000 --- a/presidio-image-redactor/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.0.52 diff --git a/presidio-image-redactor/pyproject.toml b/presidio-image-redactor/pyproject.toml index fc512114a..69794cb06 100644 --- a/presidio-image-redactor/pyproject.toml +++ b/presidio-image-redactor/pyproject.toml @@ -1,41 +1,46 @@ [build-system] -requires = ["setuptools>=61.2"] -build-backend = "setuptools.build_meta" +build-backend = "poetry.core.masonry.api" +requires = ["poetry-core"] -[project] +[tool.poetry] name = "presidio-image-redactor" +version = "0.0.52" description = "Presidio image redactor package" -license = {text = "MIT License"} +authors = ["Presidio "] +license = "MIT" classifiers = [ "License :: OSI Approved :: MIT 
License", "Operating System :: OS Independent", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", ] -requires-python = ">= 3.5" -urls = {Homepage = "https://github.com/Microsoft/presidio"} keywords = ["presidio_image_redactor"] -dependencies = [ - "pillow>=9.0", - "pytesseract>=0.3.7,<0.4", - "presidio-analyzer>=1.9.0", - "matplotlib>=3.6", - "pydicom>=2.3.0", - "pypng>=0.20220715.0", - "azure-ai-formrecognizer>=3.3.0,<4.0.0", - "opencv-python>=4.0.0,<5.0.0", -] -dynamic = ["version"] +urls = {Homepage = "https://github.com/Microsoft/presidio"} +readme = "README.md" -[project.readme] -file = "README.md" -content-type = "text/markdown" +[tool.poetry.dependencies] +python = ">=3.8,<4.0" +pillow = ">=9.0" +pytesseract = ">=0.3.7,<0.4" +presidio-analyzer = ">=1.9.0" +matplotlib = ">=3.6" +pydicom = ">=2.3.0" +pypng = ">=0.20220715.0" +azure-ai-formrecognizer = ">=3.3.0,<4.0.0" +opencv-python = ">=4.0.0,<5.0.0" +python-gdcm = ">=3.0.24.1" +#numpy = ">=1.21.0" +flask = { version = ">=1.1", optional = true } -[project.optional-dependencies] -test = ["pytest", "pytest-mock>=3.10.0", "ruff"] +[tool.poetry.extras] +server = ["flask"] -[tool.setuptools.dynamic] -version = {file = ["VERSION"]} +[tool.poetry.group.dev.dependencies] +pip = "*" +ruff = "*" +pytest = "*" +pytest-mock = "*" +python-dotenv = "*" +pre_commit = "*" diff --git a/presidio-structured/Pipfile b/presidio-structured/Pipfile deleted file mode 100644 index 1a21a0f57..000000000 --- a/presidio-structured/Pipfile +++ /dev/null @@ -1,15 +0,0 @@ -[[source]] -url = "https://pypi.python.org/simple" -verify_ssl = true -name = "pypi" - -[packages] -flask = ">=1.1" -presidio-analyzer = ">=2.2.31" -presidio-anonymizer = ">=2.2.31" -pandas = ">=1.5.2" - -[dev-packages] -pytest = "*" -ruff = "*" -pre_commit = "*" diff --git a/presidio-structured/VERSION 
b/presidio-structured/VERSION deleted file mode 100644 index 37c522913..000000000 --- a/presidio-structured/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.0.2-alpha diff --git a/presidio-structured/pyproject.toml b/presidio-structured/pyproject.toml index 7b5c0234a..b1e992316 100644 --- a/presidio-structured/pyproject.toml +++ b/presidio-structured/pyproject.toml @@ -1,15 +1,16 @@ [build-system] -requires = ["setuptools>=61.2"] -build-backend = "setuptools.build_meta" +build-backend = "poetry.core.masonry.api" +requires = ["poetry-core"] -[project] +[tool.poetry] name = "presidio_structured" +version = "0.0.2-alpha" description = "Presidio structured package - analyzes and anonymizes structured and semi-structured data." -license = {text = "MIT License"} +authors = ["Presidio <presidio@microsoft.com>"] +license = "MIT" classifiers = [ "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", @@ -17,21 +18,18 @@ classifiers = [ ] keywords = ["presidio_structured"] urls = {Homepage = "https://github.com/microsoft/presidio"} -requires-python = ">=3.5" -dependencies = [ - "presidio-analyzer>=2.2", - "presidio-anonymizer>=2.2", -] -dynamic = ["version"] - - -[project.optional-dependencies] -test = ["pytest", "ruff"] - +readme = "README.md" -[project.readme] -file = "README.md" -content-type = "text/markdown" +[tool.poetry.dependencies] +python = ">=3.8,<4.0" +presidio-analyzer = ">=2.2" +presidio-anonymizer = ">=2.2" +pandas = ">=1.5.2" -[tool.setuptools.dynamic] -version = {file = ["VERSION"]} +[tool.poetry.group.dev.dependencies] +pip = "*" +ruff = "*" +pytest = "*" +pytest-mock = "*" +python-dotenv = "*" +pre_commit = "*"