Skip to content

Commit

Permalink
Dev containers for: analyzer, analyzer+transformers, anonymizer and i…
Browse files Browse the repository at this point in the history
…mage redaction (#1450)
  • Loading branch information
roeybc authored Sep 19, 2024
1 parent 4aeb56b commit 0721e36
Show file tree
Hide file tree
Showing 9 changed files with 133 additions and 1 deletion.
21 changes: 21 additions & 0 deletions .devcontainer/presidio-analyzer-transformers/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
  // Dev container for presidio-analyzer with the transformers NLP configuration.
  "name": "Presidio Analyzer Transformers",
  "build": {
    "dockerfile": "../../presidio-analyzer/Dockerfile.dev",
    "context": "../../presidio-analyzer",
    // Overrides for the ARGs declared in Dockerfile.dev; NLP_CONF_FILE and
    // POETRY_EXTRAS are later read by install_dependencies.sh.
    "args": {
      "DEV_MODE": "transformers",
      "NLP_CONF_FILE": "presidio_analyzer/conf/transformers.yaml",
      "POETRY_EXTRAS": "-E transformers"
    }
  },
  // Only the analyzer package is bind-mounted, so /workspace is the package root.
  "workspaceMount": "source=${localWorkspaceFolder}/presidio-analyzer,target=/workspace,type=bind",
  "workspaceFolder": "/workspace",
  // Install dependencies and NLP models once, when the container is created.
  "postCreateCommand": "chmod +x ./install_dependencies.sh && ./install_dependencies.sh",
  "postAttachCommand": "poetry shell",
  "customizations": {
    // Tool-specific settings must be nested under the tool's own key;
    // a bare "extensions" list directly under "customizations" is ignored.
    "vscode": {
      "extensions": [
        "ms-python.python"
      ]
    }
  }
}
18 changes: 18 additions & 0 deletions .devcontainer/presidio-analyzer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
  // Dev container for presidio-analyzer with the default NLP configuration.
  "name": "Presidio Analyzer",
  "build": {
    "dockerfile": "../../presidio-analyzer/Dockerfile.dev",
    "context": "../../presidio-analyzer",
    "args": {
      "DEV_MODE": "dev"
    }
  },
  // Only the analyzer package is bind-mounted, so /workspace is the package root.
  "workspaceMount": "source=${localWorkspaceFolder}/presidio-analyzer,target=/workspace,type=bind",
  "workspaceFolder": "/workspace",
  // Install dependencies and NLP models once at creation time rather than on
  // every attach, mirroring the transformers dev container.
  "postCreateCommand": "chmod +x ./install_dependencies.sh && ./install_dependencies.sh",
  "postAttachCommand": "poetry shell",
  "customizations": {
    // Tool-specific settings must be nested under the tool's own key;
    // a bare "extensions" list directly under "customizations" is ignored.
    "vscode": {
      "extensions": [
        "ms-python.python"
      ]
    }
  }
}
20 changes: 20 additions & 0 deletions .devcontainer/presidio-anonymizer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
  // Dev container for presidio-anonymizer.
  "name": "Presidio Anonymizer",
  "build": {
    "dockerfile": "../../presidio-anonymizer/Dockerfile.dev",
    "context": "../../presidio-anonymizer"
  },
  // Only the anonymizer package is bind-mounted, so /workspace is the package root.
  "workspaceMount": "source=${localWorkspaceFolder}/presidio-anonymizer,target=/workspace,type=bind",
  "workspaceFolder": "/workspace",
  // Array form: executed directly, without going through a shell.
  "onCreateCommand": [
    "poetry",
    "install",
    "--no-interaction"
  ],
  "postAttachCommand": "poetry shell",
  "customizations": {
    // Tool-specific settings must be nested under the tool's own key;
    // a bare "extensions" list directly under "customizations" is ignored.
    "vscode": {
      "extensions": [
        "ms-python.python"
      ]
    }
  }
}
20 changes: 20 additions & 0 deletions .devcontainer/presidio-image-redactor/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
  // Dev container for presidio-image-redactor.
  "name": "Presidio Image Redactor",
  "build": {
    "dockerfile": "../../presidio-image-redactor/Dockerfile.dev",
    "context": "../../presidio-image-redactor"
  },
  // Only the image-redactor package is bind-mounted, so /workspace is the package root.
  "workspaceMount": "source=${localWorkspaceFolder}/presidio-image-redactor,target=/workspace,type=bind",
  "workspaceFolder": "/workspace",
  // Array form: executed directly, without going through a shell.
  "onCreateCommand": [
    "poetry",
    "install",
    "--no-interaction"
  ],
  "postAttachCommand": "poetry shell",
  "customizations": {
    // Tool-specific settings must be nested under the tool's own key;
    // a bare "extensions" list directly under "customizations" is ignored.
    "vscode": {
      "extensions": [
        "ms-python.python"
      ]
    }
  }
}
20 changes: 20 additions & 0 deletions presidio-analyzer/Dockerfile.dev
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Development image for presidio-analyzer, built by the dev container configs.
FROM python:3.9-slim

# Build-time settings; dev container configs override them through "build.args".
ARG DEV_MODE=dev
ARG POETRY_EXTRAS=""
ARG NLP_CONF_FILE=presidio_analyzer/conf/default.yaml
ARG ANALYZER_CONF_FILE=presidio_analyzer/conf/default_analyzer.yaml
ARG RECOGNIZER_REGISTRY_CONF_FILE=presidio_analyzer/conf/default_recognizers.yaml

# Persist the settings into the container environment: install_dependencies.sh
# reads POETRY_EXTRAS and NLP_CONF_FILE after the container has been created.
ENV DEV_MODE=${DEV_MODE} \
    PIP_NO_CACHE_DIR=1 \
    ANALYZER_CONF_FILE=${ANALYZER_CONF_FILE} \
    RECOGNIZER_REGISTRY_CONF_FILE=${RECOGNIZER_REGISTRY_CONF_FILE} \
    NLP_CONF_FILE=${NLP_CONF_FILE} \
    POETRY_EXTRAS=${POETRY_EXTRAS}

# Install essential build tools. The package lists are removed in the same
# layer so they do not persist in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends build-essential \
    && rm -rf /var/lib/apt/lists/*

# Stay on Poetry 1.x: the dev containers run `poetry shell` on attach, and
# Poetry 2.x no longer bundles that command (it moved to a plugin).
# pip's cache is already disabled through PIP_NO_CACHE_DIR above.
RUN pip install "poetry>=1.8,<2.0"
5 changes: 5 additions & 0 deletions presidio-analyzer/install_dependencies.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
#
# Installs the presidio-analyzer Python dependencies with Poetry, then
# downloads the NLP models described by the configured NLP config file.
#
# Environment:
#   POETRY_EXTRAS  Extra arguments for `poetry install`, e.g. "-E transformers".
#   NLP_CONF_FILE  Path passed to install_nlp_models.py via --conf_file.

# Stop at the first failure so models are never installed on top of a
# broken or partially installed environment.
set -euo pipefail

# POETRY_EXTRAS is deliberately left unquoted: it holds flag/value pairs that
# must be split into separate arguments. An unset value expands to nothing.
# shellcheck disable=SC2086
poetry install -E server ${POETRY_EXTRAS:-} --no-interaction

poetry run python install_nlp_models.py --conf_file "${NLP_CONF_FILE:-}"
7 changes: 6 additions & 1 deletion presidio-analyzer/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,15 @@ stanza = { version = "*", optional = true }
spacy_stanza = { version = "*", optional = true }
azure-ai-textanalytics = { version = "*", optional = true }
azure-core = { version = "*", optional = true }
transformers = { version = "*", optional = true }
huggingface_hub = { version = "*", optional = true }

[tool.poetry.extras]
server = ["flask"]
transformers = ["spacy_huggingface_pipelines"]
transformers = [
"transformers",
"huggingface_hub",
"spacy_huggingface_pipelines"]
stanza = [
"stanza",
"spacy_stanza",
Expand Down
7 changes: 7 additions & 0 deletions presidio-anonymizer/Dockerfile.dev
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Development image for presidio-anonymizer, built by its dev container config.
FROM python:3.9-slim

# Install essential build tools. The package lists are removed in the same
# layer so they do not persist in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends build-essential \
    && rm -rf /var/lib/apt/lists/*

# Stay on Poetry 1.x: the dev container runs `poetry shell` on attach, and
# Poetry 2.x no longer bundles that command (it moved to a plugin).
RUN pip install --no-cache-dir "poetry>=1.8,<2.0"
16 changes: 16 additions & 0 deletions presidio-image-redactor/Dockerfile.dev
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Development image for presidio-image-redactor, built by its dev container config.
FROM python:3.9-slim

# System packages, installed in a single layer so `apt-get update` and
# `apt-get install` always run together and the package lists are removed in
# the layer that created them:
#   - build-essential: compiler toolchain
#   - tesseract-ocr: Tesseract OCR engine
#   - ffmpeg, libsm6, libxext6: NOTE(review): presumably native libraries
#     needed by the imaging dependencies - confirm against pyproject.toml
# `tesseract -v` fails the build early if the Tesseract binary is not usable.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ffmpeg \
        libsm6 \
        libxext6 \
        tesseract-ocr \
    && rm -rf /var/lib/apt/lists/* \
    && tesseract -v

# Stay on Poetry 1.x: the dev container runs `poetry shell` on attach, and
# Poetry 2.x no longer bundles that command (it moved to a plugin).
RUN pip install --no-cache-dir "poetry>=1.8,<2.0"

0 comments on commit 0721e36

Please sign in to comment.