From 0e8528958869e967b291e23c1b8452d0347031df Mon Sep 17 00:00:00 2001 From: Omri Mendels Date: Mon, 25 Mar 2024 09:57:15 +0200 Subject: [PATCH 1/3] changed default aggregation_strategy to max --- .../presidio_analyzer/nlp_engine/ner_model_configuration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presidio-analyzer/presidio_analyzer/nlp_engine/ner_model_configuration.py b/presidio-analyzer/presidio_analyzer/nlp_engine/ner_model_configuration.py index 8408f776a..0453dc87a 100644 --- a/presidio-analyzer/presidio_analyzer/nlp_engine/ner_model_configuration.py +++ b/presidio-analyzer/presidio_analyzer/nlp_engine/ner_model_configuration.py @@ -64,7 +64,7 @@ class NerModelConfiguration: """ # noqa E501 labels_to_ignore: Optional[Collection[str]] = None - aggregation_strategy: Optional[str] = "simple" + aggregation_strategy: Optional[str] = "max" stride: Optional[int] = 14 alignment_mode: Optional[str] = "expand" default_score: Optional[float] = 0.85 From 271676a000e94f21372cdf5e213a0aeeed95baaa Mon Sep 17 00:00:00 2001 From: Omri Mendels Date: Mon, 25 Mar 2024 10:00:53 +0200 Subject: [PATCH 2/3] Update transformers.yaml --- presidio-analyzer/conf/transformers.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presidio-analyzer/conf/transformers.yaml b/presidio-analyzer/conf/transformers.yaml index 37388cc60..2b05c7a65 100644 --- a/presidio-analyzer/conf/transformers.yaml +++ b/presidio-analyzer/conf/transformers.yaml @@ -9,7 +9,7 @@ models: ner_model_configuration: labels_to_ignore: - O - aggregation_strategy: simple # "simple", "first", "average", "max" + aggregation_strategy: max # "simple", "first", "average", "max" stride: 16 # If stride >= 0, process long texts in # overlapping windows of the model max # length. The value is the length of the From bcca95ef28a16b2205098841751d8ec78b0048dd Mon Sep 17 00:00:00 2001 From: Omri Mendels Date: Mon, 25 Mar 2024 11:07:14 +0200 Subject: [PATCH 3/3] Update transformers.yaml --- presidio-analyzer/conf/transformers.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/presidio-analyzer/conf/transformers.yaml b/presidio-analyzer/conf/transformers.yaml index 2b05c7a65..c36218530 100644 --- a/presidio-analyzer/conf/transformers.yaml +++ b/presidio-analyzer/conf/transformers.yaml @@ -9,12 +9,12 @@ models: ner_model_configuration: labels_to_ignore: - O - aggregation_strategy: max # "simple", "first", "average", "max" - stride: 16 # If stride >= 0, process long texts in - # overlapping windows of the model max - # length. The value is the length of the - # window overlap in transformer tokenizer - # tokens, NOT the length of the stride. + aggregation_strategy: max # "simple", "first", "average", "max" + stride: 16 # If stride >= 0, process long texts in + # overlapping windows of the model max + # length. The value is the length of the + # window overlap in transformer tokenizer + # tokens, NOT the length of the stride. alignment_mode: expand # "strict", "contract", "expand" model_to_presidio_entity_mapping: PER: PERSON