From fc02f42c41ee04ba73a2ff2062fab1f94f3ab5e3 Mon Sep 17 00:00:00 2001 From: iscai-msft <43154838+iscai-msft@users.noreply.github.com> Date: Tue, 1 Sep 2020 10:52:05 -0400 Subject: [PATCH] [text analytics] add domain_filter param (#13451) --- .../azure/ai/textanalytics/__init__.py | 2 + .../azure/ai/textanalytics/_models.py | 5 +++ .../textanalytics/_text_analytics_client.py | 6 +++ .../aio/_text_analytics_client_async.py | 7 +++ ...e_pii_entities.test_phi_domain_filter.yaml | 44 +++++++++++++++++++ ...entities_async.test_phi_domain_filter.yaml | 33 ++++++++++++++ .../tests/test_recognize_pii_entities.py | 16 ++++++- .../test_recognize_pii_entities_async.py | 14 ++++++ 8 files changed, 126 insertions(+), 1 deletion(-) create mode 100644 sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_recognize_pii_entities.test_phi_domain_filter.yaml create mode 100644 sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_recognize_pii_entities_async.test_phi_domain_filter.yaml diff --git a/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/__init__.py b/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/__init__.py index 476f9842a0665..ef3d19429fd8d 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/__init__.py +++ b/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/__init__.py @@ -31,6 +31,7 @@ OpinionSentiment, RecognizePiiEntitiesResult, PiiEntity, + PiiEntityDomainType, ) __all__ = [ @@ -59,6 +60,7 @@ 'OpinionSentiment', 'RecognizePiiEntitiesResult', 'PiiEntity', + 'PiiEntityDomainType', ] __version__ = VERSION diff --git a/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py b/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py index e9733e8fec25c..de975185f66e7 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py +++ b/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py @@ -4,6 +4,7 @@ 
# Licensed under the MIT License. # ------------------------------------ import re +from enum import Enum from ._generated.models import ( LanguageInput, MultiLanguageInput, @@ -64,6 +65,10 @@ def get(self, key, default=None): return self.__dict__[key] return default +class PiiEntityDomainType(str, Enum): + """The different domains of PII entities that users can filter by""" + PROTECTED_HEALTH_INFORMATION = "PHI" # See https://aka.ms/tanerpii for more information. + class DetectedLanguage(DictMixin): """DetectedLanguage contains the predicted language found in text, diff --git a/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_text_analytics_client.py b/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_text_analytics_client.py index 8c636e18ae92a..7c68034c9de83 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_text_analytics_client.py +++ b/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_text_analytics_client.py @@ -260,6 +260,10 @@ def recognize_pii_entities( # type: ignore be used for scoring, e.g. "latest", "2019-10-01". If a model-version is not specified, the API will default to the latest, non-preview version. :keyword bool show_stats: If set to true, response will contain document level statistics. + :keyword domain_filter: Filters the response entities to only those included in the specified domain. + For example, if set to 'PHI', only entities in the Protected Health Information domain are returned. + See https://aka.ms/tanerpii for more information. + :paramtype domain_filter: str or ~azure.ai.textanalytics.PiiEntityDomainType :return: The combined list of :class:`~azure.ai.textanalytics.RecognizePiiEntitiesResult` and :class:`~azure.ai.textanalytics.DocumentError` in the order the original documents were passed in. 
@@ -281,6 +285,7 @@ def recognize_pii_entities( # type: ignore docs = _validate_input(documents, "language", language) model_version = kwargs.pop("model_version", None) show_stats = kwargs.pop("show_stats", False) + domain_filter = kwargs.pop("domain_filter", None) if self._string_code_unit: kwargs.update({"string_index_type": self._string_code_unit}) try: @@ -288,6 +293,7 @@ def recognize_pii_entities( # type: ignore documents=docs, model_version=model_version, show_stats=show_stats, + domain=domain_filter, cls=kwargs.pop("cls", pii_entities_result), **kwargs ) diff --git a/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/aio/_text_analytics_client_async.py b/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/aio/_text_analytics_client_async.py index ffa9cd77d6e2a..f7c6290665ba5 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/aio/_text_analytics_client_async.py +++ b/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/aio/_text_analytics_client_async.py @@ -262,6 +262,10 @@ async def recognize_pii_entities( # type: ignore be used for scoring, e.g. "latest", "2019-10-01". If a model-version is not specified, the API will default to the latest, non-preview version. :keyword bool show_stats: If set to true, response will contain document level statistics. + :keyword domain_filter: Filters the response entities to only those included in the specified domain. + For example, if set to 'PHI', only entities in the Protected Health Information domain are returned. + See https://aka.ms/tanerpii for more information. + :paramtype domain_filter: str or ~azure.ai.textanalytics.PiiEntityDomainType :return: The combined list of :class:`~azure.ai.textanalytics.RecognizePiiEntitiesResult` and :class:`~azure.ai.textanalytics.DocumentError` in the order the original documents were passed in. 
@@ -283,6 +287,8 @@ async def recognize_pii_entities( # type: ignore docs = _validate_input(documents, "language", language) model_version = kwargs.pop("model_version", None) show_stats = kwargs.pop("show_stats", False) + domain_filter = kwargs.pop("domain_filter", None) + if self._string_code_unit: kwargs.update({"string_index_type": self._string_code_unit}) try: @@ -290,6 +296,7 @@ async def recognize_pii_entities( # type: ignore documents=docs, model_version=model_version, show_stats=show_stats, + domain=domain_filter, cls=kwargs.pop("cls", pii_entities_result), **kwargs ) diff --git a/sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_recognize_pii_entities.test_phi_domain_filter.yaml b/sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_recognize_pii_entities.test_phi_domain_filter.yaml new file mode 100644 index 0000000000000..ee650032c7a00 --- /dev/null +++ b/sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_recognize_pii_entities.test_phi_domain_filter.yaml @@ -0,0 +1,44 @@ +interactions: +- request: + body: '{"documents": [{"id": "0", "text": "I work at Microsoft and my phone number + is 333-333-3333", "language": "en"}]}' + headers: + Accept: + - application/json, text/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '113' + Content-Type: + - application/json + User-Agent: + - azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit) + method: POST + uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/entities/recognition/pii?showStats=false&domain=PHI&stringIndexType=UnicodeCodePoint + response: + body: + string: '{"documents":[{"id":"0","entities":[{"text":"333-333-3333","category":"Phone + Number","offset":43,"length":12,"confidenceScore":0.8}],"warnings":[]}],"errors":[],"modelVersion":"2020-07-01"}' + headers: + apim-request-id: + - c2319b95-6fd2-46c9-80e3-06c8f2701825 + content-type: + - application/json; charset=utf-8 
+ csp-billing-usage: + - CognitiveServices.TextAnalytics.BatchScoring=1 + date: + - Mon, 31 Aug 2020 20:32:54 GMT + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + transfer-encoding: + - chunked + x-content-type-options: + - nosniff + x-envoy-upstream-service-time: + - '79' + status: + code: 200 + message: OK +version: 1 diff --git a/sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_recognize_pii_entities_async.test_phi_domain_filter.yaml b/sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_recognize_pii_entities_async.test_phi_domain_filter.yaml new file mode 100644 index 0000000000000..7395d5ac2e411 --- /dev/null +++ b/sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_recognize_pii_entities_async.test_phi_domain_filter.yaml @@ -0,0 +1,33 @@ +interactions: +- request: + body: '{"documents": [{"id": "0", "text": "I work at Microsoft and my phone number + is 333-333-3333", "language": "en"}]}' + headers: + Accept: + - application/json, text/json + Content-Length: + - '113' + Content-Type: + - application/json + User-Agent: + - azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit) + method: POST + uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/entities/recognition/pii?showStats=false&domain=PHI&stringIndexType=UnicodeCodePoint + response: + body: + string: '{"documents":[{"id":"0","entities":[{"text":"333-333-3333","category":"Phone + Number","offset":43,"length":12,"confidenceScore":0.8}],"warnings":[]}],"errors":[],"modelVersion":"2020-07-01"}' + headers: + apim-request-id: 9265752d-3262-4dbb-94d6-be26889e3db9 + content-type: application/json; charset=utf-8 + csp-billing-usage: CognitiveServices.TextAnalytics.BatchScoring=1 + date: Mon, 31 Aug 2020 20:32:55 GMT + strict-transport-security: max-age=31536000; includeSubDomains; preload + transfer-encoding: chunked + x-content-type-options: nosniff + x-envoy-upstream-service-time: '82' + 
status: + code: 200 + message: OK + url: https://westus2.api.cognitive.microsoft.com//text/analytics/v3.1-preview.1/entities/recognition/pii?showStats=false&domain=PHI&stringIndexType=UnicodeCodePoint +version: 1 diff --git a/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_pii_entities.py b/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_pii_entities.py index 736f75cd46d56..fe12a81cf1d76 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_pii_entities.py +++ b/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_pii_entities.py @@ -17,6 +17,7 @@ TextDocumentInput, VERSION, TextAnalyticsApiVersion, + PiiEntityDomainType, ) # pre-apply the client_cls positional argument so it needn't be explicitly passed below @@ -573,4 +574,17 @@ def test_recognize_pii_entities_v3(self, client): with pytest.raises(NotImplementedError) as excinfo: client.recognize_pii_entities(["this should fail"]) - assert "'recognize_pii_entities' endpoint is only available for API version v3.1-preview.1 and up" in str(excinfo.value) \ No newline at end of file + assert "'recognize_pii_entities' endpoint is only available for API version v3.1-preview.1 and up" in str(excinfo.value) + + @GlobalTextAnalyticsAccountPreparer() + @TextAnalyticsClientPreparer() + def test_phi_domain_filter(self, client): + # without the domain filter, this should return two entities: Microsoft as an org, + # and the phone number. With the domain filter, it should only return one. 
+ result = client.recognize_pii_entities( + ["I work at Microsoft and my phone number is 333-333-3333"], + domain_filter=PiiEntityDomainType.PROTECTED_HEALTH_INFORMATION + ) + self.assertEqual(len(result[0].entities), 1) + self.assertEqual(result[0].entities[0].text, '333-333-3333') + self.assertEqual(result[0].entities[0].category, 'Phone Number') diff --git a/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_pii_entities_async.py b/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_pii_entities_async.py index fb2d67bcdcb94..ac673e423159c 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_pii_entities_async.py +++ b/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_pii_entities_async.py @@ -18,6 +18,7 @@ TextDocumentInput, VERSION, TextAnalyticsApiVersion, + PiiEntityDomainType, ) # pre-apply the client_cls positional argument so it needn't be explicitly passed below @@ -572,3 +573,16 @@ async def test_recognize_pii_entities_v3(self, client): await client.recognize_pii_entities(["this should fail"]) assert "'recognize_pii_entities' endpoint is only available for API version v3.1-preview.1 and up" in str(excinfo.value) + + @GlobalTextAnalyticsAccountPreparer() + @TextAnalyticsClientPreparer() + async def test_phi_domain_filter(self, client): + # without the domain filter, this should return two entities: Microsoft as an org, + # and the phone number. With the domain filter, it should only return one. + result = await client.recognize_pii_entities( + ["I work at Microsoft and my phone number is 333-333-3333"], + domain_filter=PiiEntityDomainType.PROTECTED_HEALTH_INFORMATION + ) + self.assertEqual(len(result[0].entities), 1) + self.assertEqual(result[0].entities[0].text, '333-333-3333') + self.assertEqual(result[0].entities[0].category, 'Phone Number')