From 9b1b9ece93a92e96d590f0f4cf36f7b172a3a25c Mon Sep 17 00:00:00 2001 From: iscai-msft <43154838+iscai-msft@users.noreply.github.com> Date: Tue, 1 Sep 2020 10:53:12 -0400 Subject: [PATCH] [text analytics] add bing_id property to LinkedEntity class (#13446) --- .../azure-ai-textanalytics/CHANGELOG.md | 2 + .../azure/ai/textanalytics/_base_client.py | 4 ++ .../azure/ai/textanalytics/_models.py | 21 +++++++-- ...ecognize_linked_entities.test_bing_id.yaml | 47 +++++++++++++++++++ ...ze_linked_entities_async.test_bing_id.yaml | 36 ++++++++++++++ .../tests/test_recognize_linked_entities.py | 16 ++++++- .../test_recognize_linked_entities_async.py | 16 ++++++- .../azure-ai-textanalytics/tests/test_repr.py | 7 ++- 8 files changed, 142 insertions(+), 7 deletions(-) create mode 100644 sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_recognize_linked_entities.test_bing_id.yaml create mode 100644 sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_recognize_linked_entities_async.test_bing_id.yaml diff --git a/sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md b/sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md index 372b10411b81..d11798ae878f 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md +++ b/sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md @@ -11,6 +11,8 @@ pass in `v3.0` to the kwarg `api_version` when creating your TextAnalyticsClient - `offset` is the offset of the text from the start of the document - We now have added support for opinion mining. To use this feature, you need to make sure you are using the service's v3.1-preview.1 API. To get this support pass `show_opinion_mining` as True when calling the `analyze_sentiment` endpoint +- Add property `bing_id` to the `LinkedEntity` class. This property is only available for v3.1-preview.2 and up, and it is to be +used in conjunction with the Bing Entity Search API to fetch additional relevant information about the returned entity. ## 5.0.0 (2020-07-27) diff --git a/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_base_client.py b/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_base_client.py index c269772d87ff..8d60ff8dbf9d 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_base_client.py +++ b/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_base_client.py @@ -15,6 +15,10 @@ class TextAnalyticsApiVersion(str, Enum): #: this is the default version V3_1_PREVIEW_1 = "v3.1-preview.1" + + # 3.1-preview.2 is not yet the default version since we don't have a + # reliable endpoint + V3_1_PREVIEW_2 = "v3.1-preview.2" V3_0 = "v3.0" def _authentication_policy(credential): diff --git a/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py b/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py index de975185f66e..339d70df7d6f 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py +++ b/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py @@ -1,4 +1,4 @@ -# coding=utf-8 +# coding=utf-8 pylint: disable=too-many-lines # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. @@ -616,6 +616,11 @@ class LinkedEntity(DictMixin): :ivar data_source: Data source used to extract entity linking, such as Wiki/Bing etc. :vartype data_source: str + :ivar str bing_id: Bing unique identifier of the recognized entity. Use in conjunction + with the Bing Entity Search SDK to fetch additional relevant information. Only + available for API version v3.1-preview.2 and up. + .. versionadded:: v3.1-preview.2 + The *bing_id* property. """ def __init__(self, **kwargs): @@ -625,9 +630,11 @@ def __init__(self, **kwargs): self.data_source_entity_id = kwargs.get("data_source_entity_id", None) self.url = kwargs.get("url", None) self.data_source = kwargs.get("data_source", None) + self.bing_id = kwargs.get("bing_id", None) @classmethod def _from_generated(cls, entity): + bing_id = entity.bing_id if hasattr(entity, "bing_id") else None return cls( name=entity.name, matches=[LinkedEntityMatch._from_generated(e) for e in entity.matches], # pylint: disable=protected-access @@ -635,12 +642,20 @@ def _from_generated(cls, entity): data_source_entity_id=entity.id, url=entity.url, data_source=entity.data_source, + bing_id=bing_id, ) def __repr__(self): return "LinkedEntity(name={}, matches={}, language={}, data_source_entity_id={}, url={}, " \ - "data_source={})".format(self.name, repr(self.matches), self.language, self.data_source_entity_id, - self.url, self.data_source)[:1024] + "data_source={}, bing_id={})".format( + self.name, + repr(self.matches), + self.language, + self.data_source_entity_id, + self.url, + self.data_source, + self.bing_id, + )[:1024] class LinkedEntityMatch(DictMixin): diff --git a/sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_recognize_linked_entities.test_bing_id.yaml b/sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_recognize_linked_entities.test_bing_id.yaml new file mode 100644 index 000000000000..537c216e5a8b --- /dev/null +++ b/sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_recognize_linked_entities.test_bing_id.yaml @@ -0,0 +1,47 @@ +interactions: +- request: + body: '{"documents": [{"id": "0", "text": "Microsoft was founded by Bill Gates + and Paul Allen", "language": "en"}]}' + headers: + Accept: + - application/json, text/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '108' + Content-Type: + - application/json + User-Agent: + - azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit) + method: POST + uri: https://cognitiveusw2dev.azure-api.net/text/analytics/v3.1-preview.2/entities/linking?showStats=false&stringIndexType=UnicodeCodePoint + response: + body: + string: '{"documents":[{"id":"0","entities":[{"bingId":"0d47c987-0042-5576-15e8-97af601614fa","name":"Bill + Gates","matches":[{"text":"Bill Gates","offset":25,"length":10,"confidenceScore":0.52}],"language":"en","id":"Bill + Gates","url":"https://en.wikipedia.org/wiki/Bill_Gates","dataSource":"Wikipedia"},{"bingId":"df2c4376-9923-6a54-893f-2ee5a5badbc7","name":"Paul + Allen","matches":[{"text":"Paul Allen","offset":40,"length":10,"confidenceScore":0.54}],"language":"en","id":"Paul + Allen","url":"https://en.wikipedia.org/wiki/Paul_Allen","dataSource":"Wikipedia"},{"bingId":"a093e9b9-90f5-a3d5-c4b8-5855e1b01f85","name":"Microsoft","matches":[{"text":"Microsoft","offset":0,"length":9,"confidenceScore":0.49}],"language":"en","id":"Microsoft","url":"https://en.wikipedia.org/wiki/Microsoft","dataSource":"Wikipedia"}],"warnings":[]}],"errors":[],"modelVersion":"2020-02-01"}' + headers: + apim-request-id: + - 34b34e81-fcc2-4c1e-85b2-116f85196a4c + content-type: + - application/json; charset=utf-8 + csp-billing-usage: + - CognitiveServices.TextAnalytics.BatchScoring=1 + date: + - Mon, 31 Aug 2020 18:48:40 GMT + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + transfer-encoding: + - chunked + x-content-type-options: + - nosniff + x-envoy-upstream-service-time: + - '27' + status: + code: 200 + message: OK +version: 1 diff --git a/sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_recognize_linked_entities_async.test_bing_id.yaml b/sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_recognize_linked_entities_async.test_bing_id.yaml new file mode 100644 index 000000000000..2c4123289eb0 --- /dev/null +++ b/sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_recognize_linked_entities_async.test_bing_id.yaml @@ -0,0 +1,36 @@ +interactions: +- request: + body: '{"documents": [{"id": "0", "text": "Microsoft was founded by Bill Gates + and Paul Allen", "language": "en"}]}' + headers: + Accept: + - application/json, text/json + Content-Length: + - '108' + Content-Type: + - application/json + User-Agent: + - azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit) + method: POST + uri: https://cognitiveusw2dev.azure-api.net/text/analytics/v3.1-preview.2/entities/linking?showStats=false&stringIndexType=UnicodeCodePoint + response: + body: + string: '{"documents":[{"id":"0","entities":[{"bingId":"0d47c987-0042-5576-15e8-97af601614fa","name":"Bill + Gates","matches":[{"text":"Bill Gates","offset":25,"length":10,"confidenceScore":0.52}],"language":"en","id":"Bill + Gates","url":"https://en.wikipedia.org/wiki/Bill_Gates","dataSource":"Wikipedia"},{"bingId":"df2c4376-9923-6a54-893f-2ee5a5badbc7","name":"Paul + Allen","matches":[{"text":"Paul Allen","offset":40,"length":10,"confidenceScore":0.54}],"language":"en","id":"Paul + Allen","url":"https://en.wikipedia.org/wiki/Paul_Allen","dataSource":"Wikipedia"},{"bingId":"a093e9b9-90f5-a3d5-c4b8-5855e1b01f85","name":"Microsoft","matches":[{"text":"Microsoft","offset":0,"length":9,"confidenceScore":0.49}],"language":"en","id":"Microsoft","url":"https://en.wikipedia.org/wiki/Microsoft","dataSource":"Wikipedia"}],"warnings":[]}],"errors":[],"modelVersion":"2020-02-01"}' + headers: + apim-request-id: 70ab796e-3da1-4a55-86b4-16c4b19a97a8 + content-type: application/json; charset=utf-8 + csp-billing-usage: CognitiveServices.TextAnalytics.BatchScoring=1 + date: Mon, 31 Aug 2020 18:48:41 GMT + strict-transport-security: max-age=31536000; includeSubDomains; preload + transfer-encoding: chunked + x-content-type-options: nosniff + x-envoy-upstream-service-time: '26' + status: + code: 200 + message: OK + url: https://cognitiveusw2dev.azure-api.net/text/analytics/v3.1-preview.2/entities/linking?showStats=false&stringIndexType=UnicodeCodePoint +version: 1 diff --git a/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_linked_entities.py b/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_linked_entities.py index 2ca6a82027d7..4d2b16d205fd 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_linked_entities.py +++ b/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_linked_entities.py @@ -3,7 +3,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # ------------------------------------ - +import os import pytest import platform import functools @@ -586,3 +586,17 @@ def test_string_index_type_not_fail_v3(self, client): # make sure that the addition of the string_index_type kwarg for v3.1-preview.1 doesn't # cause v3.0 calls to fail client.recognize_linked_entities(["please don't fail"]) + + # currently only have this as playback since the dev endpoint is unreliable + @pytest.mark.playback_test_only + @GlobalTextAnalyticsAccountPreparer() + @TextAnalyticsClientPreparer(client_kwargs={ + "api_version": TextAnalyticsApiVersion.V3_1_PREVIEW_2, + "text_analytics_account_key": os.environ.get('AZURE_TEXT_ANALYTICS_KEY'), + "text_analytics_account": "https://cognitiveusw2dev.azure-api.net/" + }) + def test_bing_id(self, client): + result = client.recognize_linked_entities(["Microsoft was founded by Bill Gates and Paul Allen"]) + for doc in result: + for entity in doc.entities: + assert entity.bing_id # this checks if it's None and if it's empty diff --git a/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_linked_entities_async.py b/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_linked_entities_async.py index e055f9178a24..5581410b49bf 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_linked_entities_async.py +++ b/sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_linked_entities_async.py @@ -3,7 +3,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # ------------------------------------ - +import os import pytest import platform import functools @@ -622,3 +622,17 @@ async def test_string_index_type_not_fail_v3(self, client): # make sure that the addition of the string_index_type kwarg for v3.1-preview.1 doesn't # cause v3.0 calls to fail await client.recognize_linked_entities(["please don't fail"]) + + # currently only have this as playback since the dev endpoint is unreliable + @pytest.mark.playback_test_only + @GlobalTextAnalyticsAccountPreparer() + @TextAnalyticsClientPreparer(client_kwargs={ + "api_version": TextAnalyticsApiVersion.V3_1_PREVIEW_2, + "text_analytics_account_key": os.environ.get('AZURE_TEXT_ANALYTICS_KEY'), + "text_analytics_account": "https://cognitiveusw2dev.azure-api.net/" + }) + async def test_bing_id(self, client): + result = await client.recognize_linked_entities(["Microsoft was founded by Bill Gates and Paul Allen"]) + for doc in result: + for entity in doc.entities: + assert entity.bing_id # this checks if it's None and if it's empty diff --git a/sdk/textanalytics/azure-ai-textanalytics/tests/test_repr.py b/sdk/textanalytics/azure-ai-textanalytics/tests/test_repr.py index eca9da158bff..8d0c1463f16e 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/tests/test_repr.py +++ b/sdk/textanalytics/azure-ai-textanalytics/tests/test_repr.py @@ -116,12 +116,15 @@ def linked_entity(linked_entity_match): language="English", data_source_entity_id="Bill Gates", url="https://en.wikipedia.org/wiki/Bill_Gates", - data_source="wikipedia" + data_source="wikipedia", + bing_id="12345678" ) model_repr = ( "LinkedEntity(name=Bill Gates, matches=[{}, {}], "\ "language=English, data_source_entity_id=Bill Gates, "\ - "url=https://en.wikipedia.org/wiki/Bill_Gates, data_source=wikipedia)".format(linked_entity_match[1], linked_entity_match[1]) + "url=https://en.wikipedia.org/wiki/Bill_Gates, data_source=wikipedia, bing_id=12345678)".format( + linked_entity_match[1], linked_entity_match[1] + ) ) assert repr(model) == model_repr return model, model_repr