From 07773ac83dbb6d95619e12ea5104259e0b5f4b1a Mon Sep 17 00:00:00 2001 From: Mariana Rios Flores Date: Wed, 2 Sep 2020 18:00:33 -0700 Subject: [PATCH] [TA] Add PII back (#14794) * all pii code * tests * recordings * samples * pr feedback * add mocking * PR feedback --- .../Azure.AI.TextAnalytics/CHANGELOG.md | 1 + .../Azure.AI.TextAnalytics.netstandard2.0.cs | 49 +++ .../Azure.AI.TextAnalytics/src/PiiEntity.cs | 64 ++++ .../src/PiiEntityCollection.cs | 32 ++ .../src/PiiEntityDomainType.cs | 30 ++ .../src/RecognizePiiEntitiesOptions.cs | 26 ++ .../src/RecognizePiiEntitiesResult.cs | 44 +++ .../RecognizePiiEntitiesResultCollection.cs | 34 ++ .../src/TextAnalyticsClient.cs | 322 ++++++++++++++++++ .../src/TextAnalyticsModelFactory.cs | 68 +++- .../Azure.AI.TextAnalytics/src/Transforms.cs | 33 ++ .../tests/RecognizePiiEntitiesTests.cs | 169 +++++++++ ...ognizePiiEntitiesBatchConvenienceTest.json | 109 ++++++ ...ePiiEntitiesBatchConvenienceTestAsync.json | 109 ++++++ ...iesBatchConvenienceWithStatisticsTest.json | 123 +++++++ ...tchConvenienceWithStatisticsTestAsync.json | 123 +++++++ .../RecognizePiiEntitiesBatchTest.json | 109 ++++++ .../RecognizePiiEntitiesBatchTestAsync.json | 109 ++++++ ...ecognizePiiEntitiesBatchWithErrorTest.json | 126 +++++++ ...izePiiEntitiesBatchWithErrorTestAsync.json | 126 +++++++ ...izePiiEntitiesBatchWithStatisticsTest.json | 123 +++++++ ...iEntitiesBatchWithStatisticsTestAsync.json | 123 +++++++ .../RecognizePiiEntitiesTest.json | 76 +++++ .../RecognizePiiEntitiesTestAsync.json | 76 +++++ .../RecognizePiiEntitiesWithDomainTest.json | 69 ++++ ...cognizePiiEntitiesWithDomainTestAsync.json | 69 ++++ .../RecognizePiiEntitiesWithLanguageTest.json | 76 +++++ ...gnizePiiEntitiesWithLanguageTestAsync.json | 76 +++++ .../tests/TextAnalyticsClientLiveTests.cs | 1 - .../tests/TextAnalyticsClientTests.cs | 11 + .../samples/Sample5_RecognizePiiEntities.cs | 38 +++ .../Sample5_RecognizePiiEntitiesBatch.cs | 86 +++++ 
...e5_RecognizePiiEntitiesBatchConvenience.cs | 55 +++ 33 files changed, 2683 insertions(+), 2 deletions(-) create mode 100644 sdk/textanalytics/Azure.AI.TextAnalytics/src/PiiEntity.cs create mode 100644 sdk/textanalytics/Azure.AI.TextAnalytics/src/PiiEntityCollection.cs create mode 100644 sdk/textanalytics/Azure.AI.TextAnalytics/src/PiiEntityDomainType.cs create mode 100644 sdk/textanalytics/Azure.AI.TextAnalytics/src/RecognizePiiEntitiesOptions.cs create mode 100644 sdk/textanalytics/Azure.AI.TextAnalytics/src/RecognizePiiEntitiesResult.cs create mode 100644 sdk/textanalytics/Azure.AI.TextAnalytics/src/RecognizePiiEntitiesResultCollection.cs create mode 100644 sdk/textanalytics/Azure.AI.TextAnalytics/tests/RecognizePiiEntitiesTests.cs create mode 100644 sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchConvenienceTest.json create mode 100644 sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchConvenienceTestAsync.json create mode 100644 sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchConvenienceWithStatisticsTest.json create mode 100644 sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchConvenienceWithStatisticsTestAsync.json create mode 100644 sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchTest.json create mode 100644 sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchTestAsync.json create mode 100644 sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchWithErrorTest.json create mode 100644 sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchWithErrorTestAsync.json create 
mode 100644 sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchWithStatisticsTest.json create mode 100644 sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchWithStatisticsTestAsync.json create mode 100644 sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesTest.json create mode 100644 sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesTestAsync.json create mode 100644 sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesWithDomainTest.json create mode 100644 sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesWithDomainTestAsync.json create mode 100644 sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesWithLanguageTest.json create mode 100644 sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesWithLanguageTestAsync.json create mode 100644 sdk/textanalytics/Azure.AI.TextAnalytics/tests/samples/Sample5_RecognizePiiEntities.cs create mode 100644 sdk/textanalytics/Azure.AI.TextAnalytics/tests/samples/Sample5_RecognizePiiEntitiesBatch.cs create mode 100644 sdk/textanalytics/Azure.AI.TextAnalytics/tests/samples/Sample5_RecognizePiiEntitiesBatchConvenience.cs diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/CHANGELOG.md b/sdk/textanalytics/Azure.AI.TextAnalytics/CHANGELOG.md index 85ca32d980ea8..04575da3af7ba 100644 --- a/sdk/textanalytics/Azure.AI.TextAnalytics/CHANGELOG.md +++ b/sdk/textanalytics/Azure.AI.TextAnalytics/CHANGELOG.md @@ -5,6 +5,7 @@ ### New Features - It defaults to the latest supported API version, which currently is `3.1-preview.2`. 
- `ErrorCode` value returned from the service is now surfaced in `RequestFailedException`. +- Added the `RecognizePiiEntities` endpoint which returns entities containing Personally Identifiable Information. This feature is available in the Text Analytics service v3.1-preview.1 and above. - Support added for Opinion Mining. This feature is available in the Text Analytics service v3.1-preview.1 and above. - Added `Offset` and `Length` properties for `CategorizedEntity`, `SentenceSentiment`, and `LinkedEntityMatch`. The default encoding is UTF-16 code units. For additional information see https://aka.ms/text-analytics-offsets - `TextAnalyticsError` and `TextAnalyticsWarning` now are marked as immutable. diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/api/Azure.AI.TextAnalytics.netstandard2.0.cs b/sdk/textanalytics/Azure.AI.TextAnalytics/api/Azure.AI.TextAnalytics.netstandard2.0.cs index 58d4a7371188f..dfbb0df0fecd8 100644 --- a/sdk/textanalytics/Azure.AI.TextAnalytics/api/Azure.AI.TextAnalytics.netstandard2.0.cs +++ b/sdk/textanalytics/Azure.AI.TextAnalytics/api/Azure.AI.TextAnalytics.netstandard2.0.cs @@ -179,6 +179,28 @@ public readonly partial struct OpinionSentiment public Azure.AI.TextAnalytics.TextSentiment Sentiment { get { throw null; } } public string Text { get { throw null; } } } + [System.Runtime.InteropServices.StructLayoutAttribute(System.Runtime.InteropServices.LayoutKind.Sequential)] + public readonly partial struct PiiEntity + { + private readonly object _dummy; + private readonly int _dummyPrimitive; + public Azure.AI.TextAnalytics.EntityCategory Category { get { throw null; } } + public double ConfidenceScore { get { throw null; } } + public int Length { get { throw null; } } + public int Offset { get { throw null; } } + public string SubCategory { get { throw null; } } + public string Text { get { throw null; } } + } + public partial class PiiEntityCollection : System.Collections.ObjectModel.ReadOnlyCollection + { + internal 
PiiEntityCollection() : base (default(System.Collections.Generic.IList)) { } + public string RedactedText { get { throw null; } } + public System.Collections.Generic.IReadOnlyCollection Warnings { get { throw null; } } + } + public enum PiiEntityDomainType + { + ProtectedHealthInformation = 0, + } public partial class RecognizeEntitiesResult : Azure.AI.TextAnalytics.TextAnalyticsResult { internal RecognizeEntitiesResult() { } @@ -201,6 +223,22 @@ internal RecognizeLinkedEntitiesResultCollection() : base (default(System.Collec public string ModelVersion { get { throw null; } } public Azure.AI.TextAnalytics.TextDocumentBatchStatistics Statistics { get { throw null; } } } + public partial class RecognizePiiEntitiesOptions : Azure.AI.TextAnalytics.TextAnalyticsRequestOptions + { + public RecognizePiiEntitiesOptions() { } + public Azure.AI.TextAnalytics.PiiEntityDomainType DomainFilter { get { throw null; } set { } } + } + public partial class RecognizePiiEntitiesResult : Azure.AI.TextAnalytics.TextAnalyticsResult + { + internal RecognizePiiEntitiesResult() { } + public Azure.AI.TextAnalytics.PiiEntityCollection Entities { get { throw null; } } + } + public partial class RecognizePiiEntitiesResultCollection : System.Collections.ObjectModel.ReadOnlyCollection + { + internal RecognizePiiEntitiesResultCollection() : base (default(System.Collections.Generic.IList)) { } + public string ModelVersion { get { throw null; } } + public Azure.AI.TextAnalytics.TextDocumentBatchStatistics Statistics { get { throw null; } } + } [System.Runtime.InteropServices.StructLayoutAttribute(System.Runtime.InteropServices.LayoutKind.Sequential)] public readonly partial struct SentenceSentiment { @@ -273,6 +311,12 @@ public TextAnalyticsClient(System.Uri endpoint, Azure.Core.TokenCredential crede public virtual Azure.Response RecognizeLinkedEntitiesBatch(System.Collections.Generic.IEnumerable documents, string language = null, Azure.AI.TextAnalytics.TextAnalyticsRequestOptions options = null, 
System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } public virtual System.Threading.Tasks.Task> RecognizeLinkedEntitiesBatchAsync(System.Collections.Generic.IEnumerable documents, Azure.AI.TextAnalytics.TextAnalyticsRequestOptions options = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } public virtual System.Threading.Tasks.Task> RecognizeLinkedEntitiesBatchAsync(System.Collections.Generic.IEnumerable documents, string language = null, Azure.AI.TextAnalytics.TextAnalyticsRequestOptions options = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + public virtual Azure.Response RecognizePiiEntities(string document, string language = null, Azure.AI.TextAnalytics.RecognizePiiEntitiesOptions options = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + public virtual System.Threading.Tasks.Task> RecognizePiiEntitiesAsync(string document, string language = null, Azure.AI.TextAnalytics.RecognizePiiEntitiesOptions options = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + public virtual Azure.Response RecognizePiiEntitiesBatch(System.Collections.Generic.IEnumerable documents, Azure.AI.TextAnalytics.RecognizePiiEntitiesOptions options = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + public virtual Azure.Response RecognizePiiEntitiesBatch(System.Collections.Generic.IEnumerable documents, string language = null, Azure.AI.TextAnalytics.RecognizePiiEntitiesOptions options = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + public virtual System.Threading.Tasks.Task> 
RecognizePiiEntitiesBatchAsync(System.Collections.Generic.IEnumerable documents, Azure.AI.TextAnalytics.RecognizePiiEntitiesOptions options = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } + public virtual System.Threading.Tasks.Task> RecognizePiiEntitiesBatchAsync(System.Collections.Generic.IEnumerable documents, string language = null, Azure.AI.TextAnalytics.RecognizePiiEntitiesOptions options = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)] public override string ToString() { throw null; } } @@ -359,12 +403,17 @@ public static partial class TextAnalyticsModelFactory public static Azure.AI.TextAnalytics.LinkedEntityMatch LinkedEntityMatch(string text, double score, int offset, int length) { throw null; } public static Azure.AI.TextAnalytics.MinedOpinion MinedOpinion(Azure.AI.TextAnalytics.AspectSentiment aspect, System.Collections.Generic.IReadOnlyList opinions) { throw null; } public static Azure.AI.TextAnalytics.OpinionSentiment OpinionSentiment(Azure.AI.TextAnalytics.TextSentiment sentiment, double positiveScore, double negativeScore, string text, bool isNegated, int offset, int length) { throw null; } + public static Azure.AI.TextAnalytics.PiiEntity PiiEntity(string text, string category, string subCategory, double score, int offset, int length) { throw null; } + public static Azure.AI.TextAnalytics.PiiEntityCollection PiiEntityCollection(System.Collections.Generic.IList entities, string redactedText, System.Collections.Generic.IList warnings = null) { throw null; } public static Azure.AI.TextAnalytics.RecognizeEntitiesResult RecognizeEntitiesResult(string id, Azure.AI.TextAnalytics.TextAnalyticsError error) { throw null; } public static Azure.AI.TextAnalytics.RecognizeEntitiesResult RecognizeEntitiesResult(string 
id, Azure.AI.TextAnalytics.TextDocumentStatistics statistics, Azure.AI.TextAnalytics.CategorizedEntityCollection entities) { throw null; } public static Azure.AI.TextAnalytics.RecognizeEntitiesResultCollection RecognizeEntitiesResultCollection(System.Collections.Generic.IEnumerable list, Azure.AI.TextAnalytics.TextDocumentBatchStatistics statistics, string modelVersion) { throw null; } public static Azure.AI.TextAnalytics.RecognizeLinkedEntitiesResult RecognizeLinkedEntitiesResult(string id, Azure.AI.TextAnalytics.TextAnalyticsError error) { throw null; } public static Azure.AI.TextAnalytics.RecognizeLinkedEntitiesResult RecognizeLinkedEntitiesResult(string id, Azure.AI.TextAnalytics.TextDocumentStatistics statistics, Azure.AI.TextAnalytics.LinkedEntityCollection linkedEntities) { throw null; } public static Azure.AI.TextAnalytics.RecognizeLinkedEntitiesResultCollection RecognizeLinkedEntitiesResultCollection(System.Collections.Generic.IEnumerable list, Azure.AI.TextAnalytics.TextDocumentBatchStatistics statistics, string modelVersion) { throw null; } + public static Azure.AI.TextAnalytics.RecognizePiiEntitiesResult RecognizePiiEntitiesResult(string id, Azure.AI.TextAnalytics.TextAnalyticsError error) { throw null; } + public static Azure.AI.TextAnalytics.RecognizePiiEntitiesResult RecognizePiiEntitiesResult(string id, Azure.AI.TextAnalytics.TextDocumentStatistics statistics, Azure.AI.TextAnalytics.PiiEntityCollection entities) { throw null; } + public static Azure.AI.TextAnalytics.RecognizePiiEntitiesResultCollection RecognizePiiEntitiesResultCollection(System.Collections.Generic.IEnumerable list, Azure.AI.TextAnalytics.TextDocumentBatchStatistics statistics, string modelVersion) { throw null; } [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)] public static Azure.AI.TextAnalytics.SentenceSentiment SentenceSentiment(Azure.AI.TextAnalytics.TextSentiment sentiment, string text, double positiveScore, double 
neutralScore, double negativeScore) { throw null; }
         public static Azure.AI.TextAnalytics.SentenceSentiment SentenceSentiment(Azure.AI.TextAnalytics.TextSentiment sentiment, string text, double positiveScore, double neutralScore, double negativeScore, int offset, int length, System.Collections.Generic.IReadOnlyList minedOpinions) { throw null; }
diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/src/PiiEntity.cs b/sdk/textanalytics/Azure.AI.TextAnalytics/src/PiiEntity.cs
new file mode 100644
index 0000000000000..a0c3ec964c87e
--- /dev/null
+++ b/sdk/textanalytics/Azure.AI.TextAnalytics/src/PiiEntity.cs
@@ -0,0 +1,64 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using Azure.AI.TextAnalytics.Models;
+
+namespace Azure.AI.TextAnalytics
+{
+    /// <summary>
+    /// A word or phrase identified as Personally Identifiable Information
+    /// that can be categorized as a known type in a given taxonomy.
+    /// The set of categories recognized by the Text Analytics service is described in
+    /// the Text Analytics service documentation.
+    /// </summary>
+    public readonly struct PiiEntity
+    {
+        internal PiiEntity(Entity entity)
+        {
+            Category = entity.Category;
+            Text = entity.Text;
+            SubCategory = entity.Subcategory;
+            ConfidenceScore = entity.ConfidenceScore;
+            Offset = entity.Offset;
+            Length = entity.Length;
+        }
+
+        /// <summary>
+        /// Gets the entity text as it appears in the input document.
+        /// </summary>
+        public string Text { get; }
+
+        /// <summary>
+        /// Gets the entity category inferred by the Text Analytics service's
+        /// named entity recognition model, such as Financial Account
+        /// Identification/Social Security Number/Phone Number, etc.
+        /// The list of available categories is described in
+        /// the Text Analytics service documentation.
+        /// </summary>
+        public EntityCategory Category { get; }
+
+        /// <summary>
+        /// Gets the sub category of the entity inferred by the Text Analytics service's
+        /// named entity recognition model. This property may not have a value if
+        /// a sub category doesn't exist for this entity. The list of available categories and
+        /// subcategories is described in the Text Analytics service documentation.
+        /// </summary>
+        public string SubCategory { get; }
+
+        /// <summary>
+        /// Gets a score between 0 and 1, indicating the confidence that the
+        /// text substring matches this inferred entity.
+        /// </summary>
+        public double ConfidenceScore { get; }
+
+        /// <summary>
+        /// Gets the starting position (in UTF-16 code units) for the matching text in the input document.
+        /// </summary>
+        public int Offset { get; }
+
+        /// <summary>
+        /// Gets the length (in UTF-16 code units) of the matching text in the input document.
+        /// </summary>
+        public int Length { get; }
+    }
+}
diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/src/PiiEntityCollection.cs b/sdk/textanalytics/Azure.AI.TextAnalytics/src/PiiEntityCollection.cs
new file mode 100644
index 0000000000000..b68e03ff395de
--- /dev/null
+++ b/sdk/textanalytics/Azure.AI.TextAnalytics/src/PiiEntityCollection.cs
@@ -0,0 +1,32 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System.Collections.Generic;
+using System.Collections.ObjectModel;
+
+namespace Azure.AI.TextAnalytics
+{
+    /// <summary>
+    /// Collection of <see cref="PiiEntity"/> objects in a document.
+    /// </summary>
+    public class PiiEntityCollection : ReadOnlyCollection<PiiEntity>
+    {
+        internal PiiEntityCollection(IList<PiiEntity> entities, string redactedText, IList<TextAnalyticsWarning> warnings)
+            : base(entities)
+        {
+            RedactedText = redactedText;
+            Warnings = new ReadOnlyCollection<TextAnalyticsWarning>(warnings ?? new List<TextAnalyticsWarning>()); // a null list means "no warnings"; ReadOnlyCollection rejects null
+        }
+
+        /// <summary>
+        /// Gets the text of the input document with all of the Personally Identifiable Information
+        /// redacted out.
+        /// </summary>
+        public string RedactedText { get; }
+
+        ///
+        /// Warnings encountered while processing the document.
+        ///
+        public IReadOnlyCollection<TextAnalyticsWarning> Warnings { get; }
+    }
+}
diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/src/PiiEntityDomainType.cs b/sdk/textanalytics/Azure.AI.TextAnalytics/src/PiiEntityDomainType.cs
new file mode 100644
index 0000000000000..f23a155d04518
--- /dev/null
+++ b/sdk/textanalytics/Azure.AI.TextAnalytics/src/PiiEntityDomainType.cs
@@ -0,0 +1,30 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+namespace Azure.AI.TextAnalytics
+{
+    /// <summary>
+    /// The different domains of PII entities that users can filter requests by.
+    /// </summary>
+    public enum PiiEntityDomainType
+    {
+        /// <summary>
+        /// Protected Health Information entities.
+        /// For more information see the Text Analytics service documentation.
+        /// </summary>
+        ProtectedHealthInformation
+    }
+
+    [System.Diagnostics.CodeAnalysis.SuppressMessage("StyleCop.CSharp.DocumentationRules", "SA1649:File name should match first type name", Justification = "Small extensions, good to keep here.")]
+    internal static class PiiEntityDomainTypeExtensions
+    {
+        internal static string GetString(this PiiEntityDomainType type)
+        {
+            return type switch
+            {
+                PiiEntityDomainType.ProtectedHealthInformation => "PHI",
+                _ => null,
+            };
+        }
+    }
+}
diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/src/RecognizePiiEntitiesOptions.cs b/sdk/textanalytics/Azure.AI.TextAnalytics/src/RecognizePiiEntitiesOptions.cs
new file mode 100644
index 0000000000000..b0dec7a9f179a
--- /dev/null
+++ b/sdk/textanalytics/Azure.AI.TextAnalytics/src/RecognizePiiEntitiesOptions.cs
@@ -0,0 +1,26 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+namespace Azure.AI.TextAnalytics
+{
+    /// <summary>
+    /// Options that allow callers to specify details about how the operation
+    /// is run and what information is returned from it by the service.
+    /// </summary>
+    public class RecognizePiiEntitiesOptions : TextAnalyticsRequestOptions
+    {
+        /// <summary>
+        /// Initializes a new instance of the <see cref="RecognizePiiEntitiesOptions"/>
+        /// class.
+        /// </summary>
+        public RecognizePiiEntitiesOptions()
+        {
+        }
+
+        /// <summary>
+        /// Filters the response entities to ones only included in the specified domain.
+        /// For more information see the Text Analytics service documentation.
+        /// </summary>
+        public PiiEntityDomainType DomainFilter { get; set; } // NOTE(review): non-nullable, and the enum's only member has value 0, so an unset filter is ProtectedHealthInformation and GetString() yields "PHI" - confirm this default is intended
+    }
+}
diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/src/RecognizePiiEntitiesResult.cs b/sdk/textanalytics/Azure.AI.TextAnalytics/src/RecognizePiiEntitiesResult.cs
new file mode 100644
index 0000000000000..1d76eb7ffe67a
--- /dev/null
+++ b/sdk/textanalytics/Azure.AI.TextAnalytics/src/RecognizePiiEntitiesResult.cs
@@ -0,0 +1,44 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System;
+
+namespace Azure.AI.TextAnalytics
+{
+    /// <summary>
+    /// The result of the recognize PII entities operation on a
+    /// document, containing a collection of the <see cref="PiiEntity"/>
+    /// objects containing Personally Identifiable Information that were
+    /// found in that document.
+    /// </summary>
+    public class RecognizePiiEntitiesResult : TextAnalyticsResult
+    {
+        private readonly PiiEntityCollection _entities;
+
+        internal RecognizePiiEntitiesResult(string id, TextDocumentStatistics statistics, PiiEntityCollection entities)
+            : base(id, statistics)
+        {
+            _entities = entities;
+        }
+
+        internal RecognizePiiEntitiesResult(string id, TextAnalyticsError error) : base(id, error) { }
+
+        ///
+        /// Gets the collection of PII entities containing Personally
+        /// Identifiable Information in the document.
+        ///
+        public PiiEntityCollection Entities
+        {
+            get
+            {
+                if (HasError)
+                {
+#pragma warning disable CA1065 // Do not raise exceptions in unexpected locations
+                    throw new InvalidOperationException($"Cannot access result for document {Id}, due to error {Error.ErrorCode}: {Error.Message}");
+#pragma warning restore CA1065 // Do not raise exceptions in unexpected locations
+                }
+                return _entities;
+            }
+        }
+    }
+}
diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/src/RecognizePiiEntitiesResultCollection.cs b/sdk/textanalytics/Azure.AI.TextAnalytics/src/RecognizePiiEntitiesResultCollection.cs
new file mode 100644
index 0000000000000..0f9f3f6d8159b
--- /dev/null
+++ b/sdk/textanalytics/Azure.AI.TextAnalytics/src/RecognizePiiEntitiesResultCollection.cs
@@ -0,0 +1,34 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System.Collections.Generic;
+using System.Collections.ObjectModel;
+
+namespace Azure.AI.TextAnalytics
+{
+    /// <summary>
+    /// Collection of <see cref="RecognizePiiEntitiesResult"/> objects corresponding
+    /// to a batch of documents, and information about the batch operation.
+    /// </summary>
+    public class RecognizePiiEntitiesResultCollection : ReadOnlyCollection<RecognizePiiEntitiesResult>
+    {
+        internal RecognizePiiEntitiesResultCollection(IList<RecognizePiiEntitiesResult> list, TextDocumentBatchStatistics statistics, string modelVersion) : base(list)
+        {
+            Statistics = statistics;
+            ModelVersion = modelVersion;
+        }
+
+        /// <summary>
+        /// Gets statistics about the documents and how they were processed
+        /// by the service. This property will have a value when IncludeStatistics
+        /// is set to true in the client call.
+        /// </summary>
+        public TextDocumentBatchStatistics Statistics { get; }
+
+        /// <summary>
+        /// Gets the version of the Text Analytics model used by this operation
+        /// on this batch of documents.
+        /// </summary>
+        public string ModelVersion { get; }
+    }
+}
diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/src/TextAnalyticsClient.cs b/sdk/textanalytics/Azure.AI.TextAnalytics/src/TextAnalyticsClient.cs
index 20a120ac9cc60..114e83523565d 100644
--- a/sdk/textanalytics/Azure.AI.TextAnalytics/src/TextAnalyticsClient.cs
+++ b/sdk/textanalytics/Azure.AI.TextAnalytics/src/TextAnalyticsClient.cs
@@ -696,6 +696,328 @@ private Response RecognizeEntitiesBatch(Multi
         #endregion
+        #region Recognize PII Entities
+
+        /// <summary>
+        /// Runs a predictive model to identify a collection of entities containing
+        /// Personally Identifiable Information found in the passed-in document,
+        /// and categorize those entities into types such as US social security
+        /// number, drivers license number, or credit card number.
+        /// For more information on available categories, see
+        /// the Text Analytics service documentation.
+        /// For a list of languages supported by this operation, see
+        /// the Text Analytics service documentation.
+        /// For document length limits, maximum batch size, and supported text encoding, see
+        /// the Text Analytics service documentation.
+        /// </summary>
+        /// <param name="document">The document to analyze.</param>
+        /// <param name="language">The language that the document is written in.
+        /// If unspecified, this value will be set to the default language in
+        /// the client options in the request sent to the
+        /// service. If set to an empty string, the service will apply a model
+        /// where the language is explicitly set to "None".</param>
+        /// <param name="options">The additional configurable <see cref="RecognizePiiEntitiesOptions"/> that may be passed when
+        /// recognizing PII entities. Options include entity domain filters, model version, and more.</param>
+        /// <param name="cancellationToken">A <see cref="CancellationToken"/>
+        /// controlling the request lifetime.</param>
+        /// <returns>A result containing the collection of entities identified
+        /// in the document, as well as a score indicating the confidence
+        /// that the entity correctly matches the identified substring.</returns>
+        /// <exception cref="RequestFailedException">Service returned a non-success
+        /// status code.</exception>
+        public virtual async Task<Response<PiiEntityCollection>> RecognizePiiEntitiesAsync(string document, string language = default, RecognizePiiEntitiesOptions options = default, CancellationToken cancellationToken = default)
+        {
+            Argument.AssertNotNullOrEmpty(document, nameof(document));
+            options ??= new RecognizePiiEntitiesOptions();
+
+            using DiagnosticScope scope = _clientDiagnostics.CreateScope($"{nameof(TextAnalyticsClient)}.{nameof(RecognizePiiEntities)}");
+            scope.AddAttribute("document", document); // NOTE(review): copies the raw input, which may contain PII, into the diagnostic scope - confirm this is intended
+            scope.Start();
+
+            try
+            {
+                var documents = new List<MultiLanguageInput>() { ConvertToMultiLanguageInput(document, language) };
+
+                var result = await _serviceRestClient.EntitiesRecognitionPiiAsync(
+                    new MultiLanguageBatchInput(documents),
+                    options.ModelVersion,
+                    options.IncludeStatistics,
+                    options.DomainFilter.GetString(),
+                    _stringCodeUnit,
+                    cancellationToken: cancellationToken).ConfigureAwait(false);
+                Response response = result.GetRawResponse();
+
+                if (result.Value.Errors.Count > 0)
+                {
+                    // only one document, so we can ignore the id and grab the first error message.
+                    var error = Transforms.ConvertToError(result.Value.Errors[0].Error);
+                    throw await _clientDiagnostics.CreateRequestFailedExceptionAsync(response, error.Message, error.ErrorCode.ToString(), CreateAdditionalInformation(error)).ConfigureAwait(false);
+                }
+
+                return Response.FromValue(Transforms.ConvertToPiiEntityCollection(result.Value.Documents[0]), response);
+            }
+            catch (Exception e)
+            {
+                scope.Failed(e);
+                throw;
+            }
+        }
+
+        ///
+        /// Runs a predictive model to identify a collection of entities containing
+        /// Personally Identifiable Information found in the passed-in document,
+        /// and categorize those entities into types such as US social security
+        /// number, drivers license number, or credit card number.
+        /// For more information on available categories, see
+        /// the Text Analytics service documentation.
+        /// For a list of languages supported by this operation, see
+        /// the Text Analytics service documentation.
+        /// For document length limits, maximum batch size, and supported text encoding, see
+        /// the Text Analytics service documentation.
+        ///
+        /// <param name="document">The document to analyze.</param>
+        /// <param name="language">The language that the document is written in.
+        /// If unspecified, this value will be set to the default language in
+        /// the client options in the request sent to the
+        /// service. If set to an empty string, the service will apply a model
+        /// where the language is explicitly set to "None".</param>
+        /// <param name="options">The additional configurable <see cref="RecognizePiiEntitiesOptions"/> that may be passed when
+        /// recognizing PII entities. Options include entity domain filters, model version, and more.</param>
+        /// <param name="cancellationToken">A <see cref="CancellationToken"/>
+        /// controlling the request lifetime.</param>
+        /// <returns>A result containing the collection of entities identified
+        /// in the document, as well as a score indicating the confidence
+        /// that the entity correctly matches the identified substring.</returns>
+        /// <exception cref="RequestFailedException">Service returned a non-success
+        /// status code.</exception>
+        public virtual Response<PiiEntityCollection> RecognizePiiEntities(string document, string language = default, RecognizePiiEntitiesOptions options = default, CancellationToken cancellationToken = default)
+        {
+            Argument.AssertNotNullOrEmpty(document, nameof(document));
+            options ??= new RecognizePiiEntitiesOptions();
+
+            using DiagnosticScope scope = _clientDiagnostics.CreateScope($"{nameof(TextAnalyticsClient)}.{nameof(RecognizePiiEntities)}");
+            scope.AddAttribute("document", document); // NOTE(review): copies the raw input, which may contain PII, into the diagnostic scope - confirm this is intended
+            scope.Start();
+
+            try
+            {
+                var documents = new List<MultiLanguageInput>() { ConvertToMultiLanguageInput(document, language) };
+
+                var result = _serviceRestClient.EntitiesRecognitionPii(
+                    new MultiLanguageBatchInput(documents),
+                    options.ModelVersion,
+                    options.IncludeStatistics,
+                    options.DomainFilter.GetString(),
+                    _stringCodeUnit,
+                    cancellationToken: cancellationToken);
+                Response response = result.GetRawResponse();
+
+                if (result.Value.Errors.Count > 0)
+                {
+                    // only one document, so we can ignore the id and grab the first error message.
+                    var error = Transforms.ConvertToError(result.Value.Errors[0].Error);
+                    throw _clientDiagnostics.CreateRequestFailedException(response, error.Message, error.ErrorCode.ToString(), CreateAdditionalInformation(error));
+                }
+
+                return Response.FromValue(Transforms.ConvertToPiiEntityCollection(result.Value.Documents[0]), response);
+            }
+            catch (Exception e)
+            {
+                scope.Failed(e);
+                throw;
+            }
+        }
+
+        /// <summary>
+        /// Runs a predictive model to identify a collection of entities containing
+        /// Personally Identifiable Information found in the passed-in documents,
+        /// and categorize those entities into types such as US social security
+        /// number, drivers license number, or credit card number.
+        /// For more information on available categories, see
+        /// the Text Analytics service documentation.
+        /// For a list of languages supported by this operation, see
+        /// the Text Analytics service documentation.
+        /// For document length limits, maximum batch size, and supported text encoding, see
+        /// the Text Analytics service documentation.
+        /// </summary>
+        /// <param name="documents">The documents to analyze.</param>
+        /// <param name="language">The language that the documents are written in.
+        /// If unspecified, this value will be set to the default language in
+        /// the client options in the request sent to the
+        /// service. If set to an empty string, the service will apply a model
+        /// where the language is explicitly set to "None".</param>
+        /// <param name="options">The additional configurable <see cref="RecognizePiiEntitiesOptions"/> that may be passed when
+        /// recognizing PII entities. Options include entity domain filters, model version, and more.</param>
+        /// <param name="cancellationToken">A <see cref="CancellationToken"/>
+        /// controlling the request lifetime.</param>
+        /// <returns>A result containing the collection of entities identified
+        /// for each of the documents, as well as scores indicating the confidence
+        /// that a given entity correctly matches the identified substring.</returns>
+        /// <exception cref="RequestFailedException">Service returned a non-success
+        /// status code.</exception>
+        public virtual async Task<Response<RecognizePiiEntitiesResultCollection>> RecognizePiiEntitiesBatchAsync(IEnumerable<string> documents, string language = default, RecognizePiiEntitiesOptions options = default, CancellationToken cancellationToken = default)
+        {
+            Argument.AssertNotNullOrEmpty(documents, nameof(documents));
+            options ??= new RecognizePiiEntitiesOptions();
+            MultiLanguageBatchInput documentInputs = ConvertToMultiLanguageInputs(documents, language);
+
+            return await RecognizePiiEntitiesBatchAsync(documentInputs, options, cancellationToken).ConfigureAwait(false);
+        }
+
+        /// <summary>
+        /// Runs a predictive model to identify a collection of entities containing
+        /// Personally Identifiable Information found in the passed-in documents,
+        /// and categorize those entities into types such as US social security
+        /// number, drivers license number, or credit card number.
+        /// For more information on available categories, see
+        /// the Text Analytics service documentation.
+        /// For a list of languages supported by this operation, see
+        /// the Text Analytics service documentation.
+        /// For document length limits, maximum batch size, and supported text encoding, see
+        /// the Text Analytics service documentation.
+        /// </summary>
+        /// <param name="documents">The documents to analyze.</param>
+        /// <param name="language">The language that the documents are written in.
+        /// If unspecified, this value will be set to the default language in
+        /// the client options in the request sent to the
+        /// service. If set to an empty string, the service will apply a model
+        /// where the language is explicitly set to "None".</param>
+        /// <param name="options">The additional configurable <see cref="RecognizePiiEntitiesOptions"/> that may be passed when
+        /// recognizing PII entities. Options include entity domain filters, model version, and more.</param>
+        /// <param name="cancellationToken">A <see cref="CancellationToken"/>
+        /// controlling the request lifetime.</param>
+        /// <returns>A result containing the collection of entities identified
+        /// for each of the documents, as well as scores indicating the confidence
+        /// that a given entity correctly matches the identified substring.</returns>
+        /// <exception cref="RequestFailedException">Service returned a non-success
+        /// status code.</exception>
+        public virtual Response<RecognizePiiEntitiesResultCollection> RecognizePiiEntitiesBatch(IEnumerable<string> documents, string language = default, RecognizePiiEntitiesOptions options = default, CancellationToken cancellationToken = default)
+        {
+            Argument.AssertNotNullOrEmpty(documents, nameof(documents));
+            options ??= new RecognizePiiEntitiesOptions();
+            MultiLanguageBatchInput documentInputs = ConvertToMultiLanguageInputs(documents, language);
+
+            return RecognizePiiEntitiesBatch(documentInputs, options, cancellationToken);
+        }
+
+        /// <summary>
+        /// Runs a predictive model to identify a collection of entities containing
+        /// Personally Identifiable Information found in the passed-in document,
+        /// and categorize those entities into types such as US social security
+        /// number, drivers license number, or credit card number.
+        /// For more information on available categories, see
+        /// the Text Analytics service documentation.
+        /// For a list of languages supported by this operation, see
+        /// the Text Analytics service documentation.
+        /// For document length limits, maximum batch size, and supported text encoding, see
+        /// the Text Analytics service documentation.
+        /// </summary>
+        /// <param name="documents">The documents to analyze.</param>
+        /// <param name="options">The additional configurable <see cref="RecognizePiiEntitiesOptions"/> that may be passed when
+        /// recognizing PII entities. Options include entity domain filters, model version, and more.</param>
+        /// <param name="cancellationToken">A <see cref="CancellationToken"/>
+        /// controlling the request lifetime.</param>
+        /// <returns>A result containing the collection of entities identified
+        /// for each of the documents, as well as scores indicating the confidence
+        /// that a given entity correctly matches the identified substring.</returns>
+        /// <exception cref="RequestFailedException">Service returned a non-success
+        /// status code.</exception>
+        public virtual async Task<Response<RecognizePiiEntitiesResultCollection>> RecognizePiiEntitiesBatchAsync(IEnumerable<TextDocumentInput> documents, RecognizePiiEntitiesOptions options = default, CancellationToken cancellationToken = default)
+        {
+            Argument.AssertNotNullOrEmpty(documents, nameof(documents));
+            options ??= new RecognizePiiEntitiesOptions();
+            MultiLanguageBatchInput documentInputs = ConvertToMultiLanguageInputs(documents);
+
+            return await RecognizePiiEntitiesBatchAsync(documentInputs, options, cancellationToken).ConfigureAwait(false);
+        }
+
+        /// <summary>
+        /// Runs a predictive model to identify a collection of entities containing
+        /// Personally Identifiable Information found in the passed-in document,
+        /// and categorize those entities into types such as US social security
+        /// number, drivers license number, or credit card number.
+        /// For more information on available categories, see
+        /// the Text Analytics service documentation.
+        /// For a list of languages supported by this operation, see
+        /// the Text Analytics service documentation.
+        /// For document length limits, maximum batch size, and supported text encoding, see
+        /// the Text Analytics service documentation.
+        /// </summary>
+        /// <param name="documents">The documents to analyze.</param>
+        /// <param name="options">The additional configurable <see cref="RecognizePiiEntitiesOptions"/> that may be passed when
+        /// recognizing PII entities. Options include entity domain filters, model version, and more.</param>
+        /// <param name="cancellationToken">A <see cref="CancellationToken"/>
+        /// controlling the request lifetime.</param>
+        /// <returns>A result containing the collection of entities identified
+        /// for each of the documents, as well as scores indicating the confidence
+        /// that a given entity correctly matches the identified substring.</returns>
+        /// <exception cref="RequestFailedException">Service returned a non-success
+        /// status code.</exception>
+ public virtual Response RecognizePiiEntitiesBatch(IEnumerable documents, RecognizePiiEntitiesOptions options = default, CancellationToken cancellationToken = default) + { + Argument.AssertNotNullOrEmpty(documents, nameof(documents)); + options ??= new RecognizePiiEntitiesOptions(); + MultiLanguageBatchInput documentInputs = ConvertToMultiLanguageInputs(documents); + + return RecognizePiiEntitiesBatch(documentInputs, options, cancellationToken); + } + + private async Task> RecognizePiiEntitiesBatchAsync(MultiLanguageBatchInput batchInput, RecognizePiiEntitiesOptions options, CancellationToken cancellationToken) + { + using DiagnosticScope scope = _clientDiagnostics.CreateScope($"{nameof(TextAnalyticsClient)}.{nameof(RecognizePiiEntitiesBatch)}"); + scope.Start(); + + try + { + Response result = await _serviceRestClient.EntitiesRecognitionPiiAsync( + batchInput, + options.ModelVersion, + options.IncludeStatistics, + options.DomainFilter.GetString(), + _stringCodeUnit, + cancellationToken).ConfigureAwait(false); + var response = result.GetRawResponse(); + + IDictionary map = CreateIdToIndexMap(batchInput.Documents); + RecognizePiiEntitiesResultCollection results = Transforms.ConvertToRecognizePiiEntitiesResultCollection(result.Value, map); + return Response.FromValue(results, response); + } + catch (Exception e) + { + scope.Failed(e); + throw; + } + } + + private Response RecognizePiiEntitiesBatch(MultiLanguageBatchInput batchInput, RecognizePiiEntitiesOptions options, CancellationToken cancellationToken) + { + using DiagnosticScope scope = _clientDiagnostics.CreateScope($"{nameof(TextAnalyticsClient)}.{nameof(RecognizePiiEntitiesBatch)}"); + scope.Start(); + + try + { + Response result = _serviceRestClient.EntitiesRecognitionPii( + batchInput, + options.ModelVersion, + options.IncludeStatistics, + options.DomainFilter.GetString(), + _stringCodeUnit, + cancellationToken); + var response = result.GetRawResponse(); + + IDictionary map = 
CreateIdToIndexMap(batchInput.Documents); + RecognizePiiEntitiesResultCollection results = Transforms.ConvertToRecognizePiiEntitiesResultCollection(result.Value, map); + return Response.FromValue(results, response); + } + catch (Exception e) + { + scope.Failed(e); + throw; + } + } + + #endregion + #region Analyze Sentiment /// /// Runs a predictive model to identify the positive, negative, neutral diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/src/TextAnalyticsModelFactory.cs b/sdk/textanalytics/Azure.AI.TextAnalytics/src/TextAnalyticsModelFactory.cs index d007f5e6432cd..602b3e8ca7aa6 100644 --- a/sdk/textanalytics/Azure.AI.TextAnalytics/src/TextAnalyticsModelFactory.cs +++ b/sdk/textanalytics/Azure.AI.TextAnalytics/src/TextAnalyticsModelFactory.cs @@ -304,7 +304,7 @@ public static CategorizedEntityCollection CategorizedEntityCollection(IList /// Sets the property. /// Sets the property. - /// Sets the collection of . + /// Sets the collection of . /// A new instance of for mocking purposes. public static RecognizeEntitiesResult RecognizeEntitiesResult(string id, TextDocumentStatistics statistics, CategorizedEntityCollection entities) { @@ -336,6 +336,72 @@ public static RecognizeEntitiesResultCollection RecognizeEntitiesResultCollectio #endregion Recognize Entities + #region Recognize PII Entities + /// + /// Initializes a new instance of for mocking purposes. + /// + /// Sets the property. + /// Sets the property. + /// Sets the property. + /// Sets the property. + /// /// Sets the property. + /// Sets the property. + /// A new instance of for mocking purposes. + public static PiiEntity PiiEntity(string text, string category, string subCategory, double score, int offset, int length) + { + return new PiiEntity(new Entity(text, category, subCategory, offset, length, score)); + } + + /// + /// Initializes a new instance of for mocking purposes. + /// + /// Sets the collection of . + /// Sets the property. + /// Sets the property. 
+ /// A new instance of for mocking purposes. + public static PiiEntityCollection PiiEntityCollection(IList entities, string redactedText, IList warnings = default) + { + warnings ??= new List(); + return new PiiEntityCollection(entities, redactedText, warnings); + } + + /// + /// Initializes a new instance of for mocking purposes. + /// + /// Sets the property. + /// Sets the property. + /// Sets the collection of . + /// A new instance of for mocking purposes. + public static RecognizePiiEntitiesResult RecognizePiiEntitiesResult(string id, TextDocumentStatistics statistics, PiiEntityCollection entities) + { + return new RecognizePiiEntitiesResult(id, statistics, entities); + } + + /// + /// Initializes a new instance of for mocking purposes. + /// + /// Sets the property. + /// Sets the property. + /// A new instance of for mocking purposes. + public static RecognizePiiEntitiesResult RecognizePiiEntitiesResult(string id, TextAnalyticsError error) + { + return new RecognizePiiEntitiesResult(id, error); + } + + /// + /// Initializes a new instance of for mocking purposes. + /// + /// Sets the collection of . + /// Sets the property. + /// Sets the property. + /// A new instance of for mocking purposes. + public static RecognizePiiEntitiesResultCollection RecognizePiiEntitiesResultCollection(IEnumerable list, TextDocumentBatchStatistics statistics, string modelVersion) + { + return new RecognizePiiEntitiesResultCollection(list.ToList(), statistics, modelVersion); + } + + #endregion Recognize PII Entities + #region Extract KeyPhrase /// /// Initializes a new instance of for mocking purposes. 
diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/src/Transforms.cs b/sdk/textanalytics/Azure.AI.TextAnalytics/src/Transforms.cs index 1e390e8bc4907..273e93b4ffac0 100644 --- a/sdk/textanalytics/Azure.AI.TextAnalytics/src/Transforms.cs +++ b/sdk/textanalytics/Azure.AI.TextAnalytics/src/Transforms.cs @@ -157,6 +157,39 @@ internal static RecognizeEntitiesResultCollection ConvertToRecognizeEntitiesResu #endregion + #region Recognize PII Entities + + internal static List ConvertToPiiEntityList(List entities) + => entities.Select((entity) => new PiiEntity(entity)).ToList(); + + internal static PiiEntityCollection ConvertToPiiEntityCollection(PiiDocumentEntities documentEntities) + { + return new PiiEntityCollection(ConvertToPiiEntityList(documentEntities.Entities.ToList()), documentEntities.RedactedText, ConvertToWarnings(documentEntities.Warnings)); + } + + internal static RecognizePiiEntitiesResultCollection ConvertToRecognizePiiEntitiesResultCollection(PiiEntitiesResult results, IDictionary idToIndexMap) + { + var recognizeEntities = new List(); + + //Read errors + foreach (DocumentError error in results.Errors) + { + recognizeEntities.Add(new RecognizePiiEntitiesResult(error.Id, ConvertToError(error.Error))); + } + + //Read document entities + foreach (PiiDocumentEntities docEntities in results.Documents) + { + recognizeEntities.Add(new RecognizePiiEntitiesResult(docEntities.Id, docEntities.Statistics ?? 
default, ConvertToPiiEntityCollection(docEntities))); + } + + recognizeEntities = SortHeterogeneousCollection(recognizeEntities, idToIndexMap); + + return new RecognizePiiEntitiesResultCollection(recognizeEntities, results.Statistics, results.ModelVersion); + } + + #endregion + #region Recognize Linked Entities internal static LinkedEntityCollection ConvertToLinkedEntityCollection(DocumentLinkedEntities documentEntities) diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/tests/RecognizePiiEntitiesTests.cs b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/RecognizePiiEntitiesTests.cs new file mode 100644 index 0000000000000..8daed20c42540 --- /dev/null +++ b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/RecognizePiiEntitiesTests.cs @@ -0,0 +1,169 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +using NUnit.Framework; + +namespace Azure.AI.TextAnalytics.Tests +{ + public class RecognizePiiEntitiesTests : TextAnalyticsClientLiveTestBase + { + public RecognizePiiEntitiesTests(bool isAsync) : base(isAsync) { } + + private const string singleEnglish = "A developer with SSN 859-98-0987 whose phone number is 800-102-1100 is building tools with our APIs."; + + private static List batchConvenienceDocuments = new List + { + "A developer with SSN 859-98-0987 whose phone number is 800-102-1100 is building tools with our APIs.", + "Your ABA number - 111000025 - is the first 9 digits in the lower left hand corner of your personal check." 
+ }; + + private static List batchDocuments = new List + { + new TextDocumentInput("1", "A developer with SSN 859-98-0987 whose phone number is 800-102-1100 is building tools with our APIs.") + { + Language = "en", + }, + new TextDocumentInput("2", "Your ABA number - 111000025 - is the first 9 digits in the lower left hand corner of your personal check.") + { + Language = "en", + } + }; + + [Test] + public async Task RecognizePiiEntitiesTest() + { + TextAnalyticsClient client = GetClient(); + string document = singleEnglish; + + PiiEntityCollection entities = await client.RecognizePiiEntitiesAsync(document); + + Assert.AreEqual(2, entities.Count); + Assert.IsNotNull(entities.RedactedText); + + var entitiesList = new List { "859-98-0987", "800-102-1100" }; + foreach (PiiEntity entity in entities) + { + Assert.IsTrue(entitiesList.Contains(entity.Text)); + Assert.Greater(entity.Length, 0); + } + } + + [Test] + public async Task RecognizePiiEntitiesWithLanguageTest() + { + TextAnalyticsClient client = GetClient(); + string document = singleEnglish; + + PiiEntityCollection entities = await client.RecognizePiiEntitiesAsync(document, "en"); + + Assert.AreEqual(2, entities.Count); + Assert.IsNotNull(entities.RedactedText); + } + + [Test] + public async Task RecognizePiiEntitiesWithDomainTest() + { + TextAnalyticsClient client = GetClient(); + string document = "I work at Microsoft and my email is atest@microsoft.com"; + + PiiEntityCollection entities = await client.RecognizePiiEntitiesAsync(document, "en", new RecognizePiiEntitiesOptions() { DomainFilter = PiiEntityDomainType.ProtectedHealthInformation } ); + + Assert.AreEqual(1, entities.Count); + Assert.AreEqual("atest@microsoft.com", entities.FirstOrDefault().Text); + Assert.AreEqual(EntityCategory.Email, entities.FirstOrDefault().Category); + Assert.IsNotNull(entities.RedactedText); + } + + [Test] + public async Task RecognizePiiEntitiesBatchWithErrorTest() + { + TextAnalyticsClient client = GetClient(); + var 
documents = new List + { + "A developer with SSN 859-98-0987 whose phone number is 800-102-1100 is building tools with our APIs.", + "", + "Your ABA number - 111000025 - is the first 9 digits in the lower left hand corner of your personal check.", + }; + + RecognizePiiEntitiesResultCollection results = await client.RecognizePiiEntitiesBatchAsync(documents); + + Assert.IsFalse(results[0].HasError); + Assert.IsFalse(results[2].HasError); + + var exceptionMessage = "Cannot access result for document 1, due to error InvalidDocument: Document text is empty."; + Assert.IsTrue(results[1].HasError); + InvalidOperationException ex = Assert.Throws(() => results[1].Entities.GetType()); + Assert.AreEqual(exceptionMessage, ex.Message); + } + + [Test] + public async Task RecognizePiiEntitiesBatchConvenienceTest() + { + TextAnalyticsClient client = GetClient(); + var documents = batchConvenienceDocuments; + + RecognizePiiEntitiesResultCollection results = await client.RecognizePiiEntitiesBatchAsync(documents); + + foreach (RecognizePiiEntitiesResult result in results) + { + Assert.GreaterOrEqual(result.Entities.Count(), 2); + } + } + + [Test] + public async Task RecognizePiiEntitiesBatchConvenienceWithStatisticsTest() + { + TextAnalyticsClient client = GetClient(); + var documents = batchConvenienceDocuments; + + RecognizePiiEntitiesResultCollection results = await client.RecognizePiiEntitiesBatchAsync(documents, "en", new RecognizePiiEntitiesOptions { IncludeStatistics = true }); + + foreach (RecognizePiiEntitiesResult result in results) + { + Assert.GreaterOrEqual(result.Entities.Count(), 2); + } + + Assert.Greater(results.Statistics.DocumentCount, 0); + Assert.AreEqual(0, results.Statistics.InvalidDocumentCount); + Assert.Greater(results.Statistics.TransactionCount, 0); + Assert.Greater(results.Statistics.ValidDocumentCount, 0); + } + + [Test] + public async Task RecognizePiiEntitiesBatchTest() + { + TextAnalyticsClient client = GetClient(); + List documents = batchDocuments; 
+ + RecognizePiiEntitiesResultCollection results = await client.RecognizePiiEntitiesBatchAsync(documents); + + foreach (RecognizePiiEntitiesResult result in results) + { + Assert.GreaterOrEqual(result.Entities.Count(), 2); + } + } + + [Test] + public async Task RecognizePiiEntitiesBatchWithStatisticsTest() + { + TextAnalyticsClient client = GetClient(); + List documents = batchDocuments; + + RecognizePiiEntitiesResultCollection results = await client.RecognizePiiEntitiesBatchAsync(documents, new RecognizePiiEntitiesOptions { IncludeStatistics = true }); + + foreach (RecognizePiiEntitiesResult result in results) + { + Assert.GreaterOrEqual(result.Entities.Count(), 2); + } + + Assert.Greater(results.Statistics.DocumentCount, 0); + Assert.AreEqual(0, results.Statistics.InvalidDocumentCount); + Assert.Greater(results.Statistics.TransactionCount, 0); + Assert.Greater(results.Statistics.ValidDocumentCount, 0); + } + } +} diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchConvenienceTest.json b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchConvenienceTest.json new file mode 100644 index 0000000000000..ec465c95ac817 --- /dev/null +++ b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchConvenienceTest.json @@ -0,0 +1,109 @@ +{ + "Entries": [ + { + "RequestUri": "https://cognitiveusw2dev.azure-api.net/text/analytics/v3.1-preview.2/entities/recognition/pii?showStats=false\u0026domain=PHI\u0026stringIndexType=Utf16CodeUnit", + "RequestMethod": "POST", + "RequestHeaders": { + "Accept": [ + "application/json", + "text/json" + ], + "Content-Length": "294", + "Content-Type": "application/json", + "Ocp-Apim-Subscription-Key": "Sanitized", + "traceparent": "00-adc4d182eae1fe489cd39593b88ffbeb-fa795d352e969f47-00", + "User-Agent": [ + 
"azsdk-net-AI.TextAnalytics/5.1.0-dev.20200901.1", + "(.NET Core 4.6.29130.01; Microsoft Windows 10.0.19041 )" + ], + "x-ms-client-request-id": "c173c94341dba618c4f393086e026774", + "x-ms-return-client-request-id": "true" + }, + "RequestBody": { + "documents": [ + { + "id": "0", + "text": "A developer with SSN 859-98-0987 whose phone number is 800-102-1100 is building tools with our APIs.", + "language": "en" + }, + { + "id": "1", + "text": "Your ABA number - 111000025 - is the first 9 digits in the lower left hand corner of your personal check.", + "language": "en" + } + ] + }, + "StatusCode": 200, + "ResponseHeaders": { + "apim-request-id": "59b792a1-605d-4114-b364-36a6cac45c4d", + "Content-Type": "application/json; charset=utf-8", + "csp-billing-usage": "CognitiveServices.TextAnalytics.BatchScoring=2", + "Date": "Wed, 02 Sep 2020 13:56:57 GMT", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains; preload", + "Transfer-Encoding": "chunked", + "X-Content-Type-Options": "nosniff", + "x-envoy-upstream-service-time": "119" + }, + "ResponseBody": { + "documents": [ + { + "redactedText": "A developer with SSN *********** whose phone number is ************ is building tools with our APIs.", + "id": "0", + "entities": [ + { + "text": "859-98-0987", + "category": "U.S. 
Social Security Number (SSN)", + "offset": 21, + "length": 11, + "confidenceScore": 0.65 + }, + { + "text": "800-102-1100", + "category": "Phone Number", + "offset": 55, + "length": 12, + "confidenceScore": 0.8 + } + ], + "warnings": [] + }, + { + "redactedText": "Your ABA number - ********* - is the first 9 digits in the lower left hand corner of your personal check.", + "id": "1", + "entities": [ + { + "text": "111000025", + "category": "Phone Number", + "offset": 18, + "length": 9, + "confidenceScore": 0.8 + }, + { + "text": "111000025", + "category": "ABA Routing Number", + "offset": 18, + "length": 9, + "confidenceScore": 0.75 + }, + { + "text": "111000025", + "category": "New Zealand Social Welfare Number", + "offset": 18, + "length": 9, + "confidenceScore": 0.65 + } + ], + "warnings": [] + } + ], + "errors": [], + "modelVersion": "2020-07-01" + } + } + ], + "Variables": { + "RandomSeed": "820182005", + "TEXT_ANALYTICS_API_KEY": "Sanitized", + "TEXT_ANALYTICS_ENDPOINT": "https://cognitiveusw2dev.azure-api.net" + } +} \ No newline at end of file diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchConvenienceTestAsync.json b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchConvenienceTestAsync.json new file mode 100644 index 0000000000000..5d5100e94e60d --- /dev/null +++ b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchConvenienceTestAsync.json @@ -0,0 +1,109 @@ +{ + "Entries": [ + { + "RequestUri": "https://cognitiveusw2dev.azure-api.net/text/analytics/v3.1-preview.2/entities/recognition/pii?showStats=false\u0026domain=PHI\u0026stringIndexType=Utf16CodeUnit", + "RequestMethod": "POST", + "RequestHeaders": { + "Accept": [ + "application/json", + "text/json" + ], + "Content-Length": "294", + "Content-Type": "application/json", + "Ocp-Apim-Subscription-Key": 
"Sanitized", + "traceparent": "00-541e310d19d73c4fba064af118c9975f-3be957f46bd1e441-00", + "User-Agent": [ + "azsdk-net-AI.TextAnalytics/5.1.0-dev.20200901.1", + "(.NET Core 4.6.29130.01; Microsoft Windows 10.0.19041 )" + ], + "x-ms-client-request-id": "2cf317139685f87e3e57184c7620cca1", + "x-ms-return-client-request-id": "true" + }, + "RequestBody": { + "documents": [ + { + "id": "0", + "text": "A developer with SSN 859-98-0987 whose phone number is 800-102-1100 is building tools with our APIs.", + "language": "en" + }, + { + "id": "1", + "text": "Your ABA number - 111000025 - is the first 9 digits in the lower left hand corner of your personal check.", + "language": "en" + } + ] + }, + "StatusCode": 200, + "ResponseHeaders": { + "apim-request-id": "e14e6939-fd42-4535-97cc-4656d444cea5", + "Content-Type": "application/json; charset=utf-8", + "csp-billing-usage": "CognitiveServices.TextAnalytics.BatchScoring=2", + "Date": "Wed, 02 Sep 2020 13:56:59 GMT", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains; preload", + "Transfer-Encoding": "chunked", + "X-Content-Type-Options": "nosniff", + "x-envoy-upstream-service-time": "117" + }, + "ResponseBody": { + "documents": [ + { + "redactedText": "A developer with SSN *********** whose phone number is ************ is building tools with our APIs.", + "id": "0", + "entities": [ + { + "text": "859-98-0987", + "category": "U.S. 
Social Security Number (SSN)", + "offset": 21, + "length": 11, + "confidenceScore": 0.65 + }, + { + "text": "800-102-1100", + "category": "Phone Number", + "offset": 55, + "length": 12, + "confidenceScore": 0.8 + } + ], + "warnings": [] + }, + { + "redactedText": "Your ABA number - ********* - is the first 9 digits in the lower left hand corner of your personal check.", + "id": "1", + "entities": [ + { + "text": "111000025", + "category": "Phone Number", + "offset": 18, + "length": 9, + "confidenceScore": 0.8 + }, + { + "text": "111000025", + "category": "ABA Routing Number", + "offset": 18, + "length": 9, + "confidenceScore": 0.75 + }, + { + "text": "111000025", + "category": "New Zealand Social Welfare Number", + "offset": 18, + "length": 9, + "confidenceScore": 0.65 + } + ], + "warnings": [] + } + ], + "errors": [], + "modelVersion": "2020-07-01" + } + } + ], + "Variables": { + "RandomSeed": "573069134", + "TEXT_ANALYTICS_API_KEY": "Sanitized", + "TEXT_ANALYTICS_ENDPOINT": "https://cognitiveusw2dev.azure-api.net" + } +} \ No newline at end of file diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchConvenienceWithStatisticsTest.json b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchConvenienceWithStatisticsTest.json new file mode 100644 index 0000000000000..8364e0afffd71 --- /dev/null +++ b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchConvenienceWithStatisticsTest.json @@ -0,0 +1,123 @@ +{ + "Entries": [ + { + "RequestUri": "https://cognitiveusw2dev.azure-api.net/text/analytics/v3.1-preview.2/entities/recognition/pii?showStats=true\u0026domain=PHI\u0026stringIndexType=Utf16CodeUnit", + "RequestMethod": "POST", + "RequestHeaders": { + "Accept": [ + "application/json", + "text/json" + ], + "Content-Length": "294", + "Content-Type": "application/json", + 
"Ocp-Apim-Subscription-Key": "Sanitized", + "traceparent": "00-9870f217395e3b4789208acf3d82da73-2912e1e71abadd40-00", + "User-Agent": [ + "azsdk-net-AI.TextAnalytics/5.1.0-dev.20200901.1", + "(.NET Core 4.6.29130.01; Microsoft Windows 10.0.19041 )" + ], + "x-ms-client-request-id": "eecc011d319477b6a3836264a1185b02", + "x-ms-return-client-request-id": "true" + }, + "RequestBody": { + "documents": [ + { + "id": "0", + "text": "A developer with SSN 859-98-0987 whose phone number is 800-102-1100 is building tools with our APIs.", + "language": "en" + }, + { + "id": "1", + "text": "Your ABA number - 111000025 - is the first 9 digits in the lower left hand corner of your personal check.", + "language": "en" + } + ] + }, + "StatusCode": 200, + "ResponseHeaders": { + "apim-request-id": "6f57c9ed-eed7-4461-92a4-4cd79d3dd886", + "Content-Type": "application/json; charset=utf-8", + "csp-billing-usage": "CognitiveServices.TextAnalytics.BatchScoring=2", + "Date": "Wed, 02 Sep 2020 13:56:57 GMT", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains; preload", + "Transfer-Encoding": "chunked", + "X-Content-Type-Options": "nosniff", + "x-envoy-upstream-service-time": "119" + }, + "ResponseBody": { + "statistics": { + "documentsCount": 2, + "validDocumentsCount": 2, + "erroneousDocumentsCount": 0, + "transactionsCount": 2 + }, + "documents": [ + { + "redactedText": "A developer with SSN *********** whose phone number is ************ is building tools with our APIs.", + "id": "0", + "statistics": { + "charactersCount": 100, + "transactionsCount": 1 + }, + "entities": [ + { + "text": "859-98-0987", + "category": "U.S. 
Social Security Number (SSN)", + "offset": 21, + "length": 11, + "confidenceScore": 0.65 + }, + { + "text": "800-102-1100", + "category": "Phone Number", + "offset": 55, + "length": 12, + "confidenceScore": 0.8 + } + ], + "warnings": [] + }, + { + "redactedText": "Your ABA number - ********* - is the first 9 digits in the lower left hand corner of your personal check.", + "id": "1", + "statistics": { + "charactersCount": 105, + "transactionsCount": 1 + }, + "entities": [ + { + "text": "111000025", + "category": "Phone Number", + "offset": 18, + "length": 9, + "confidenceScore": 0.8 + }, + { + "text": "111000025", + "category": "ABA Routing Number", + "offset": 18, + "length": 9, + "confidenceScore": 0.75 + }, + { + "text": "111000025", + "category": "New Zealand Social Welfare Number", + "offset": 18, + "length": 9, + "confidenceScore": 0.65 + } + ], + "warnings": [] + } + ], + "errors": [], + "modelVersion": "2020-07-01" + } + } + ], + "Variables": { + "RandomSeed": "1097726284", + "TEXT_ANALYTICS_API_KEY": "Sanitized", + "TEXT_ANALYTICS_ENDPOINT": "https://cognitiveusw2dev.azure-api.net" + } +} \ No newline at end of file diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchConvenienceWithStatisticsTestAsync.json b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchConvenienceWithStatisticsTestAsync.json new file mode 100644 index 0000000000000..1581dfe64121b --- /dev/null +++ b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchConvenienceWithStatisticsTestAsync.json @@ -0,0 +1,123 @@ +{ + "Entries": [ + { + "RequestUri": "https://cognitiveusw2dev.azure-api.net/text/analytics/v3.1-preview.2/entities/recognition/pii?showStats=true\u0026domain=PHI\u0026stringIndexType=Utf16CodeUnit", + "RequestMethod": "POST", + "RequestHeaders": { + "Accept": [ + "application/json", + 
"text/json" + ], + "Content-Length": "294", + "Content-Type": "application/json", + "Ocp-Apim-Subscription-Key": "Sanitized", + "traceparent": "00-1618855e5a26b84aaa8cca13cd21196e-7a5c6e7c329c8a46-00", + "User-Agent": [ + "azsdk-net-AI.TextAnalytics/5.1.0-dev.20200901.1", + "(.NET Core 4.6.29130.01; Microsoft Windows 10.0.19041 )" + ], + "x-ms-client-request-id": "8ceec0e33c8ae486f712d7bec0815450", + "x-ms-return-client-request-id": "true" + }, + "RequestBody": { + "documents": [ + { + "id": "0", + "text": "A developer with SSN 859-98-0987 whose phone number is 800-102-1100 is building tools with our APIs.", + "language": "en" + }, + { + "id": "1", + "text": "Your ABA number - 111000025 - is the first 9 digits in the lower left hand corner of your personal check.", + "language": "en" + } + ] + }, + "StatusCode": 200, + "ResponseHeaders": { + "apim-request-id": "dc4ed54c-57a2-4b26-9bc2-045989f60234", + "Content-Type": "application/json; charset=utf-8", + "csp-billing-usage": "CognitiveServices.TextAnalytics.BatchScoring=2", + "Date": "Wed, 02 Sep 2020 13:56:59 GMT", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains; preload", + "Transfer-Encoding": "chunked", + "X-Content-Type-Options": "nosniff", + "x-envoy-upstream-service-time": "122" + }, + "ResponseBody": { + "statistics": { + "documentsCount": 2, + "validDocumentsCount": 2, + "erroneousDocumentsCount": 0, + "transactionsCount": 2 + }, + "documents": [ + { + "redactedText": "A developer with SSN *********** whose phone number is ************ is building tools with our APIs.", + "id": "0", + "statistics": { + "charactersCount": 100, + "transactionsCount": 1 + }, + "entities": [ + { + "text": "859-98-0987", + "category": "U.S. 
Social Security Number (SSN)", + "offset": 21, + "length": 11, + "confidenceScore": 0.65 + }, + { + "text": "800-102-1100", + "category": "Phone Number", + "offset": 55, + "length": 12, + "confidenceScore": 0.8 + } + ], + "warnings": [] + }, + { + "redactedText": "Your ABA number - ********* - is the first 9 digits in the lower left hand corner of your personal check.", + "id": "1", + "statistics": { + "charactersCount": 105, + "transactionsCount": 1 + }, + "entities": [ + { + "text": "111000025", + "category": "Phone Number", + "offset": 18, + "length": 9, + "confidenceScore": 0.8 + }, + { + "text": "111000025", + "category": "ABA Routing Number", + "offset": 18, + "length": 9, + "confidenceScore": 0.75 + }, + { + "text": "111000025", + "category": "New Zealand Social Welfare Number", + "offset": 18, + "length": 9, + "confidenceScore": 0.65 + } + ], + "warnings": [] + } + ], + "errors": [], + "modelVersion": "2020-07-01" + } + } + ], + "Variables": { + "RandomSeed": "822339012", + "TEXT_ANALYTICS_API_KEY": "Sanitized", + "TEXT_ANALYTICS_ENDPOINT": "https://cognitiveusw2dev.azure-api.net" + } +} \ No newline at end of file diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchTest.json b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchTest.json new file mode 100644 index 0000000000000..ccb27c01b2202 --- /dev/null +++ b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchTest.json @@ -0,0 +1,109 @@ +{ + "Entries": [ + { + "RequestUri": "https://cognitiveusw2dev.azure-api.net/text/analytics/v3.1-preview.2/entities/recognition/pii?showStats=false\u0026domain=PHI\u0026stringIndexType=Utf16CodeUnit", + "RequestMethod": "POST", + "RequestHeaders": { + "Accept": [ + "application/json", + "text/json" + ], + "Content-Length": "294", + "Content-Type": "application/json", + 
"Ocp-Apim-Subscription-Key": "Sanitized", + "traceparent": "00-e2da076e49ff444aa40451440bc50d8e-28b6cc8498794b49-00", + "User-Agent": [ + "azsdk-net-AI.TextAnalytics/5.1.0-dev.20200901.1", + "(.NET Core 4.6.29130.01; Microsoft Windows 10.0.19041 )" + ], + "x-ms-client-request-id": "f53bbc38a2b6814e4c65b541758c5e0a", + "x-ms-return-client-request-id": "true" + }, + "RequestBody": { + "documents": [ + { + "id": "1", + "text": "A developer with SSN 859-98-0987 whose phone number is 800-102-1100 is building tools with our APIs.", + "language": "en" + }, + { + "id": "2", + "text": "Your ABA number - 111000025 - is the first 9 digits in the lower left hand corner of your personal check.", + "language": "en" + } + ] + }, + "StatusCode": 200, + "ResponseHeaders": { + "apim-request-id": "a6e395cb-0ae2-45f7-9a00-df1b3987bc0b", + "Content-Type": "application/json; charset=utf-8", + "csp-billing-usage": "CognitiveServices.TextAnalytics.BatchScoring=2", + "Date": "Wed, 02 Sep 2020 13:56:57 GMT", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains; preload", + "Transfer-Encoding": "chunked", + "X-Content-Type-Options": "nosniff", + "x-envoy-upstream-service-time": "117" + }, + "ResponseBody": { + "documents": [ + { + "redactedText": "A developer with SSN *********** whose phone number is ************ is building tools with our APIs.", + "id": "1", + "entities": [ + { + "text": "859-98-0987", + "category": "U.S. 
Social Security Number (SSN)", + "offset": 21, + "length": 11, + "confidenceScore": 0.65 + }, + { + "text": "800-102-1100", + "category": "Phone Number", + "offset": 55, + "length": 12, + "confidenceScore": 0.8 + } + ], + "warnings": [] + }, + { + "redactedText": "Your ABA number - ********* - is the first 9 digits in the lower left hand corner of your personal check.", + "id": "2", + "entities": [ + { + "text": "111000025", + "category": "Phone Number", + "offset": 18, + "length": 9, + "confidenceScore": 0.8 + }, + { + "text": "111000025", + "category": "ABA Routing Number", + "offset": 18, + "length": 9, + "confidenceScore": 0.75 + }, + { + "text": "111000025", + "category": "New Zealand Social Welfare Number", + "offset": 18, + "length": 9, + "confidenceScore": 0.65 + } + ], + "warnings": [] + } + ], + "errors": [], + "modelVersion": "2020-07-01" + } + } + ], + "Variables": { + "RandomSeed": "169579662", + "TEXT_ANALYTICS_API_KEY": "Sanitized", + "TEXT_ANALYTICS_ENDPOINT": "https://cognitiveusw2dev.azure-api.net" + } +} \ No newline at end of file diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchTestAsync.json b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchTestAsync.json new file mode 100644 index 0000000000000..0ee1c19b1853f --- /dev/null +++ b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchTestAsync.json @@ -0,0 +1,109 @@ +{ + "Entries": [ + { + "RequestUri": "https://cognitiveusw2dev.azure-api.net/text/analytics/v3.1-preview.2/entities/recognition/pii?showStats=false\u0026domain=PHI\u0026stringIndexType=Utf16CodeUnit", + "RequestMethod": "POST", + "RequestHeaders": { + "Accept": [ + "application/json", + "text/json" + ], + "Content-Length": "294", + "Content-Type": "application/json", + "Ocp-Apim-Subscription-Key": "Sanitized", + "traceparent": 
"00-b4271ff58300ab45b3f030c3874ed32a-7b05e4d64050dd41-00", + "User-Agent": [ + "azsdk-net-AI.TextAnalytics/5.1.0-dev.20200901.1", + "(.NET Core 4.6.29130.01; Microsoft Windows 10.0.19041 )" + ], + "x-ms-client-request-id": "79f5f1883ef14a0947e802df013a4e11", + "x-ms-return-client-request-id": "true" + }, + "RequestBody": { + "documents": [ + { + "id": "1", + "text": "A developer with SSN 859-98-0987 whose phone number is 800-102-1100 is building tools with our APIs.", + "language": "en" + }, + { + "id": "2", + "text": "Your ABA number - 111000025 - is the first 9 digits in the lower left hand corner of your personal check.", + "language": "en" + } + ] + }, + "StatusCode": 200, + "ResponseHeaders": { + "apim-request-id": "feafa17d-a25a-4f19-a940-66080787479f", + "Content-Type": "application/json; charset=utf-8", + "csp-billing-usage": "CognitiveServices.TextAnalytics.BatchScoring=2", + "Date": "Wed, 02 Sep 2020 13:57:00 GMT", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains; preload", + "Transfer-Encoding": "chunked", + "X-Content-Type-Options": "nosniff", + "x-envoy-upstream-service-time": "114" + }, + "ResponseBody": { + "documents": [ + { + "redactedText": "A developer with SSN *********** whose phone number is ************ is building tools with our APIs.", + "id": "1", + "entities": [ + { + "text": "859-98-0987", + "category": "U.S. 
Social Security Number (SSN)", + "offset": 21, + "length": 11, + "confidenceScore": 0.65 + }, + { + "text": "800-102-1100", + "category": "Phone Number", + "offset": 55, + "length": 12, + "confidenceScore": 0.8 + } + ], + "warnings": [] + }, + { + "redactedText": "Your ABA number - ********* - is the first 9 digits in the lower left hand corner of your personal check.", + "id": "2", + "entities": [ + { + "text": "111000025", + "category": "Phone Number", + "offset": 18, + "length": 9, + "confidenceScore": 0.8 + }, + { + "text": "111000025", + "category": "ABA Routing Number", + "offset": 18, + "length": 9, + "confidenceScore": 0.75 + }, + { + "text": "111000025", + "category": "New Zealand Social Welfare Number", + "offset": 18, + "length": 9, + "confidenceScore": 0.65 + } + ], + "warnings": [] + } + ], + "errors": [], + "modelVersion": "2020-07-01" + } + } + ], + "Variables": { + "RandomSeed": "1167133842", + "TEXT_ANALYTICS_API_KEY": "Sanitized", + "TEXT_ANALYTICS_ENDPOINT": "https://cognitiveusw2dev.azure-api.net" + } +} \ No newline at end of file diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchWithErrorTest.json b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchWithErrorTest.json new file mode 100644 index 0000000000000..91c0ae9017ae4 --- /dev/null +++ b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchWithErrorTest.json @@ -0,0 +1,126 @@ +{ + "Entries": [ + { + "RequestUri": "https://cognitiveusw2dev.azure-api.net/text/analytics/v3.1-preview.2/entities/recognition/pii?showStats=false\u0026domain=PHI\u0026stringIndexType=Utf16CodeUnit", + "RequestMethod": "POST", + "RequestHeaders": { + "Accept": [ + "application/json", + "text/json" + ], + "Content-Length": "331", + "Content-Type": "application/json", + "Ocp-Apim-Subscription-Key": "Sanitized", + 
"traceparent": "00-9537551555e5e24d806dc744d2c6af86-e8cc50b2c39df04b-00", + "User-Agent": [ + "azsdk-net-AI.TextAnalytics/5.1.0-dev.20200901.1", + "(.NET Core 4.6.29130.01; Microsoft Windows 10.0.19041 )" + ], + "x-ms-client-request-id": "e382b2e96bf3675f4d7d7960b8b32916", + "x-ms-return-client-request-id": "true" + }, + "RequestBody": { + "documents": [ + { + "id": "0", + "text": "A developer with SSN 859-98-0987 whose phone number is 800-102-1100 is building tools with our APIs.", + "language": "en" + }, + { + "id": "1", + "text": "", + "language": "en" + }, + { + "id": "2", + "text": "Your ABA number - 111000025 - is the first 9 digits in the lower left hand corner of your personal check.", + "language": "en" + } + ] + }, + "StatusCode": 200, + "ResponseHeaders": { + "apim-request-id": "c9a17d80-f7fb-4ea7-b5b1-ca691854fb87", + "Content-Type": "application/json; charset=utf-8", + "csp-billing-usage": "CognitiveServices.TextAnalytics.BatchScoring=2", + "Date": "Wed, 02 Sep 2020 13:56:57 GMT", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains; preload", + "Transfer-Encoding": "chunked", + "X-Content-Type-Options": "nosniff", + "x-envoy-upstream-service-time": "110" + }, + "ResponseBody": { + "documents": [ + { + "redactedText": "A developer with SSN *********** whose phone number is ************ is building tools with our APIs.", + "id": "0", + "entities": [ + { + "text": "859-98-0987", + "category": "U.S. 
Social Security Number (SSN)", + "offset": 21, + "length": 11, + "confidenceScore": 0.65 + }, + { + "text": "800-102-1100", + "category": "Phone Number", + "offset": 55, + "length": 12, + "confidenceScore": 0.8 + } + ], + "warnings": [] + }, + { + "redactedText": "Your ABA number - ********* - is the first 9 digits in the lower left hand corner of your personal check.", + "id": "2", + "entities": [ + { + "text": "111000025", + "category": "Phone Number", + "offset": 18, + "length": 9, + "confidenceScore": 0.8 + }, + { + "text": "111000025", + "category": "ABA Routing Number", + "offset": 18, + "length": 9, + "confidenceScore": 0.75 + }, + { + "text": "111000025", + "category": "New Zealand Social Welfare Number", + "offset": 18, + "length": 9, + "confidenceScore": 0.65 + } + ], + "warnings": [] + } + ], + "errors": [ + { + "id": "1", + "error": { + "code": "InvalidArgument", + "message": "Invalid document in request.", + "innererror": { + "code": "InvalidDocument", + "message": "Document text is empty." 
+ } + } + } + ], + "modelVersion": "2020-07-01" + } + } + ], + "Variables": { + "RandomSeed": "239145501", + "TEXT_ANALYTICS_API_KEY": "Sanitized", + "TEXT_ANALYTICS_ENDPOINT": "https://cognitiveusw2dev.azure-api.net" + } +} \ No newline at end of file diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchWithErrorTestAsync.json b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchWithErrorTestAsync.json new file mode 100644 index 0000000000000..f661decb7755e --- /dev/null +++ b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchWithErrorTestAsync.json @@ -0,0 +1,126 @@ +{ + "Entries": [ + { + "RequestUri": "https://cognitiveusw2dev.azure-api.net/text/analytics/v3.1-preview.2/entities/recognition/pii?showStats=false\u0026domain=PHI\u0026stringIndexType=Utf16CodeUnit", + "RequestMethod": "POST", + "RequestHeaders": { + "Accept": [ + "application/json", + "text/json" + ], + "Content-Length": "331", + "Content-Type": "application/json", + "Ocp-Apim-Subscription-Key": "Sanitized", + "traceparent": "00-8e01688edcbefc49bf7cf9349e02cf04-2d56e9fab3eac248-00", + "User-Agent": [ + "azsdk-net-AI.TextAnalytics/5.1.0-dev.20200901.1", + "(.NET Core 4.6.29130.01; Microsoft Windows 10.0.19041 )" + ], + "x-ms-client-request-id": "e218864f9605e01c86941869a0be6841", + "x-ms-return-client-request-id": "true" + }, + "RequestBody": { + "documents": [ + { + "id": "0", + "text": "A developer with SSN 859-98-0987 whose phone number is 800-102-1100 is building tools with our APIs.", + "language": "en" + }, + { + "id": "1", + "text": "", + "language": "en" + }, + { + "id": "2", + "text": "Your ABA number - 111000025 - is the first 9 digits in the lower left hand corner of your personal check.", + "language": "en" + } + ] + }, + "StatusCode": 200, + "ResponseHeaders": { + "apim-request-id": 
"dd6a7f8f-d0aa-4eb3-93ab-03fa4dc9e14f", + "Content-Type": "application/json; charset=utf-8", + "csp-billing-usage": "CognitiveServices.TextAnalytics.BatchScoring=2", + "Date": "Wed, 02 Sep 2020 13:57:00 GMT", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains; preload", + "Transfer-Encoding": "chunked", + "X-Content-Type-Options": "nosniff", + "x-envoy-upstream-service-time": "123" + }, + "ResponseBody": { + "documents": [ + { + "redactedText": "A developer with SSN *********** whose phone number is ************ is building tools with our APIs.", + "id": "0", + "entities": [ + { + "text": "859-98-0987", + "category": "U.S. Social Security Number (SSN)", + "offset": 21, + "length": 11, + "confidenceScore": 0.65 + }, + { + "text": "800-102-1100", + "category": "Phone Number", + "offset": 55, + "length": 12, + "confidenceScore": 0.8 + } + ], + "warnings": [] + }, + { + "redactedText": "Your ABA number - ********* - is the first 9 digits in the lower left hand corner of your personal check.", + "id": "2", + "entities": [ + { + "text": "111000025", + "category": "Phone Number", + "offset": 18, + "length": 9, + "confidenceScore": 0.8 + }, + { + "text": "111000025", + "category": "ABA Routing Number", + "offset": 18, + "length": 9, + "confidenceScore": 0.75 + }, + { + "text": "111000025", + "category": "New Zealand Social Welfare Number", + "offset": 18, + "length": 9, + "confidenceScore": 0.65 + } + ], + "warnings": [] + } + ], + "errors": [ + { + "id": "1", + "error": { + "code": "InvalidArgument", + "message": "Invalid document in request.", + "innererror": { + "code": "InvalidDocument", + "message": "Document text is empty." 
+ } + } + } + ], + "modelVersion": "2020-07-01" + } + } + ], + "Variables": { + "RandomSeed": "1607409987", + "TEXT_ANALYTICS_API_KEY": "Sanitized", + "TEXT_ANALYTICS_ENDPOINT": "https://cognitiveusw2dev.azure-api.net" + } +} \ No newline at end of file diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchWithStatisticsTest.json b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchWithStatisticsTest.json new file mode 100644 index 0000000000000..a0c68b073caab --- /dev/null +++ b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchWithStatisticsTest.json @@ -0,0 +1,123 @@ +{ + "Entries": [ + { + "RequestUri": "https://cognitiveusw2dev.azure-api.net/text/analytics/v3.1-preview.2/entities/recognition/pii?showStats=true\u0026domain=PHI\u0026stringIndexType=Utf16CodeUnit", + "RequestMethod": "POST", + "RequestHeaders": { + "Accept": [ + "application/json", + "text/json" + ], + "Content-Length": "294", + "Content-Type": "application/json", + "Ocp-Apim-Subscription-Key": "Sanitized", + "traceparent": "00-77a42fa1eecaf9418f91c9ce4a3c46de-c1919d2b2228ea48-00", + "User-Agent": [ + "azsdk-net-AI.TextAnalytics/5.1.0-dev.20200901.1", + "(.NET Core 4.6.29130.01; Microsoft Windows 10.0.19041 )" + ], + "x-ms-client-request-id": "9d7c7445e676a32cbc4854ae746d53b1", + "x-ms-return-client-request-id": "true" + }, + "RequestBody": { + "documents": [ + { + "id": "1", + "text": "A developer with SSN 859-98-0987 whose phone number is 800-102-1100 is building tools with our APIs.", + "language": "en" + }, + { + "id": "2", + "text": "Your ABA number - 111000025 - is the first 9 digits in the lower left hand corner of your personal check.", + "language": "en" + } + ] + }, + "StatusCode": 200, + "ResponseHeaders": { + "apim-request-id": "cdd1d83d-75ef-4392-a6c1-e89492fb5a51", + "Content-Type": 
"application/json; charset=utf-8", + "csp-billing-usage": "CognitiveServices.TextAnalytics.BatchScoring=2", + "Date": "Wed, 02 Sep 2020 13:56:59 GMT", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains; preload", + "Transfer-Encoding": "chunked", + "X-Content-Type-Options": "nosniff", + "x-envoy-upstream-service-time": "112" + }, + "ResponseBody": { + "statistics": { + "documentsCount": 2, + "validDocumentsCount": 2, + "erroneousDocumentsCount": 0, + "transactionsCount": 2 + }, + "documents": [ + { + "redactedText": "A developer with SSN *********** whose phone number is ************ is building tools with our APIs.", + "id": "1", + "statistics": { + "charactersCount": 100, + "transactionsCount": 1 + }, + "entities": [ + { + "text": "859-98-0987", + "category": "U.S. Social Security Number (SSN)", + "offset": 21, + "length": 11, + "confidenceScore": 0.65 + }, + { + "text": "800-102-1100", + "category": "Phone Number", + "offset": 55, + "length": 12, + "confidenceScore": 0.8 + } + ], + "warnings": [] + }, + { + "redactedText": "Your ABA number - ********* - is the first 9 digits in the lower left hand corner of your personal check.", + "id": "2", + "statistics": { + "charactersCount": 105, + "transactionsCount": 1 + }, + "entities": [ + { + "text": "111000025", + "category": "Phone Number", + "offset": 18, + "length": 9, + "confidenceScore": 0.8 + }, + { + "text": "111000025", + "category": "ABA Routing Number", + "offset": 18, + "length": 9, + "confidenceScore": 0.75 + }, + { + "text": "111000025", + "category": "New Zealand Social Welfare Number", + "offset": 18, + "length": 9, + "confidenceScore": 0.65 + } + ], + "warnings": [] + } + ], + "errors": [], + "modelVersion": "2020-07-01" + } + } + ], + "Variables": { + "RandomSeed": "448790314", + "TEXT_ANALYTICS_API_KEY": "Sanitized", + "TEXT_ANALYTICS_ENDPOINT": "https://cognitiveusw2dev.azure-api.net" + } +} \ No newline at end of file diff --git 
a/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchWithStatisticsTestAsync.json b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchWithStatisticsTestAsync.json new file mode 100644 index 0000000000000..671baa27b8969 --- /dev/null +++ b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesBatchWithStatisticsTestAsync.json @@ -0,0 +1,123 @@ +{ + "Entries": [ + { + "RequestUri": "https://cognitiveusw2dev.azure-api.net/text/analytics/v3.1-preview.2/entities/recognition/pii?showStats=true\u0026domain=PHI\u0026stringIndexType=Utf16CodeUnit", + "RequestMethod": "POST", + "RequestHeaders": { + "Accept": [ + "application/json", + "text/json" + ], + "Content-Length": "294", + "Content-Type": "application/json", + "Ocp-Apim-Subscription-Key": "Sanitized", + "traceparent": "00-ffdb9aab24fa6a43b96f84cc1abfe8ca-cff5da144bf49d4c-00", + "User-Agent": [ + "azsdk-net-AI.TextAnalytics/5.1.0-dev.20200901.1", + "(.NET Core 4.6.29130.01; Microsoft Windows 10.0.19041 )" + ], + "x-ms-client-request-id": "6372c6fc37bbef8068bba6a5d3c4483a", + "x-ms-return-client-request-id": "true" + }, + "RequestBody": { + "documents": [ + { + "id": "1", + "text": "A developer with SSN 859-98-0987 whose phone number is 800-102-1100 is building tools with our APIs.", + "language": "en" + }, + { + "id": "2", + "text": "Your ABA number - 111000025 - is the first 9 digits in the lower left hand corner of your personal check.", + "language": "en" + } + ] + }, + "StatusCode": 200, + "ResponseHeaders": { + "apim-request-id": "9801fd03-7d6d-4253-8607-e7cdfaa3c431", + "Content-Type": "application/json; charset=utf-8", + "csp-billing-usage": "CognitiveServices.TextAnalytics.BatchScoring=2", + "Date": "Wed, 02 Sep 2020 13:57:00 GMT", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains; preload", + "Transfer-Encoding": 
"chunked", + "X-Content-Type-Options": "nosniff", + "x-envoy-upstream-service-time": "125" + }, + "ResponseBody": { + "statistics": { + "documentsCount": 2, + "validDocumentsCount": 2, + "erroneousDocumentsCount": 0, + "transactionsCount": 2 + }, + "documents": [ + { + "redactedText": "A developer with SSN *********** whose phone number is ************ is building tools with our APIs.", + "id": "1", + "statistics": { + "charactersCount": 100, + "transactionsCount": 1 + }, + "entities": [ + { + "text": "859-98-0987", + "category": "U.S. Social Security Number (SSN)", + "offset": 21, + "length": 11, + "confidenceScore": 0.65 + }, + { + "text": "800-102-1100", + "category": "Phone Number", + "offset": 55, + "length": 12, + "confidenceScore": 0.8 + } + ], + "warnings": [] + }, + { + "redactedText": "Your ABA number - ********* - is the first 9 digits in the lower left hand corner of your personal check.", + "id": "2", + "statistics": { + "charactersCount": 105, + "transactionsCount": 1 + }, + "entities": [ + { + "text": "111000025", + "category": "Phone Number", + "offset": 18, + "length": 9, + "confidenceScore": 0.8 + }, + { + "text": "111000025", + "category": "ABA Routing Number", + "offset": 18, + "length": 9, + "confidenceScore": 0.75 + }, + { + "text": "111000025", + "category": "New Zealand Social Welfare Number", + "offset": 18, + "length": 9, + "confidenceScore": 0.65 + } + ], + "warnings": [] + } + ], + "errors": [], + "modelVersion": "2020-07-01" + } + } + ], + "Variables": { + "RandomSeed": "747198553", + "TEXT_ANALYTICS_API_KEY": "Sanitized", + "TEXT_ANALYTICS_ENDPOINT": "https://cognitiveusw2dev.azure-api.net" + } +} \ No newline at end of file diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesTest.json b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesTest.json new file mode 100644 index 0000000000000..5d4b7dbc326ec --- 
/dev/null +++ b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesTest.json @@ -0,0 +1,76 @@ +{ + "Entries": [ + { + "RequestUri": "https://cognitiveusw2dev.azure-api.net/text/analytics/v3.1-preview.2/entities/recognition/pii?showStats=false\u0026domain=PHI\u0026stringIndexType=Utf16CodeUnit", + "RequestMethod": "POST", + "RequestHeaders": { + "Accept": [ + "application/json", + "text/json" + ], + "Content-Length": "152", + "Content-Type": "application/json", + "Ocp-Apim-Subscription-Key": "Sanitized", + "traceparent": "00-8bb75dbfe2b57a49a0d25e7ccca5510b-e2024cd6abf84246-00", + "User-Agent": [ + "azsdk-net-AI.TextAnalytics/5.1.0-dev.20200901.1", + "(.NET Core 4.6.29130.01; Microsoft Windows 10.0.19041 )" + ], + "x-ms-client-request-id": "40f3d388777c07f3a5efa0af174b7c81", + "x-ms-return-client-request-id": "true" + }, + "RequestBody": { + "documents": [ + { + "id": "0", + "text": "A developer with SSN 859-98-0987 whose phone number is 800-102-1100 is building tools with our APIs.", + "language": "en" + } + ] + }, + "StatusCode": 200, + "ResponseHeaders": { + "apim-request-id": "00b06f2f-3bd4-49da-a851-d4aa2b82398b", + "Content-Type": "application/json; charset=utf-8", + "csp-billing-usage": "CognitiveServices.TextAnalytics.BatchScoring=1", + "Date": "Wed, 02 Sep 2020 13:56:59 GMT", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains; preload", + "Transfer-Encoding": "chunked", + "X-Content-Type-Options": "nosniff", + "x-envoy-upstream-service-time": "102" + }, + "ResponseBody": { + "documents": [ + { + "redactedText": "A developer with SSN *********** whose phone number is ************ is building tools with our APIs.", + "id": "0", + "entities": [ + { + "text": "859-98-0987", + "category": "U.S. 
Social Security Number (SSN)", + "offset": 21, + "length": 11, + "confidenceScore": 0.65 + }, + { + "text": "800-102-1100", + "category": "Phone Number", + "offset": 55, + "length": 12, + "confidenceScore": 0.8 + } + ], + "warnings": [] + } + ], + "errors": [], + "modelVersion": "2020-07-01" + } + } + ], + "Variables": { + "RandomSeed": "1989383672", + "TEXT_ANALYTICS_API_KEY": "Sanitized", + "TEXT_ANALYTICS_ENDPOINT": "https://cognitiveusw2dev.azure-api.net" + } +} \ No newline at end of file diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesTestAsync.json b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesTestAsync.json new file mode 100644 index 0000000000000..ab664b1441598 --- /dev/null +++ b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesTestAsync.json @@ -0,0 +1,76 @@ +{ + "Entries": [ + { + "RequestUri": "https://cognitiveusw2dev.azure-api.net/text/analytics/v3.1-preview.2/entities/recognition/pii?showStats=false\u0026domain=PHI\u0026stringIndexType=Utf16CodeUnit", + "RequestMethod": "POST", + "RequestHeaders": { + "Accept": [ + "application/json", + "text/json" + ], + "Content-Length": "152", + "Content-Type": "application/json", + "Ocp-Apim-Subscription-Key": "Sanitized", + "traceparent": "00-dc9a93dbbe6fc84189aa65e4831a091d-e14b08c834a59943-00", + "User-Agent": [ + "azsdk-net-AI.TextAnalytics/5.1.0-dev.20200901.1", + "(.NET Core 4.6.29130.01; Microsoft Windows 10.0.19041 )" + ], + "x-ms-client-request-id": "b40e3ff9f3697ab649fbbfa57575c10c", + "x-ms-return-client-request-id": "true" + }, + "RequestBody": { + "documents": [ + { + "id": "0", + "text": "A developer with SSN 859-98-0987 whose phone number is 800-102-1100 is building tools with our APIs.", + "language": "en" + } + ] + }, + "StatusCode": 200, + "ResponseHeaders": { + "apim-request-id": 
"ea150190-5744-4f54-ae5f-daf1a4e36c93", + "Content-Type": "application/json; charset=utf-8", + "csp-billing-usage": "CognitiveServices.TextAnalytics.BatchScoring=1", + "Date": "Wed, 02 Sep 2020 13:57:00 GMT", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains; preload", + "Transfer-Encoding": "chunked", + "X-Content-Type-Options": "nosniff", + "x-envoy-upstream-service-time": "105" + }, + "ResponseBody": { + "documents": [ + { + "redactedText": "A developer with SSN *********** whose phone number is ************ is building tools with our APIs.", + "id": "0", + "entities": [ + { + "text": "859-98-0987", + "category": "U.S. Social Security Number (SSN)", + "offset": 21, + "length": 11, + "confidenceScore": 0.65 + }, + { + "text": "800-102-1100", + "category": "Phone Number", + "offset": 55, + "length": 12, + "confidenceScore": 0.8 + } + ], + "warnings": [] + } + ], + "errors": [], + "modelVersion": "2020-07-01" + } + } + ], + "Variables": { + "RandomSeed": "1876268396", + "TEXT_ANALYTICS_API_KEY": "Sanitized", + "TEXT_ANALYTICS_ENDPOINT": "https://cognitiveusw2dev.azure-api.net" + } +} \ No newline at end of file diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesWithDomainTest.json b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesWithDomainTest.json new file mode 100644 index 0000000000000..3ec3ee78a6ebf --- /dev/null +++ b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesWithDomainTest.json @@ -0,0 +1,69 @@ +{ + "Entries": [ + { + "RequestUri": "https://cognitiveusw2dev.azure-api.net/text/analytics/v3.1-preview.2/entities/recognition/pii?showStats=false\u0026domain=PHI\u0026stringIndexType=Utf16CodeUnit", + "RequestMethod": "POST", + "RequestHeaders": { + "Accept": [ + "application/json", + "text/json" + ], + "Content-Length": "107", + "Content-Type": 
"application/json", + "Ocp-Apim-Subscription-Key": "Sanitized", + "traceparent": "00-9d30de129face845bb06f29d5d8dd81a-f59cc3eac2cf2f47-00", + "User-Agent": [ + "azsdk-net-AI.TextAnalytics/5.1.0-dev.20200901.1", + "(.NET Core 4.6.29130.01; Microsoft Windows 10.0.19041 )" + ], + "x-ms-client-request-id": "5757e3544f245670b8a4c82d4b10bc4c", + "x-ms-return-client-request-id": "true" + }, + "RequestBody": { + "documents": [ + { + "id": "0", + "text": "I work at Microsoft and my email is atest@microsoft.com", + "language": "en" + } + ] + }, + "StatusCode": 200, + "ResponseHeaders": { + "apim-request-id": "7272ad2e-b969-4b01-8056-a0f91199ae4c", + "Content-Type": "application/json; charset=utf-8", + "csp-billing-usage": "CognitiveServices.TextAnalytics.BatchScoring=1", + "Date": "Wed, 02 Sep 2020 13:56:59 GMT", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains; preload", + "Transfer-Encoding": "chunked", + "X-Content-Type-Options": "nosniff", + "x-envoy-upstream-service-time": "104" + }, + "ResponseBody": { + "documents": [ + { + "redactedText": "I work at Microsoft and my email is *******************", + "id": "0", + "entities": [ + { + "text": "atest@microsoft.com", + "category": "Email", + "offset": 36, + "length": 19, + "confidenceScore": 0.8 + } + ], + "warnings": [] + } + ], + "errors": [], + "modelVersion": "2020-07-01" + } + } + ], + "Variables": { + "RandomSeed": "114454170", + "TEXT_ANALYTICS_API_KEY": "Sanitized", + "TEXT_ANALYTICS_ENDPOINT": "https://cognitiveusw2dev.azure-api.net" + } +} \ No newline at end of file diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesWithDomainTestAsync.json b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesWithDomainTestAsync.json new file mode 100644 index 0000000000000..a09ed11a82be7 --- /dev/null +++ 
b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesWithDomainTestAsync.json @@ -0,0 +1,69 @@ +{ + "Entries": [ + { + "RequestUri": "https://cognitiveusw2dev.azure-api.net/text/analytics/v3.1-preview.2/entities/recognition/pii?showStats=false\u0026domain=PHI\u0026stringIndexType=Utf16CodeUnit", + "RequestMethod": "POST", + "RequestHeaders": { + "Accept": [ + "application/json", + "text/json" + ], + "Content-Length": "107", + "Content-Type": "application/json", + "Ocp-Apim-Subscription-Key": "Sanitized", + "traceparent": "00-62434d07c653554587e5ccd85ea2fc59-29df2b02ae9a1c4c-00", + "User-Agent": [ + "azsdk-net-AI.TextAnalytics/5.1.0-dev.20200901.1", + "(.NET Core 4.6.29130.01; Microsoft Windows 10.0.19041 )" + ], + "x-ms-client-request-id": "4a37fade57ea6b5a5ff8ab22891b6b20", + "x-ms-return-client-request-id": "true" + }, + "RequestBody": { + "documents": [ + { + "id": "0", + "text": "I work at Microsoft and my email is atest@microsoft.com", + "language": "en" + } + ] + }, + "StatusCode": 200, + "ResponseHeaders": { + "apim-request-id": "8ada8951-8f27-48ed-a7c9-9acbbcb043fd", + "Content-Type": "application/json; charset=utf-8", + "csp-billing-usage": "CognitiveServices.TextAnalytics.BatchScoring=1", + "Date": "Wed, 02 Sep 2020 13:57:00 GMT", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains; preload", + "Transfer-Encoding": "chunked", + "X-Content-Type-Options": "nosniff", + "x-envoy-upstream-service-time": "91" + }, + "ResponseBody": { + "documents": [ + { + "redactedText": "I work at Microsoft and my email is *******************", + "id": "0", + "entities": [ + { + "text": "atest@microsoft.com", + "category": "Email", + "offset": 36, + "length": 19, + "confidenceScore": 0.8 + } + ], + "warnings": [] + } + ], + "errors": [], + "modelVersion": "2020-07-01" + } + } + ], + "Variables": { + "RandomSeed": "820241717", + "TEXT_ANALYTICS_API_KEY": "Sanitized", + "TEXT_ANALYTICS_ENDPOINT": 
"https://cognitiveusw2dev.azure-api.net" + } +} \ No newline at end of file diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesWithLanguageTest.json b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesWithLanguageTest.json new file mode 100644 index 0000000000000..00a90f8c5ca84 --- /dev/null +++ b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesWithLanguageTest.json @@ -0,0 +1,76 @@ +{ + "Entries": [ + { + "RequestUri": "https://cognitiveusw2dev.azure-api.net/text/analytics/v3.1-preview.2/entities/recognition/pii?showStats=false\u0026domain=PHI\u0026stringIndexType=Utf16CodeUnit", + "RequestMethod": "POST", + "RequestHeaders": { + "Accept": [ + "application/json", + "text/json" + ], + "Content-Length": "152", + "Content-Type": "application/json", + "Ocp-Apim-Subscription-Key": "Sanitized", + "traceparent": "00-37cbc09db2447f44b7dba3642357b663-e2f92bf5d2b17a43-00", + "User-Agent": [ + "azsdk-net-AI.TextAnalytics/5.1.0-dev.20200901.1", + "(.NET Core 4.6.29130.01; Microsoft Windows 10.0.19041 )" + ], + "x-ms-client-request-id": "2a5fd4f3bf3db4629b14e3f9050dd01c", + "x-ms-return-client-request-id": "true" + }, + "RequestBody": { + "documents": [ + { + "id": "0", + "text": "A developer with SSN 859-98-0987 whose phone number is 800-102-1100 is building tools with our APIs.", + "language": "en" + } + ] + }, + "StatusCode": 200, + "ResponseHeaders": { + "apim-request-id": "42fc8d9d-0145-4a7d-929c-cd285d3454dd", + "Content-Type": "application/json; charset=utf-8", + "csp-billing-usage": "CognitiveServices.TextAnalytics.BatchScoring=1", + "Date": "Wed, 02 Sep 2020 13:56:59 GMT", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains; preload", + "Transfer-Encoding": "chunked", + "X-Content-Type-Options": "nosniff", + "x-envoy-upstream-service-time": "98" + }, + "ResponseBody": 
{ + "documents": [ + { + "redactedText": "A developer with SSN *********** whose phone number is ************ is building tools with our APIs.", + "id": "0", + "entities": [ + { + "text": "859-98-0987", + "category": "U.S. Social Security Number (SSN)", + "offset": 21, + "length": 11, + "confidenceScore": 0.65 + }, + { + "text": "800-102-1100", + "category": "Phone Number", + "offset": 55, + "length": 12, + "confidenceScore": 0.8 + } + ], + "warnings": [] + } + ], + "errors": [], + "modelVersion": "2020-07-01" + } + } + ], + "Variables": { + "RandomSeed": "2103634278", + "TEXT_ANALYTICS_API_KEY": "Sanitized", + "TEXT_ANALYTICS_ENDPOINT": "https://cognitiveusw2dev.azure-api.net" + } +} \ No newline at end of file diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesWithLanguageTestAsync.json b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesWithLanguageTestAsync.json new file mode 100644 index 0000000000000..366796191ec80 --- /dev/null +++ b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/SessionRecords/RecognizePiiEntitiesTests/RecognizePiiEntitiesWithLanguageTestAsync.json @@ -0,0 +1,76 @@ +{ + "Entries": [ + { + "RequestUri": "https://cognitiveusw2dev.azure-api.net/text/analytics/v3.1-preview.2/entities/recognition/pii?showStats=false\u0026domain=PHI\u0026stringIndexType=Utf16CodeUnit", + "RequestMethod": "POST", + "RequestHeaders": { + "Accept": [ + "application/json", + "text/json" + ], + "Content-Length": "152", + "Content-Type": "application/json", + "Ocp-Apim-Subscription-Key": "Sanitized", + "traceparent": "00-cc058aac3f216e4aa9a7e16e7e305b3b-e03ae9a987006b45-00", + "User-Agent": [ + "azsdk-net-AI.TextAnalytics/5.1.0-dev.20200901.1", + "(.NET Core 4.6.29130.01; Microsoft Windows 10.0.19041 )" + ], + "x-ms-client-request-id": "df1d0a85dc552d663ac944ac69a80a0d", + "x-ms-return-client-request-id": "true" + }, + "RequestBody": { + 
"documents": [ + { + "id": "0", + "text": "A developer with SSN 859-98-0987 whose phone number is 800-102-1100 is building tools with our APIs.", + "language": "en" + } + ] + }, + "StatusCode": 200, + "ResponseHeaders": { + "apim-request-id": "2fc119e8-cea5-46d1-889d-cb0f54793652", + "Content-Type": "application/json; charset=utf-8", + "csp-billing-usage": "CognitiveServices.TextAnalytics.BatchScoring=1", + "Date": "Wed, 02 Sep 2020 13:57:00 GMT", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains; preload", + "Transfer-Encoding": "chunked", + "X-Content-Type-Options": "nosniff", + "x-envoy-upstream-service-time": "106" + }, + "ResponseBody": { + "documents": [ + { + "redactedText": "A developer with SSN *********** whose phone number is ************ is building tools with our APIs.", + "id": "0", + "entities": [ + { + "text": "859-98-0987", + "category": "U.S. Social Security Number (SSN)", + "offset": 21, + "length": 11, + "confidenceScore": 0.65 + }, + { + "text": "800-102-1100", + "category": "Phone Number", + "offset": 55, + "length": 12, + "confidenceScore": 0.8 + } + ], + "warnings": [] + } + ], + "errors": [], + "modelVersion": "2020-07-01" + } + } + ], + "Variables": { + "RandomSeed": "1408135123", + "TEXT_ANALYTICS_API_KEY": "Sanitized", + "TEXT_ANALYTICS_ENDPOINT": "https://cognitiveusw2dev.azure-api.net" + } +} \ No newline at end of file diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/tests/TextAnalyticsClientLiveTests.cs b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/TextAnalyticsClientLiveTests.cs index 1e0bd6a6c2a45..546176b6a84d6 100644 --- a/sdk/textanalytics/Azure.AI.TextAnalytics/tests/TextAnalyticsClientLiveTests.cs +++ b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/TextAnalyticsClientLiveTests.cs @@ -1,7 +1,6 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
-using System;
 using System.Collections.Generic;
 using System.Linq;
 using System.Threading.Tasks;
diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/tests/TextAnalyticsClientTests.cs b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/TextAnalyticsClientTests.cs
index 948563f47bf24..84c5a1ce94bb0 100644
--- a/sdk/textanalytics/Azure.AI.TextAnalytics/tests/TextAnalyticsClientTests.cs
+++ b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/TextAnalyticsClientTests.cs
@@ -57,6 +57,19 @@ public void RecognizeEntitiesArgumentValidation()
             Assert.ThrowsAsync<ArgumentNullException>(() => Client.RecognizeEntitiesBatchAsync(null, new TextAnalyticsRequestOptions()));
         }
 
+        // Each PII overload is called with an empty or null argument and must throw.
+        // NOTE(review): assumes empty -> ArgumentException and null -> ArgumentNullException; confirm against the client's argument checks.
+        [Test]
+        public void RecognizePiiEntitiesArgumentValidation()
+        {
+            var documents = new List<string>();
+            Assert.ThrowsAsync<ArgumentException>(() => Client.RecognizePiiEntitiesAsync(""));
+            Assert.ThrowsAsync<ArgumentNullException>(() => Client.RecognizePiiEntitiesAsync((string)null));
+            Assert.ThrowsAsync<ArgumentNullException>(() => Client.RecognizePiiEntitiesBatchAsync((List<string>)null));
+            Assert.ThrowsAsync<ArgumentException>(() => Client.RecognizePiiEntitiesBatchAsync(documents));
+            Assert.ThrowsAsync<ArgumentNullException>(() => Client.RecognizePiiEntitiesBatchAsync(null, new RecognizePiiEntitiesOptions()));
+        }
+
         [Test]
         public void AnalyzeSentimentArgumentValidation()
         {
diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/tests/samples/Sample5_RecognizePiiEntities.cs b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/samples/Sample5_RecognizePiiEntities.cs
new file mode 100644
index 0000000000000..ab8afb8131737
--- /dev/null
+++ b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/samples/Sample5_RecognizePiiEntities.cs
@@ -0,0 +1,38 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System;
+using Azure.AI.TextAnalytics.Tests;
+using Azure.Core.TestFramework;
+using NUnit.Framework;
+
+namespace Azure.AI.TextAnalytics.Samples
+{
+    [LiveOnly]
+    public partial class TextAnalyticsSamples : SamplesBase<TextAnalyticsTestEnvironment>
+    {
+        /// <summary>Recognizes PII entities in one document, then prints the redacted text and every entity found.</summary>
+        [Test]
+        public void RecognizePiiEntities()
+        {
+            string endpoint = TestEnvironment.Endpoint;
+            string apiKey = TestEnvironment.ApiKey;
+
+            #region Snippet:TextAnalyticsSample5CreateClient
+            var client = new TextAnalyticsClient(new Uri(endpoint), new AzureKeyCredential(apiKey));
+            #endregion
+
+            #region Snippet:RecognizePiiEntities
+            string document = "A developer with SSN 859-98-0987 whose phone number is 800-102-1100 is building tools with our APIs.";
+            PiiEntityCollection entities = client.RecognizePiiEntities(document).Value;
+
+            Console.WriteLine($"Redacted Text: {entities.RedactedText}");
+            Console.WriteLine($"Recognized {entities.Count} PII entit{(entities.Count == 1 ? "y" : "ies")}:");
+            foreach (PiiEntity entity in entities)
+            {
+                Console.WriteLine($"Text: {entity.Text}, Category: {entity.Category}, SubCategory: {entity.SubCategory}, Confidence score: {entity.ConfidenceScore}");
+            }
+            #endregion
+        }
+    }
+}
diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/tests/samples/Sample5_RecognizePiiEntitiesBatch.cs b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/samples/Sample5_RecognizePiiEntitiesBatch.cs
new file mode 100644
index 0000000000000..50131bf2229b4
--- /dev/null
+++ b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/samples/Sample5_RecognizePiiEntitiesBatch.cs
@@ -0,0 +1,86 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System;
+using System.Collections.Generic;
+using Azure.AI.TextAnalytics.Tests;
+using Azure.Core.TestFramework;
+using NUnit.Framework;
+
+namespace Azure.AI.TextAnalytics.Samples
+{
+    [LiveOnly]
+    public partial class TextAnalyticsSamples : SamplesBase<TextAnalyticsTestEnvironment>
+    {
+        /// <summary>Recognizes PII entities in a batch of documents with statistics enabled, printing per-document and batch results.</summary>
+        [Test]
+        public void RecognizePiiEntitiesBatch()
+        {
+            string endpoint = TestEnvironment.Endpoint;
+            string apiKey = TestEnvironment.ApiKey;
+
+            // Instantiate a client that will be used to call the service.
+            var client = new TextAnalyticsClient(new Uri(endpoint), new AzureKeyCredential(apiKey));
+
+            #region Snippet:TextAnalyticsSample5RecognizePiiEntitiesBatch
+            var documents = new List<TextDocumentInput>
+            {
+                new TextDocumentInput("1", "A developer with SSN 859-98-0987 whose phone number is 800-102-1100 is building tools with our APIs.")
+                {
+                     Language = "en",
+                },
+                new TextDocumentInput("2", "Your ABA number - 111000025 - is the first 9 digits in the lower left hand corner of your personal check.")
+                {
+                     Language = "en",
+                }
+            };
+
+            RecognizePiiEntitiesResultCollection results = client.RecognizePiiEntitiesBatch(documents, new RecognizePiiEntitiesOptions { IncludeStatistics = true });
+            #endregion
+
+            int i = 0;
+            Console.WriteLine($"Results of Azure Text Analytics \"Pii Entity Recognition\" Model, version: \"{results.ModelVersion}\"");
+            Console.WriteLine("");
+
+            foreach (RecognizePiiEntitiesResult result in results)
+            {
+                TextDocumentInput document = documents[i++];
+                Console.WriteLine($"On document (Id={document.Id}, Language=\"{document.Language}\", Text=\"{document.Text}\"):");
+
+                if (result.HasError)
+                {
+                    Console.WriteLine($" Document error code: {result.Error.ErrorCode}.");
+                    Console.WriteLine($" Message: {result.Error.Message}.");
+                }
+                else
+                {
+                    if (result.Entities.Count > 0)
+                    {
+                        Console.WriteLine($" Redacted Text: {result.Entities.RedactedText}");
+                        Console.WriteLine($" Recognized the following {result.Entities.Count} PII entit{(result.Entities.Count > 1 ? "ies" : "y")}:");
+                        foreach (PiiEntity entity in result.Entities)
+                        {
+                            Console.WriteLine($" Text: {entity.Text}, Category: {entity.Category}, SubCategory: {entity.SubCategory}, Confidence score: {entity.ConfidenceScore}");
+                        }
+                    }
+                    else
+                    {
+                        Console.WriteLine("No entities were found.");
+                    }
+
+                    Console.WriteLine($" Document statistics:");
+                    Console.WriteLine($" Character count (in Unicode graphemes): {result.Statistics.CharacterCount}");
+                    Console.WriteLine($" Transaction count: {result.Statistics.TransactionCount}");
+                    Console.WriteLine("");
+                }
+            }
+
+            Console.WriteLine($"Batch operation statistics:");
+            Console.WriteLine($" Document count: {results.Statistics.DocumentCount}");
+            Console.WriteLine($" Valid document count: {results.Statistics.ValidDocumentCount}");
+            Console.WriteLine($" Invalid document count: {results.Statistics.InvalidDocumentCount}");
+            Console.WriteLine($" Transaction count: {results.Statistics.TransactionCount}");
+            Console.WriteLine("");
+        }
+    }
+}
diff --git a/sdk/textanalytics/Azure.AI.TextAnalytics/tests/samples/Sample5_RecognizePiiEntitiesBatchConvenience.cs b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/samples/Sample5_RecognizePiiEntitiesBatchConvenience.cs
new file mode 100644
index 0000000000000..d7c9850717ba9
--- /dev/null
+++ b/sdk/textanalytics/Azure.AI.TextAnalytics/tests/samples/Sample5_RecognizePiiEntitiesBatchConvenience.cs
@@ -0,0 +1,65 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System;
+using System.Collections.Generic;
+using Azure.AI.TextAnalytics.Tests;
+using Azure.Core.TestFramework;
+using NUnit.Framework;
+
+namespace Azure.AI.TextAnalytics.Samples
+{
+    [LiveOnly]
+    public partial class TextAnalyticsSamples : SamplesBase<TextAnalyticsTestEnvironment>
+    {
+        /// <summary>Recognizes PII entities in a batch of plain strings and prints the entities found for each document.</summary>
+        [Test]
+        public void RecognizePiiEntitiesBatchConvenience()
+        {
+            string endpoint = TestEnvironment.Endpoint;
+            string apiKey = TestEnvironment.ApiKey;
+
+            // Instantiate a client that will be used to call the service.
+            var client = new TextAnalyticsClient(new Uri(endpoint), new AzureKeyCredential(apiKey));
+
+            var documents = new List<string>
+            {
+                "A developer with SSN 859-98-0987 whose phone number is 800-102-1100 is building tools with our APIs.",
+                "Your ABA number - 111000025 - is the first 9 digits in the lower left hand corner of your personal check."
+            };
+
+            #region Snippet:TextAnalyticsSample5RecognizePiiEntitiesConvenience
+            RecognizePiiEntitiesResultCollection results = client.RecognizePiiEntitiesBatch(documents);
+            #endregion
+
+            int i = 0;
+            foreach (RecognizePiiEntitiesResult result in results)
+            {
+                Console.WriteLine($"For document: {documents[i++]}");
+
+                // Report a failed document's error instead of reading its entities.
+                if (result.HasError)
+                {
+                    Console.WriteLine($" Document error code: {result.Error.ErrorCode}.");
+                    Console.WriteLine($" Message: {result.Error.Message}.");
+                    continue;
+                }
+
+                if (result.Entities.Count > 0)
+                {
+                    Console.WriteLine($"Redacted Text: {result.Entities.RedactedText}");
+                    Console.WriteLine($"The following {result.Entities.Count} PII entit{(result.Entities.Count > 1 ? "ies were" : "y was")} found:");
+
+                    foreach (PiiEntity entity in result.Entities)
+                    {
+                        Console.WriteLine($" Text: {entity.Text}, Category: {entity.Category}, SubCategory: {entity.SubCategory}, Confidence score: {entity.ConfidenceScore}");
+                    }
+                }
+                else
+                {
+                    Console.WriteLine("No entities were found.");
+                }
+            }
+        }
+    }
+}