Skip to content

Commit

Permalink
[TA] Add PII back (#14794)
Browse files Browse the repository at this point in the history
* all pii code

* tests

* recordings

* samples

* pr feedback

* add mocking

* PR feedback
  • Loading branch information
maririos authored Sep 3, 2020
1 parent 642217c commit 07773ac
Show file tree
Hide file tree
Showing 33 changed files with 2,683 additions and 2 deletions.
1 change: 1 addition & 0 deletions sdk/textanalytics/Azure.AI.TextAnalytics/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
### New Features
- It defaults to the latest supported API version, which currently is `3.1-preview.2`.
- `ErrorCode` value returned from the service is now surfaced in `RequestFailedException`.
- Added the `RecognizePiiEntities` endpoint which returns entities containing Personally Identifiable Information. This feature is available in the Text Analytics service v3.1-preview.1 and above.
- Support added for Opinion Mining. This feature is available in the Text Analytics service v3.1-preview.1 and above.
- Added `Offset` and `Length` properties for `CategorizedEntity`, `SentenceSentiment`, and `LinkedEntityMatch`. The default encoding is UTF-16 code units. For additional information see https://aka.ms/text-analytics-offsets
- `TextAnalyticsError` and `TextAnalyticsWarning` are now marked as immutable.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,28 @@ public readonly partial struct OpinionSentiment
public Azure.AI.TextAnalytics.TextSentiment Sentiment { get { throw null; } }
public string Text { get { throw null; } }
}
// API-surface listing of the PiiEntity value type. Every getter body is a `throw null`
// placeholder, so this declaration describes shape only, not behavior.
[System.Runtime.InteropServices.StructLayoutAttribute(System.Runtime.InteropServices.LayoutKind.Sequential)]
public readonly partial struct PiiEntity
{
// Placeholder fields; no member declared in this listing reads them.
private readonly object _dummy;
private readonly int _dummyPrimitive;
public Azure.AI.TextAnalytics.EntityCategory Category { get { throw null; } }
public double ConfidenceScore { get { throw null; } }
public int Length { get { throw null; } }
public int Offset { get { throw null; } }
public string SubCategory { get { throw null; } }
public string Text { get { throw null; } }
}
// API-surface listing of PiiEntityCollection: a read-only collection of PiiEntity that
// additionally exposes RedactedText and Warnings. Bodies are `throw null` placeholders.
public partial class PiiEntityCollection : System.Collections.ObjectModel.ReadOnlyCollection<Azure.AI.TextAnalytics.PiiEntity>
{
// Internal constructor only: the type is not publicly constructible through this listing.
internal PiiEntityCollection() : base (default(System.Collections.Generic.IList<Azure.AI.TextAnalytics.PiiEntity>)) { }
public string RedactedText { get { throw null; } }
public System.Collections.Generic.IReadOnlyCollection<Azure.AI.TextAnalytics.TextAnalyticsWarning> Warnings { get { throw null; } }
}
// API-surface listing of the PII domain enum. Its single member has the value 0,
// which is also default(PiiEntityDomainType).
public enum PiiEntityDomainType
{
ProtectedHealthInformation = 0,
}
public partial class RecognizeEntitiesResult : Azure.AI.TextAnalytics.TextAnalyticsResult
{
internal RecognizeEntitiesResult() { }
Expand All @@ -201,6 +223,22 @@ internal RecognizeLinkedEntitiesResultCollection() : base (default(System.Collec
public string ModelVersion { get { throw null; } }
public Azure.AI.TextAnalytics.TextDocumentBatchStatistics Statistics { get { throw null; } }
}
// API-surface listing of the PII request options: TextAnalyticsRequestOptions plus a
// settable, non-nullable DomainFilter. Bodies are placeholders.
public partial class RecognizePiiEntitiesOptions : Azure.AI.TextAnalytics.TextAnalyticsRequestOptions
{
public RecognizePiiEntitiesOptions() { }
public Azure.AI.TextAnalytics.PiiEntityDomainType DomainFilter { get { throw null; } set { } }
}
// API-surface listing of the per-document PII result: a TextAnalyticsResult that exposes
// the document's PiiEntityCollection. Only an internal constructor is listed.
public partial class RecognizePiiEntitiesResult : Azure.AI.TextAnalytics.TextAnalyticsResult
{
internal RecognizePiiEntitiesResult() { }
public Azure.AI.TextAnalytics.PiiEntityCollection Entities { get { throw null; } }
}
// API-surface listing of the batch PII result: a read-only collection of
// RecognizePiiEntitiesResult that also exposes ModelVersion and batch Statistics.
public partial class RecognizePiiEntitiesResultCollection : System.Collections.ObjectModel.ReadOnlyCollection<Azure.AI.TextAnalytics.RecognizePiiEntitiesResult>
{
internal RecognizePiiEntitiesResultCollection() : base (default(System.Collections.Generic.IList<Azure.AI.TextAnalytics.RecognizePiiEntitiesResult>)) { }
public string ModelVersion { get { throw null; } }
public Azure.AI.TextAnalytics.TextDocumentBatchStatistics Statistics { get { throw null; } }
}
[System.Runtime.InteropServices.StructLayoutAttribute(System.Runtime.InteropServices.LayoutKind.Sequential)]
public readonly partial struct SentenceSentiment
{
Expand Down Expand Up @@ -273,6 +311,12 @@ public TextAnalyticsClient(System.Uri endpoint, Azure.Core.TokenCredential crede
// Synchronous linked-entity batch overload taking raw strings, with optional language, options and cancellation token.
public virtual Azure.Response<Azure.AI.TextAnalytics.RecognizeLinkedEntitiesResultCollection> RecognizeLinkedEntitiesBatch(System.Collections.Generic.IEnumerable<string> documents, string language = null, Azure.AI.TextAnalytics.TextAnalyticsRequestOptions options = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }
// Task-returning linked-entity batch overloads: the first takes TextDocumentInput items,
// the second takes raw strings plus an optional language.
public virtual System.Threading.Tasks.Task<Azure.Response<Azure.AI.TextAnalytics.RecognizeLinkedEntitiesResultCollection>> RecognizeLinkedEntitiesBatchAsync(System.Collections.Generic.IEnumerable<Azure.AI.TextAnalytics.TextDocumentInput> documents, Azure.AI.TextAnalytics.TextAnalyticsRequestOptions options = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }
public virtual System.Threading.Tasks.Task<Azure.Response<Azure.AI.TextAnalytics.RecognizeLinkedEntitiesResultCollection>> RecognizeLinkedEntitiesBatchAsync(System.Collections.Generic.IEnumerable<string> documents, string language = null, Azure.AI.TextAnalytics.TextAnalyticsRequestOptions options = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }
// Synchronous single-document PII recognition; returns the document's PiiEntityCollection wrapped in Azure.Response.
public virtual Azure.Response<Azure.AI.TextAnalytics.PiiEntityCollection> RecognizePiiEntities(string document, string language = null, Azure.AI.TextAnalytics.RecognizePiiEntitiesOptions options = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }
// Task-returning counterpart of RecognizePiiEntities with the same parameter list.
public virtual System.Threading.Tasks.Task<Azure.Response<Azure.AI.TextAnalytics.PiiEntityCollection>> RecognizePiiEntitiesAsync(string document, string language = null, Azure.AI.TextAnalytics.RecognizePiiEntitiesOptions options = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }
// Synchronous PII batch overloads: the first takes TextDocumentInput items (no language
// parameter), the second takes raw strings plus an optional language.
public virtual Azure.Response<Azure.AI.TextAnalytics.RecognizePiiEntitiesResultCollection> RecognizePiiEntitiesBatch(System.Collections.Generic.IEnumerable<Azure.AI.TextAnalytics.TextDocumentInput> documents, Azure.AI.TextAnalytics.RecognizePiiEntitiesOptions options = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }
public virtual Azure.Response<Azure.AI.TextAnalytics.RecognizePiiEntitiesResultCollection> RecognizePiiEntitiesBatch(System.Collections.Generic.IEnumerable<string> documents, string language = null, Azure.AI.TextAnalytics.RecognizePiiEntitiesOptions options = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }
// Task-returning PII batch overloads, mirroring the two RecognizePiiEntitiesBatch signatures.
public virtual System.Threading.Tasks.Task<Azure.Response<Azure.AI.TextAnalytics.RecognizePiiEntitiesResultCollection>> RecognizePiiEntitiesBatchAsync(System.Collections.Generic.IEnumerable<Azure.AI.TextAnalytics.TextDocumentInput> documents, Azure.AI.TextAnalytics.RecognizePiiEntitiesOptions options = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }
public virtual System.Threading.Tasks.Task<Azure.Response<Azure.AI.TextAnalytics.RecognizePiiEntitiesResultCollection>> RecognizePiiEntitiesBatchAsync(System.Collections.Generic.IEnumerable<string> documents, string language = null, Azure.AI.TextAnalytics.RecognizePiiEntitiesOptions options = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }
// ToString override marked EditorBrowsableState.Never, i.e. flagged to be hidden from editor member lists.
[System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
public override string ToString() { throw null; }
}
Expand Down Expand Up @@ -359,12 +403,17 @@ public static partial class TextAnalyticsModelFactory
// Factory stub returning a LinkedEntityMatch from text, score, offset and length.
public static Azure.AI.TextAnalytics.LinkedEntityMatch LinkedEntityMatch(string text, double score, int offset, int length) { throw null; }
// Factory stub returning a MinedOpinion from an aspect and its list of opinions.
public static Azure.AI.TextAnalytics.MinedOpinion MinedOpinion(Azure.AI.TextAnalytics.AspectSentiment aspect, System.Collections.Generic.IReadOnlyList<Azure.AI.TextAnalytics.OpinionSentiment> opinions) { throw null; }
// Factory stub returning an OpinionSentiment from sentiment, scores, text, negation flag, offset and length.
public static Azure.AI.TextAnalytics.OpinionSentiment OpinionSentiment(Azure.AI.TextAnalytics.TextSentiment sentiment, double positiveScore, double negativeScore, string text, bool isNegated, int offset, int length) { throw null; }
// Factory stub returning a PiiEntity. Note that `category` is a string here, whereas the
// PiiEntity.Category property is typed EntityCategory.
public static Azure.AI.TextAnalytics.PiiEntity PiiEntity(string text, string category, string subCategory, double score, int offset, int length) { throw null; }
// Factory stub returning a PiiEntityCollection; `warnings` is optional and defaults to null.
public static Azure.AI.TextAnalytics.PiiEntityCollection PiiEntityCollection(System.Collections.Generic.IList<Azure.AI.TextAnalytics.PiiEntity> entities, string redactedText, System.Collections.Generic.IList<Azure.AI.TextAnalytics.TextAnalyticsWarning> warnings = null) { throw null; }
// Factory stubs for RecognizeEntitiesResult: one overload takes an error, the other
// takes statistics plus the entity collection.
public static Azure.AI.TextAnalytics.RecognizeEntitiesResult RecognizeEntitiesResult(string id, Azure.AI.TextAnalytics.TextAnalyticsError error) { throw null; }
public static Azure.AI.TextAnalytics.RecognizeEntitiesResult RecognizeEntitiesResult(string id, Azure.AI.TextAnalytics.TextDocumentStatistics statistics, Azure.AI.TextAnalytics.CategorizedEntityCollection entities) { throw null; }
// Factory stub returning a RecognizeEntitiesResultCollection from results, batch statistics and model version.
public static Azure.AI.TextAnalytics.RecognizeEntitiesResultCollection RecognizeEntitiesResultCollection(System.Collections.Generic.IEnumerable<Azure.AI.TextAnalytics.RecognizeEntitiesResult> list, Azure.AI.TextAnalytics.TextDocumentBatchStatistics statistics, string modelVersion) { throw null; }
// Factory stubs for RecognizeLinkedEntitiesResult: one overload takes an error, the other
// takes statistics plus the linked-entity collection.
public static Azure.AI.TextAnalytics.RecognizeLinkedEntitiesResult RecognizeLinkedEntitiesResult(string id, Azure.AI.TextAnalytics.TextAnalyticsError error) { throw null; }
public static Azure.AI.TextAnalytics.RecognizeLinkedEntitiesResult RecognizeLinkedEntitiesResult(string id, Azure.AI.TextAnalytics.TextDocumentStatistics statistics, Azure.AI.TextAnalytics.LinkedEntityCollection linkedEntities) { throw null; }
// Factory stub returning a RecognizeLinkedEntitiesResultCollection from results, batch statistics and model version.
public static Azure.AI.TextAnalytics.RecognizeLinkedEntitiesResultCollection RecognizeLinkedEntitiesResultCollection(System.Collections.Generic.IEnumerable<Azure.AI.TextAnalytics.RecognizeLinkedEntitiesResult> list, Azure.AI.TextAnalytics.TextDocumentBatchStatistics statistics, string modelVersion) { throw null; }
// Factory stubs for RecognizePiiEntitiesResult: one overload takes an error, the other
// takes statistics plus the PII entity collection.
public static Azure.AI.TextAnalytics.RecognizePiiEntitiesResult RecognizePiiEntitiesResult(string id, Azure.AI.TextAnalytics.TextAnalyticsError error) { throw null; }
public static Azure.AI.TextAnalytics.RecognizePiiEntitiesResult RecognizePiiEntitiesResult(string id, Azure.AI.TextAnalytics.TextDocumentStatistics statistics, Azure.AI.TextAnalytics.PiiEntityCollection entities) { throw null; }
// Factory stub returning a RecognizePiiEntitiesResultCollection from results, batch statistics and model version.
public static Azure.AI.TextAnalytics.RecognizePiiEntitiesResultCollection RecognizePiiEntitiesResultCollection(System.Collections.Generic.IEnumerable<Azure.AI.TextAnalytics.RecognizePiiEntitiesResult> list, Azure.AI.TextAnalytics.TextDocumentBatchStatistics statistics, string modelVersion) { throw null; }
// Factory stubs for SentenceSentiment. The five-argument overload carries
// EditorBrowsableState.Never; the longer overload adds offset, length and mined opinions.
[System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
public static Azure.AI.TextAnalytics.SentenceSentiment SentenceSentiment(Azure.AI.TextAnalytics.TextSentiment sentiment, string text, double positiveScore, double neutralScore, double negativeScore) { throw null; }
public static Azure.AI.TextAnalytics.SentenceSentiment SentenceSentiment(Azure.AI.TextAnalytics.TextSentiment sentiment, string text, double positiveScore, double neutralScore, double negativeScore, int offset, int length, System.Collections.Generic.IReadOnlyList<Azure.AI.TextAnalytics.MinedOpinion> minedOpinions) { throw null; }
Expand Down
64 changes: 64 additions & 0 deletions sdk/textanalytics/Azure.AI.TextAnalytics/src/PiiEntity.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using Azure.AI.TextAnalytics.Models;

namespace Azure.AI.TextAnalytics
{
    /// <summary>
    /// A word or phrase that was identified as Personally Identifiable Information
    /// and that can be categorized as a known type in a given taxonomy.
    /// The categories recognized by the Text Analytics service are listed at
    /// <a href="https://aka.ms/tanerpii"/>.
    /// </summary>
    public readonly struct PiiEntity
    {
        /// <summary>
        /// Copies the text, category, sub category, confidence score, offset and
        /// length from <paramref name="entity"/> into a new <see cref="PiiEntity"/>.
        /// </summary>
        /// <param name="entity">The entity whose values are copied.</param>
        internal PiiEntity(Entity entity)
        {
            Text = entity.Text;
            Category = entity.Category;
            SubCategory = entity.Subcategory;
            ConfidenceScore = entity.ConfidenceScore;
            Offset = entity.Offset;
            Length = entity.Length;
        }

        /// <summary>
        /// Gets the text of the entity exactly as it appears in the input document.
        /// </summary>
        public string Text { get; }

        /// <summary>
        /// Gets the category the Text Analytics service's named entity recognition
        /// model inferred for this entity, for example Financial Account
        /// Identification, Social Security Number or Phone Number.
        /// The available categories are listed at
        /// <a href="https://aka.ms/tanerpii"/>.
        /// </summary>
        public EntityCategory Category { get; }

        /// <summary>
        /// Gets the sub category the Text Analytics service's named entity
        /// recognition model inferred for this entity. This property may have no
        /// value when the entity has no sub category. The available categories and
        /// subcategories are listed at <a href="https://aka.ms/tanerpii"/>.
        /// </summary>
        public string SubCategory { get; }

        /// <summary>
        /// Gets a score between 0 and 1 that indicates how confident the service is
        /// that the text substring matches this inferred entity.
        /// </summary>
        public double ConfidenceScore { get; }

        /// <summary>
        /// Gets the position (in UTF-16 code units) at which the matching text
        /// starts in the input document.
        /// </summary>
        public int Offset { get; }

        /// <summary>
        /// Gets the length (in UTF-16 code units) of the matching text in the
        /// input document.
        /// </summary>
        public int Length { get; }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System.Collections.Generic;
using System.Collections.ObjectModel;

namespace Azure.AI.TextAnalytics
{
    /// <summary>
    /// Collection of <see cref="PiiEntity"/> objects in a document.
    /// </summary>
    public class PiiEntityCollection : ReadOnlyCollection<PiiEntity>
    {
        /// <summary>
        /// Initializes a new instance of the <see cref="PiiEntityCollection"/> class.
        /// </summary>
        /// <param name="entities">
        /// The entities to expose. Must not be null: it is handed to the
        /// <see cref="ReadOnlyCollection{T}"/> base constructor, which rejects a null list.
        /// </param>
        /// <param name="redactedText">The input text with the PII redacted out.</param>
        /// <param name="warnings">
        /// The warnings produced while processing the document. When null,
        /// <see cref="Warnings"/> is exposed as an empty collection.
        /// </param>
        internal PiiEntityCollection(IList<PiiEntity> entities, string redactedText, IList<TextAnalyticsWarning> warnings)
            : base(entities)
        {
            RedactedText = redactedText;

            // ReadOnlyCollection<T> throws ArgumentNullException for a null list. Callers
            // that treat warnings as optional may pass null, so substitute an empty list
            // to keep Warnings non-null and safe to enumerate.
            Warnings = new ReadOnlyCollection<TextAnalyticsWarning>(warnings ?? new List<TextAnalyticsWarning>());
        }

        /// <summary>
        /// Gets the text of the input document with all of the Personally Identifiable Information
        /// redacted out.
        /// </summary>
        public string RedactedText { get; }

        /// <summary>
        /// Gets the warnings encountered while processing the document.
        /// The collection is empty when no warnings were supplied.
        /// </summary>
        public IReadOnlyCollection<TextAnalyticsWarning> Warnings { get; }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

namespace Azure.AI.TextAnalytics
{
    /// <summary>
    /// The domains of PII entities that a request can be filtered by.
    /// </summary>
    public enum PiiEntityDomainType
    {
        /// <summary>
        /// Protected Health Information entities.
        /// For more information see <a href="https://aka.ms/tanerpii"/>.
        /// </summary>
        ProtectedHealthInformation = 0
    }

    /// <summary>
    /// Extension helpers for <see cref="PiiEntityDomainType"/>.
    /// </summary>
    [System.Diagnostics.CodeAnalysis.SuppressMessage("StyleCop.CSharp.DocumentationRules", "SA1649:File name should match first type name", Justification = "Small extensions, good to keep here.")]
    internal static class PiiEntityDomainTypeExtensions
    {
        /// <summary>
        /// Converts a <see cref="PiiEntityDomainType"/> to its string form.
        /// </summary>
        /// <param name="type">The domain to convert.</param>
        /// <returns>
        /// "PHI" for <see cref="PiiEntityDomainType.ProtectedHealthInformation"/>;
        /// <c>null</c> for any other value.
        /// </returns>
        internal static string GetString(this PiiEntityDomainType type)
        {
            if (type == PiiEntityDomainType.ProtectedHealthInformation)
            {
                return "PHI";
            }

            // Values outside the declared members have no string form.
            return null;
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

namespace Azure.AI.TextAnalytics
{
/// <summary>
/// Options that allow callers to specify details about how the operation
/// is run and what information is returned from it by the service.
/// Extends <see cref="TextAnalyticsRequestOptions"/> with a PII domain filter.
/// </summary>
public class RecognizePiiEntitiesOptions : TextAnalyticsRequestOptions
{
/// <summary>
/// Initializes a new instance of the <see cref="RecognizePiiEntitiesOptions"/>
/// class.
/// </summary>
public RecognizePiiEntitiesOptions()
{
}

// NOTE(review): DomainFilter is a non-nullable enum with no initializer, so a freshly
// constructed options object holds default(PiiEntityDomainType), i.e. whichever member
// has the value 0. If that member is a real domain, callers cannot express "no filter"
// through this property — TODO confirm how the client treats the default value.
/// <summary>
/// Filters the response entities to ones only included in the specified domain.
/// For more information see <a href="https://aka.ms/tanerpii"/>.
/// </summary>
public PiiEntityDomainType DomainFilter { get; set; }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System;

namespace Azure.AI.TextAnalytics
{
    /// <summary>
    /// The outcome of the recognize PII entities operation for a single
    /// document: the collection of <see cref="PiiEntity"/> objects holding the
    /// Personally Identifiable Information found in that document.
    /// </summary>
    public class RecognizePiiEntitiesResult : TextAnalyticsResult
    {
        private readonly PiiEntityCollection _entities;

        /// <summary>
        /// Creates a result that carries the entities recognized in a document.
        /// </summary>
        /// <param name="id">The document id, forwarded to the base class.</param>
        /// <param name="statistics">The document statistics, forwarded to the base class.</param>
        /// <param name="entities">The PII entities returned by <see cref="Entities"/>.</param>
        internal RecognizePiiEntitiesResult(string id, TextDocumentStatistics statistics, PiiEntityCollection entities)
            : base(id, statistics)
        {
            _entities = entities;
        }

        /// <summary>
        /// Creates a result that carries an error instead of entities.
        /// </summary>
        /// <param name="id">The document id, forwarded to the base class.</param>
        /// <param name="error">The error, forwarded to the base class.</param>
        internal RecognizePiiEntitiesResult(string id, TextAnalyticsError error)
            : base(id, error)
        {
        }

        /// <summary>
        /// Gets the collection of PII entities containing Personally
        /// Identifiable Information in the document.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// Thrown when <c>HasError</c> is true for this result.
        /// </exception>
        public PiiEntityCollection Entities
        {
            get
            {
                if (!HasError)
                {
                    return _entities;
                }

#pragma warning disable CA1065 // Do not raise exceptions in unexpected locations
                throw new InvalidOperationException($"Cannot access result for document {Id}, due to error {Error.ErrorCode}: {Error.Message}");
#pragma warning restore CA1065 // Do not raise exceptions in unexpected locations
            }
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System.Collections.Generic;
using System.Collections.ObjectModel;

namespace Azure.AI.TextAnalytics
{
    /// <summary>
    /// A read-only collection of <see cref="RecognizePiiEntitiesResult"/> objects,
    /// one per document in a batch, together with information about the batch
    /// operation itself.
    /// </summary>
    public class RecognizePiiEntitiesResultCollection : ReadOnlyCollection<RecognizePiiEntitiesResult>
    {
        /// <summary>
        /// Wraps the per-document results and records the batch-level details.
        /// </summary>
        /// <param name="list">The per-document results exposed by this collection.</param>
        /// <param name="statistics">The value returned by <see cref="Statistics"/>.</param>
        /// <param name="modelVersion">The value returned by <see cref="ModelVersion"/>.</param>
        internal RecognizePiiEntitiesResultCollection(
            IList<RecognizePiiEntitiesResult> list,
            TextDocumentBatchStatistics statistics,
            string modelVersion)
            : base(list)
        {
            ModelVersion = modelVersion;
            Statistics = statistics;
        }

        /// <summary>
        /// Gets statistics about the batch of documents and how it was processed
        /// by the service. This property will have a value when IncludeStatistics
        /// is set to true in the client call.
        /// </summary>
        public TextDocumentBatchStatistics Statistics { get; }

        /// <summary>
        /// Gets the version of the Text Analytics model that this operation used
        /// for this batch of documents.
        /// </summary>
        public string ModelVersion { get; }
    }
}
Loading

0 comments on commit 07773ac

Please sign in to comment.