-
Notifications
You must be signed in to change notification settings - Fork 4.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* all pii code * tests * recordings * samples * pr feedback * add mocking * PR feedback
- Loading branch information
Showing
33 changed files
with
2,683 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
// Copyright (c) Microsoft Corporation. All rights reserved. | ||
// Licensed under the MIT License. | ||
|
||
using Azure.AI.TextAnalytics.Models; | ||
|
||
namespace Azure.AI.TextAnalytics | ||
{ | ||
/// <summary>
/// A word or phrase from the input document that was identified as
/// Personally Identifiable Information and that can be assigned to a
/// known type in a given taxonomy.
/// The categories recognized by the Text Analytics service are listed at
/// <a href="https://aka.ms/tanerpii"/>.
/// </summary>
public readonly struct PiiEntity
{
    /// <summary>
    /// Creates a <see cref="PiiEntity"/> by copying the values of a service
    /// model <see cref="Entity"/>.
    /// </summary>
    /// <param name="entity">The model whose values populate this instance.</param>
    internal PiiEntity(Entity entity)
    {
        // The assignments are independent of one another; they are grouped
        // as text, classification, then position within the document.
        Text = entity.Text;
        Category = entity.Category;
        SubCategory = entity.Subcategory;
        ConfidenceScore = entity.ConfidenceScore;
        Offset = entity.Offset;
        Length = entity.Length;
    }

    /// <summary>
    /// Gets the text of the entity exactly as it appears in the input document.
    /// </summary>
    public string Text { get; }

    /// <summary>
    /// Gets the category that the Text Analytics service's named entity
    /// recognition model inferred for this entity, for example Financial Account
    /// Identification, Social Security Number or Phone Number.
    /// The available categories are listed at
    /// <a href="https://aka.ms/tanerpii"/>.
    /// </summary>
    public EntityCategory Category { get; }

    /// <summary>
    /// Gets the sub category that the Text Analytics service's named entity
    /// recognition model inferred for this entity. The property may have no
    /// value when no sub category exists for the entity. The available
    /// categories and subcategories are listed at
    /// <a href="https://aka.ms/tanerpii"/>.
    /// </summary>
    public string SubCategory { get; }

    /// <summary>
    /// Gets a score between 0 and 1 that expresses how confident the service is
    /// that the text substring matches this inferred entity.
    /// </summary>
    public double ConfidenceScore { get; }

    /// <summary>
    /// Gets the position, in UTF-16 code units, at which the matching text
    /// starts in the input document.
    /// </summary>
    public int Offset { get; }

    /// <summary>
    /// Gets the length, in UTF-16 code units, of the matching text in the
    /// input document.
    /// </summary>
    public int Length { get; }
}
} |
32 changes: 32 additions & 0 deletions
32
sdk/textanalytics/Azure.AI.TextAnalytics/src/PiiEntityCollection.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
// Copyright (c) Microsoft Corporation. All rights reserved. | ||
// Licensed under the MIT License. | ||
|
||
using System.Collections.Generic; | ||
using System.Collections.ObjectModel; | ||
|
||
namespace Azure.AI.TextAnalytics | ||
{ | ||
/// <summary>
/// A read-only collection of the <see cref="PiiEntity"/> values found in a
/// document, together with the redacted document text and any warnings.
/// </summary>
public class PiiEntityCollection : ReadOnlyCollection<PiiEntity>
{
    /// <summary>
    /// Initializes a new instance of the <see cref="PiiEntityCollection"/> class.
    /// </summary>
    /// <param name="entities">The entities exposed through this collection.</param>
    /// <param name="redactedText">The value returned by <see cref="RedactedText"/>.</param>
    /// <param name="warnings">The warnings to expose through <see cref="Warnings"/>.</param>
    internal PiiEntityCollection(IList<PiiEntity> entities, string redactedText, IList<TextAnalyticsWarning> warnings)
        : base(entities)
    {
        // Wrap the caller's list so the public property is a read-only view of it.
        var readOnlyWarnings = new ReadOnlyCollection<TextAnalyticsWarning>(warnings);

        Warnings = readOnlyWarnings;
        RedactedText = redactedText;
    }

    /// <summary>
    /// Gets the text of the input document with all of the Personally
    /// Identifiable Information redacted out.
    /// </summary>
    public string RedactedText { get; }

    /// <summary>
    /// Gets the warnings encountered while processing the document.
    /// </summary>
    public IReadOnlyCollection<TextAnalyticsWarning> Warnings { get; }
}
} |
30 changes: 30 additions & 0 deletions
30
sdk/textanalytics/Azure.AI.TextAnalytics/src/PiiEntityDomainType.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
// Copyright (c) Microsoft Corporation. All rights reserved. | ||
// Licensed under the MIT License. | ||
|
||
namespace Azure.AI.TextAnalytics | ||
{ | ||
/// <summary>
/// The different domains of PII entities that users can filter requests by.
/// </summary>
public enum PiiEntityDomainType
{
    /// <summary>
    /// Don't apply any domain filter. This is the default value.
    /// </summary>
    // Deliberately the zero member: a property of this enum type that is never
    // assigned holds the zero value, so "no filter" must be what zero means.
    // Without it, an unset filter would silently select Protected Health
    // Information.
    None = 0,

    /// <summary>
    /// Protected Health Information entities.
    /// For more information see <a href="https://aka.ms/tanerpii"/>.
    /// </summary>
    ProtectedHealthInformation = 1
}
|
||
[System.Diagnostics.CodeAnalysis.SuppressMessage("StyleCop.CSharp.DocumentationRules", "SA1649:File name should match first type name", Justification = "Small extensions, good to keep here.")]
internal static class PiiEntityDomainTypeExtensions
{
    /// <summary>
    /// Converts a <see cref="PiiEntityDomainType"/> to its string form.
    /// </summary>
    /// <param name="type">The domain type to convert.</param>
    /// <returns>
    /// <c>"PHI"</c> for <see cref="PiiEntityDomainType.ProtectedHealthInformation"/>;
    /// <c>null</c> for every other value.
    /// </returns>
    internal static string GetString(this PiiEntityDomainType type)
    {
        if (type == PiiEntityDomainType.ProtectedHealthInformation)
        {
            return "PHI";
        }

        // Any value without a known string form maps to null.
        return null;
    }
}
} |
26 changes: 26 additions & 0 deletions
26
sdk/textanalytics/Azure.AI.TextAnalytics/src/RecognizePiiEntitiesOptions.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
// Copyright (c) Microsoft Corporation. All rights reserved. | ||
// Licensed under the MIT License. | ||
|
||
namespace Azure.AI.TextAnalytics | ||
{ | ||
/// <summary>
/// Options that let callers control how the recognize PII entities operation
/// is run and what information the service returns from it.
/// </summary>
public class RecognizePiiEntitiesOptions : TextAnalyticsRequestOptions
{
    /// <summary>
    /// Initializes a new instance of the <see cref="RecognizePiiEntitiesOptions"/>
    /// class.
    /// </summary>
    public RecognizePiiEntitiesOptions()
        : base()
    {
    }

    /// <summary>
    /// Gets or sets the domain used to filter the response entities, so that
    /// only entities included in the specified domain are returned.
    /// When it is never assigned, this property holds the default (zero) value
    /// of <see cref="PiiEntityDomainType"/>.
    /// For more information see <a href="https://aka.ms/tanerpii"/>.
    /// </summary>
    public PiiEntityDomainType DomainFilter { get; set; }
}
} |
44 changes: 44 additions & 0 deletions
44
sdk/textanalytics/Azure.AI.TextAnalytics/src/RecognizePiiEntitiesResult.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
// Copyright (c) Microsoft Corporation. All rights reserved. | ||
// Licensed under the MIT License. | ||
|
||
using System; | ||
|
||
namespace Azure.AI.TextAnalytics | ||
{ | ||
/// <summary>
/// The result of the recognize PII entities operation on a document. It holds
/// the collection of <see cref="PiiEntity"/> objects, each containing
/// Personally Identifiable Information, that were found in that document.
/// </summary>
public class RecognizePiiEntitiesResult : TextAnalyticsResult
{
    private readonly PiiEntityCollection _entities;

    /// <summary>
    /// Initializes a result that carries the entities recognized in a document.
    /// </summary>
    /// <param name="id">The document id, forwarded to the base class.</param>
    /// <param name="statistics">The document statistics, forwarded to the base class.</param>
    /// <param name="entities">The collection returned by <see cref="Entities"/>.</param>
    internal RecognizePiiEntitiesResult(string id, TextDocumentStatistics statistics, PiiEntityCollection entities)
        : base(id, statistics)
    {
        _entities = entities;
    }

    /// <summary>
    /// Initializes a result that carries an error instead of entities.
    /// </summary>
    /// <param name="id">The document id, forwarded to the base class.</param>
    /// <param name="error">The error, forwarded to the base class.</param>
    internal RecognizePiiEntitiesResult(string id, TextAnalyticsError error)
        : base(id, error)
    {
    }

    /// <summary>
    /// Gets the collection of PII entities containing Personally
    /// Identifiable Information in the document.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// Thrown when <c>HasError</c> is <c>true</c> for this result.
    /// </exception>
    public PiiEntityCollection Entities
    {
        get
        {
            if (!HasError)
            {
                return _entities;
            }

            // The getter throws on purpose so that an errored document is
            // never mistaken for a document without entities.
#pragma warning disable CA1065 // Do not raise exceptions in unexpected locations
            throw new InvalidOperationException($"Cannot access result for document {Id}, due to error {Error.ErrorCode}: {Error.Message}");
#pragma warning restore CA1065 // Do not raise exceptions in unexpected locations
        }
    }
}
} |
34 changes: 34 additions & 0 deletions
34
sdk/textanalytics/Azure.AI.TextAnalytics/src/RecognizePiiEntitiesResultCollection.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
// Copyright (c) Microsoft Corporation. All rights reserved. | ||
// Licensed under the MIT License. | ||
|
||
using System.Collections.Generic; | ||
using System.Collections.ObjectModel; | ||
|
||
namespace Azure.AI.TextAnalytics | ||
{ | ||
/// <summary>
/// A read-only collection of <see cref="RecognizePiiEntitiesResult"/> objects,
/// one for each document in a batch, together with information about the
/// batch operation itself.
/// </summary>
public class RecognizePiiEntitiesResultCollection : ReadOnlyCollection<RecognizePiiEntitiesResult>
{
    /// <summary>
    /// Initializes a new instance of the
    /// <see cref="RecognizePiiEntitiesResultCollection"/> class.
    /// </summary>
    /// <param name="list">The per-document results exposed through this collection.</param>
    /// <param name="statistics">The value returned by <see cref="Statistics"/>.</param>
    /// <param name="modelVersion">The value returned by <see cref="ModelVersion"/>.</param>
    internal RecognizePiiEntitiesResultCollection(
        IList<RecognizePiiEntitiesResult> list,
        TextDocumentBatchStatistics statistics,
        string modelVersion)
        : base(list)
    {
        ModelVersion = modelVersion;
        Statistics = statistics;
    }

    /// <summary>
    /// Gets statistics about the batch of documents and how the service
    /// processed it. This property will have a value when IncludeStatistics
    /// is set to true in the client call.
    /// </summary>
    public TextDocumentBatchStatistics Statistics { get; }

    /// <summary>
    /// Gets the version of the Text Analytics model that this operation used
    /// on this batch of documents.
    /// </summary>
    public string ModelVersion { get; }
}
} |
Oops, something went wrong.