Skip to content

Commit

Permalink
[TA] Add domainFilter feature to PII endpoint (#14714)
Browse files Browse the repository at this point in the history
* add domainFilter feature
  • Loading branch information
mssfang authored Sep 4, 2020
1 parent 67864dc commit eecf0ed
Show file tree
Hide file tree
Showing 18 changed files with 470 additions and 71 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@
import com.azure.ai.textanalytics.models.EntityCategory;
import com.azure.ai.textanalytics.models.PiiEntity;
import com.azure.ai.textanalytics.models.PiiEntityCollection;
import com.azure.ai.textanalytics.models.PiiEntityDomainType;
import com.azure.ai.textanalytics.models.RecognizePiiEntitiesResult;
import com.azure.ai.textanalytics.models.TextAnalyticsRequestOptions;
import com.azure.ai.textanalytics.models.RecognizePiiEntityOptions;
import com.azure.ai.textanalytics.models.TextAnalyticsWarning;
import com.azure.ai.textanalytics.models.TextDocumentInput;
import com.azure.ai.textanalytics.models.WarningCode;
Expand Down Expand Up @@ -67,14 +68,17 @@ class RecognizePiiEntityAsyncClient {
*
* @param document A single document.
* @param language The language code.
* @param options The additional configurable {@link RecognizePiiEntityOptions options} that may be passed when
* recognizing PII entities.
*
* @return The {@link Mono} of {@link PiiEntityCollection}.
*/
Mono<PiiEntityCollection> recognizePiiEntities(String document, String language) {
Mono<PiiEntityCollection> recognizePiiEntities(String document, String language,
RecognizePiiEntityOptions options) {
try {
Objects.requireNonNull(document, "'document' cannot be null.");
return recognizePiiEntitiesBatch(
Collections.singletonList(new TextDocumentInput("0", document).setLanguage(language)), null)
Collections.singletonList(new TextDocumentInput("0", document).setLanguage(language)), options)
.map(resultCollectionResponse -> {
PiiEntityCollection entityCollection = null;
// for each loop will have only one entry inside
Expand All @@ -97,12 +101,13 @@ Mono<PiiEntityCollection> recognizePiiEntities(String document, String language)
* Helper function for calling service with max overloaded parameters.
*
* @param documents The list of documents to recognize Personally Identifiable Information entities for.
* @param options The {@link TextAnalyticsRequestOptions} request options.
* @param options The additional configurable {@link RecognizePiiEntityOptions options} that may be passed when
* recognizing PII entities.
*
* @return A mono {@link Response} that contains {@link RecognizePiiEntitiesResultCollection}.
*/
Mono<Response<RecognizePiiEntitiesResultCollection>> recognizePiiEntitiesBatch(
Iterable<TextDocumentInput> documents, TextAnalyticsRequestOptions options) {
Iterable<TextDocumentInput> documents, RecognizePiiEntityOptions options) {
try {
inputDocumentsValidation(documents);
return withContext(context -> getRecognizePiiEntitiesResponse(documents, options, context));
Expand All @@ -115,13 +120,14 @@ Mono<Response<RecognizePiiEntitiesResultCollection>> recognizePiiEntitiesBatch(
* Helper function for calling the service with the maximum set of overloaded parameters when a {@link Context} is given.
*
* @param documents The list of documents to recognize Personally Identifiable Information entities for.
* @param options The {@link TextAnalyticsRequestOptions} request options.
* @param options The additional configurable {@link RecognizePiiEntityOptions options} that may be passed when
* recognizing PII entities.
* @param context Additional context that is passed through the Http pipeline during the service call.
*
* @return A mono {@link Response} that contains {@link RecognizePiiEntitiesResultCollection}.
*/
Mono<Response<RecognizePiiEntitiesResultCollection>> recognizePiiEntitiesBatchWithContext(
Iterable<TextDocumentInput> documents, TextAnalyticsRequestOptions options, Context context) {
Iterable<TextDocumentInput> documents, RecognizePiiEntityOptions options, Context context) {
try {
inputDocumentsValidation(documents);
return getRecognizePiiEntitiesResponse(documents, options, context);
Expand Down Expand Up @@ -184,19 +190,28 @@ private Response<RecognizePiiEntitiesResultCollection> toRecognizePiiEntitiesRes
* {@link RecognizePiiEntitiesResultCollection} from a {@link SimpleResponse} of {@link EntitiesResult}.
*
* @param documents The list of documents to recognize Personally Identifiable Information entities for.
* @param options The {@link TextAnalyticsRequestOptions} request options.
* @param options The additional configurable {@link RecognizePiiEntityOptions options} that may be passed when
* recognizing PII entities.
* @param context Additional context that is passed through the Http pipeline during the service call.
*
* @return A mono {@link Response} that contains {@link RecognizePiiEntitiesResultCollection}.
*/
private Mono<Response<RecognizePiiEntitiesResultCollection>> getRecognizePiiEntitiesResponse(
Iterable<TextDocumentInput> documents, TextAnalyticsRequestOptions options, Context context) {
Iterable<TextDocumentInput> documents, RecognizePiiEntityOptions options, Context context) {
String modelVersion = null;
Boolean includeStatistics = null;
String domainFilter = null;
if (options != null) {
modelVersion = options.getModelVersion();
includeStatistics = options.isIncludeStatistics();
final PiiEntityDomainType domainType = options.getDomainFilter();
if (domainType != null) {
domainFilter = domainType.toString();
}
}
return service.entitiesRecognitionPiiWithResponseAsync(
new MultiLanguageBatchInput().setDocuments(toMultiLanguageInput(documents)),
options == null ? null : options.getModelVersion(),
options == null ? null : options.isIncludeStatistics(),
null,
StringIndexType.UTF16CODE_UNIT,
modelVersion, includeStatistics, domainFilter, StringIndexType.UTF16CODE_UNIT,
context.addData(AZ_TRACING_NAMESPACE_KEY, COGNITIVE_TRACING_NAMESPACE_VALUE))
.doOnSubscribe(ignoredValue -> logger.info(
"Start recognizing Personally Identifiable Information entities for a batch of documents."))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import com.azure.ai.textanalytics.models.KeyPhrasesCollection;
import com.azure.ai.textanalytics.models.LinkedEntityCollection;
import com.azure.ai.textanalytics.models.PiiEntityCollection;
import com.azure.ai.textanalytics.models.RecognizePiiEntityOptions;
import com.azure.ai.textanalytics.models.TextAnalyticsError;
import com.azure.ai.textanalytics.models.TextAnalyticsException;
import com.azure.ai.textanalytics.models.TextAnalyticsRequestOptions;
Expand Down Expand Up @@ -427,7 +428,38 @@ public Mono<PiiEntityCollection> recognizePiiEntities(String document) {
*/
@ServiceMethod(returns = ReturnType.SINGLE)
public Mono<PiiEntityCollection> recognizePiiEntities(String document, String language) {
    // This overload accepts no PII options, so null is forwarded as the options argument of the
    // three-argument method on the internal PII client.
    return recognizePiiEntityAsyncClient.recognizePiiEntities(document, language, null);
}

/**
 * Recognizes the Personally Identifiable Information (PII) entities in a single document, using the
 * provided language code and PII recognition options.
 *
 * <p>The supported entity types are listed <a href="https://aka.ms/tanerpii">here</a>, and the enabled
 * languages are listed <a href="https://aka.ms/talangs">here</a>.</p>
 *
 * <p><strong>Code sample</strong></p>
 * <p>Recognize the PII entities details in a document with provided language code and RecognizePiiEntityOptions.
 * Subscribes to the call asynchronously and prints out the entity details when a response is received.</p>
 *
 * {@codesnippet com.azure.ai.textanalytics.TextAnalyticsAsyncClient.recognizePiiEntities#string-string-RecognizePiiEntityOptions}
 *
 * @param document The text to recognize PII entities details for.
 * For text length limits, maximum batch size, and supported text encoding, see
 * <a href="https://docs.microsoft.com/azure/cognitive-services/text-analytics/overview#data-limits">data limits</a>.
 * @param language The 2 letter ISO 639-1 representation of language. If not set, uses "en" for English as default.
 * @param options The additional configurable {@link RecognizePiiEntityOptions options} that may be passed when
 * recognizing PII entities.
 *
 * @return A {@link Mono} that contains a {@link PiiEntityCollection recognized PII entities collection}.
 *
 * @throws NullPointerException if {@code document} is null.
 * @throws TextAnalyticsException if the response returned with an {@link TextAnalyticsError error}.
 */
@ServiceMethod(returns = ReturnType.SINGLE)
public Mono<PiiEntityCollection> recognizePiiEntities(String document, String language,
    RecognizePiiEntityOptions options) {
    // All three arguments are handed unchanged to the internal PII client.
    final Mono<PiiEntityCollection> piiEntities =
        recognizePiiEntityAsyncClient.recognizePiiEntities(document, language, options);
    return piiEntities;
}

/**
Expand All @@ -438,14 +470,14 @@ public Mono<PiiEntityCollection> recognizePiiEntities(String document, String la
* <p>Recognize Personally Identifiable Information entities in a document with the provided language code.
* Subscribes to the call asynchronously and prints out the entity details when a response is received.</p>
*
* {@codesnippet com.azure.ai.textanalytics.TextAnalyticsAsyncClient.recognizePiiEntitiesBatch#Iterable-String-TextAnalyticsRequestOptions}
* {@codesnippet com.azure.ai.textanalytics.TextAnalyticsAsyncClient.recognizePiiEntitiesBatch#Iterable-String-RecognizePiiEntityOptions}
*
* @param documents A list of documents to recognize PII entities for.
* For text length limits, maximum batch size, and supported text encoding, see
* <a href="https://docs.microsoft.com/azure/cognitive-services/text-analytics/overview#data-limits">data limits</a>.
* @param language The 2 letter ISO 639-1 representation of language. If not set, uses "en" for English as default.
* @param options The {@link TextAnalyticsRequestOptions options} to configure the scoring model for documents
* and show statistics.
* @param options The additional configurable {@link RecognizePiiEntityOptions options} that may be passed when
* recognizing PII entities.
*
* @return A {@link Mono} that contains a {@link RecognizePiiEntitiesResultCollection}.
*
Expand All @@ -454,7 +486,7 @@ public Mono<PiiEntityCollection> recognizePiiEntities(String document, String la
*/
@ServiceMethod(returns = ReturnType.SINGLE)
public Mono<RecognizePiiEntitiesResultCollection> recognizePiiEntitiesBatch(
Iterable<String> documents, String language, TextAnalyticsRequestOptions options) {
Iterable<String> documents, String language, RecognizePiiEntityOptions options) {
try {
inputDocumentsValidation(documents);
return recognizePiiEntitiesBatchWithResponse(
Expand All @@ -477,13 +509,13 @@ public Mono<RecognizePiiEntitiesResultCollection> recognizePiiEntitiesBatch(
* with provided request options.
* Subscribes to the call asynchronously and prints out the entity details when a response is received.</p>
*
* {@codesnippet com.azure.ai.textanalytics.TextAnalyticsAsyncClient.recognizePiiEntitiesBatch#Iterable-TextAnalyticsRequestOptions}
* {@codesnippet com.azure.ai.textanalytics.TextAnalyticsAsyncClient.recognizePiiEntitiesBatch#Iterable-RecognizePiiEntityOptions}
*
* @param documents A list of {@link TextDocumentInput documents} to recognize PII entities for.
* For text length limits, maximum batch size, and supported text encoding, see
* <a href="https://docs.microsoft.com/azure/cognitive-services/text-analytics/overview#data-limits">data limits</a>.
* @param options The {@link TextAnalyticsRequestOptions options} to configure the scoring model for documents
* and show statistics.
* @param options The additional configurable {@link RecognizePiiEntityOptions options} that may be passed when
* recognizing PII entities.
*
* @return A {@link Mono} that contains a {@link Response}, which in turn contains a {@link RecognizePiiEntitiesResultCollection}.
*
Expand All @@ -492,7 +524,7 @@ public Mono<RecognizePiiEntitiesResultCollection> recognizePiiEntitiesBatch(
*/
@ServiceMethod(returns = ReturnType.SINGLE)
public Mono<Response<RecognizePiiEntitiesResultCollection>> recognizePiiEntitiesBatchWithResponse(
Iterable<TextDocumentInput> documents, TextAnalyticsRequestOptions options) {
Iterable<TextDocumentInput> documents, RecognizePiiEntityOptions options) {
return recognizePiiEntityAsyncClient.recognizePiiEntitiesBatch(documents, options);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import com.azure.ai.textanalytics.models.LinkedEntity;
import com.azure.ai.textanalytics.models.LinkedEntityCollection;
import com.azure.ai.textanalytics.models.PiiEntityCollection;
import com.azure.ai.textanalytics.models.RecognizePiiEntityOptions;
import com.azure.ai.textanalytics.models.TextAnalyticsError;
import com.azure.ai.textanalytics.models.TextAnalyticsException;
import com.azure.ai.textanalytics.models.TextAnalyticsRequestOptions;
Expand Down Expand Up @@ -233,7 +234,6 @@ public CategorizedEntityCollection recognizeEntities(String document) {
*/
@ServiceMethod(returns = ReturnType.SINGLE)
public CategorizedEntityCollection recognizeEntities(String document, String language) {
    // Reject a null document before delegating.
    Objects.requireNonNull(document, "'document' cannot be null.");
    // Delegate to the wrapped client and block until its result is available.
    final CategorizedEntityCollection categorizedEntities =
        client.recognizeEntities(document, language).block();
    return categorizedEntities;
}

Expand Down Expand Up @@ -344,10 +344,40 @@ public PiiEntityCollection recognizePiiEntities(String document) {
*/
@ServiceMethod(returns = ReturnType.SINGLE)
public PiiEntityCollection recognizePiiEntities(String document, String language) {
    // Reject a null document before delegating.
    Objects.requireNonNull(document, "'document' cannot be null.");
    // Delegate to the wrapped client and block until its result is available.
    final PiiEntityCollection piiEntities = client.recognizePiiEntities(document, language).block();
    return piiEntities;
}

/**
 * Recognizes the Personally Identifiable Information (PII) entities in a single document, using the
 * provided language code and PII recognition options.
 *
 * <p>The supported entity types are listed <a href="https://aka.ms/tanerpii">here</a>, and the enabled
 * languages are listed <a href="https://aka.ms/talangs">here</a>.</p>
 *
 * <p><strong>Code Sample</strong></p>
 * <p>Recognizes the PII entities details in a document with a provided language code and
 * RecognizePiiEntityOptions.</p>
 *
 * {@codesnippet com.azure.ai.textanalytics.TextAnalyticsClient.recognizePiiEntities#String-String-RecognizePiiEntityOptions}
 *
 * @param document The document to recognize PII entities details for.
 * For text length limits, maximum batch size, and supported text encoding, see
 * <a href="https://docs.microsoft.com/azure/cognitive-services/text-analytics/overview#data-limits">data limits</a>.
 * @param language The 2 letter ISO 639-1 representation of language. If not set, uses "en" for English as default.
 * @param options The additional configurable {@link RecognizePiiEntityOptions options} that may be passed when
 * recognizing PII entities.
 *
 * @return The {@link PiiEntityCollection recognized PII entities collection}.
 *
 * @throws NullPointerException if {@code document} is null.
 * @throws TextAnalyticsException if the response returned with an {@link TextAnalyticsError error}.
 */
@ServiceMethod(returns = ReturnType.SINGLE)
public PiiEntityCollection recognizePiiEntities(String document, String language,
    RecognizePiiEntityOptions options) {
    // Delegate to the wrapped client and block until its result is available.
    final PiiEntityCollection piiEntities =
        client.recognizePiiEntities(document, language, options).block();
    return piiEntities;
}

/**
* Returns a list of Personally Identifiable Information(PII) entities for the provided list of documents with
* provided language code and request options.
Expand All @@ -356,14 +386,14 @@ public PiiEntityCollection recognizePiiEntities(String document, String language
* <p>Recognizes the PII entities details in a list of documents with a provided language code
* and request options.</p>
*
* {@codesnippet com.azure.ai.textanalytics.TextAnalyticsClient.recognizePiiEntitiesBatch#Iterable-String-TextAnalyticsRequestOptions}
* {@codesnippet com.azure.ai.textanalytics.TextAnalyticsClient.recognizePiiEntitiesBatch#Iterable-String-RecognizePiiEntityOptions}
*
* @param documents A list of documents to recognize PII entities for.
* For text length limits, maximum batch size, and supported text encoding, see
* <a href="https://docs.microsoft.com/azure/cognitive-services/text-analytics/overview#data-limits">data limits</a>.
* @param language The 2 letter ISO 639-1 representation of language. If not set, uses "en" for English as default.
* @param options The {@link TextAnalyticsRequestOptions options} to configure the scoring model for documents
* and show statistics.
* @param options The additional configurable {@link RecognizePiiEntityOptions options} that may be passed when
* recognizing PII entities.
*
* @return A {@link RecognizePiiEntitiesResultCollection}.
*
Expand All @@ -372,7 +402,7 @@ public PiiEntityCollection recognizePiiEntities(String document, String language
*/
@ServiceMethod(returns = ReturnType.SINGLE)
public RecognizePiiEntitiesResultCollection recognizePiiEntitiesBatch(
Iterable<String> documents, String language, TextAnalyticsRequestOptions options) {
Iterable<String> documents, String language, RecognizePiiEntityOptions options) {
return client.recognizePiiEntitiesBatch(documents, language, options).block();
}

Expand All @@ -384,13 +414,13 @@ public RecognizePiiEntitiesResultCollection recognizePiiEntitiesBatch(
* <p>Recognizes the PII entities details with http response in a list of {@link TextDocumentInput document}
* with provided request options.</p>
*
* {@codesnippet com.azure.ai.textanalytics.TextAnalyticsClient.recognizePiiEntitiesBatch#Iterable-TextAnalyticsRequestOptions-Context}
* {@codesnippet com.azure.ai.textanalytics.TextAnalyticsClient.recognizePiiEntitiesBatch#Iterable-RecognizePiiEntityOptions-Context}
*
* @param documents A list of {@link TextDocumentInput documents} to recognize PII entities for.
* For text length limits, maximum batch size, and supported text encoding, see
* <a href="https://docs.microsoft.com/azure/cognitive-services/text-analytics/overview#data-limits">data limits</a>.
* @param options The {@link TextAnalyticsRequestOptions options} to configure the scoring model for documents
* and show statistics.
* @param options The additional configurable {@link RecognizePiiEntityOptions options} that may be passed when
* recognizing PII entities.
* @param context Additional context that is passed through the Http pipeline during the service call.
*
* @return A {@link Response} that contains a {@link RecognizePiiEntitiesResultCollection}.
Expand All @@ -400,7 +430,7 @@ public RecognizePiiEntitiesResultCollection recognizePiiEntitiesBatch(
*/
@ServiceMethod(returns = ReturnType.SINGLE)
public Response<RecognizePiiEntitiesResultCollection> recognizePiiEntitiesBatchWithResponse(
Iterable<TextDocumentInput> documents, TextAnalyticsRequestOptions options, Context context) {
Iterable<TextDocumentInput> documents, RecognizePiiEntityOptions options, Context context) {
return client.recognizePiiEntityAsyncClient.recognizePiiEntitiesBatchWithContext(documents, options,
context).block();
}
Expand Down
Loading

0 comments on commit eecf0ed

Please sign in to comment.