-
Notifications
You must be signed in to change notification settings - Fork 584
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
predefined pattern recognizer : IN_VEHICLE_REGISTRATION (#1288)
* IN_PAN pattern recognizer Added India PAN (Permanent Account Number) recognizer * refined IN_PAN regex refined the regex for better recognition and enhanced the test cases accordingly * Update recognizer_registry.py Fixed lint error that was missed earlier. * Fixed Lint errors Added test cases , verification and context data * Added more test cases in test_in_pan_recognizer.py Added negative test cases per review comments. * added IN_AADHAAR recognizer * Update in_aadhaar_recognizer.py linted code * Update in_aadhaar_recognizer.py update pattern recognizer value per suggestion in review * added utility function class added PresidioAnalyzerUtils class with generic functions. removed usage of stdnum * Create test_analyzer_utils.py added test cases for analyzer_utils.py in prescribed format * Update test_recognizer_registry.py added to the count of predefined recognizers * added predefined recognizer : IN_VEHICLE_REGISTRATION Added India specific predefined pattern recognizer for vehicle registration number * review comments incorporated reinstated python 3.9 compatibility, reorganized code * review comments incorporated Logic reverted from analyzer_utils to recognizer classfile * added null/min vehicle number size added min size check to avoid failures per review comment * incorporated review comments --------- Co-authored-by: Omri Mendels <omri374@users.noreply.github.com>
- Loading branch information
Showing
8 changed files
with
518 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
403 changes: 403 additions & 0 deletions
403
...o-analyzer/presidio_analyzer/predefined_recognizers/in_vehicle_registration_recognizer.py
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
102 changes: 102 additions & 0 deletions
102
presidio-analyzer/tests/test_in_vehicle_registration_recognizer.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
import pytest | ||
|
||
from tests import assert_result | ||
from presidio_analyzer.predefined_recognizers import InVehicleRegistrationRecognizer | ||
|
||
|
||
@pytest.fixture(scope="module") | ||
def recognizer(): | ||
return InVehicleRegistrationRecognizer() | ||
|
||
|
||
@pytest.fixture(scope="module") | ||
def entities(): | ||
return ["IN_VEHICLE_REGISTRATION"] | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"text, expected_len, expected_position, expected_score", | ||
[ | ||
# fmt: off | ||
("KA53ME3456", 1, (0, 10), 1), | ||
("KA99ME3456", 1, (0, 10), 0.50), | ||
("MN2412", 1, (0, 6), 0.01), | ||
("MCX1243", 1, (0, 7), 0.2), | ||
("I15432", 1, (0, 6), 0.01), | ||
("DL3CJI0001", 1, (0, 10), 1), | ||
("ABNE123456", 0, (), (),), | ||
("My Bike's registration number is OD02BA2341 with a lot of text beyond", | ||
1, (33, 43), 1), | ||
# fmt: on | ||
], | ||
) | ||
def test_when_regn_in_text_then_all_regns_found( | ||
text, | ||
expected_len, | ||
expected_position, | ||
expected_score, | ||
recognizer, | ||
entities, | ||
): | ||
results = recognizer.analyze(text, entities) | ||
|
||
assert len(results) == expected_len | ||
if results: | ||
assert_result( | ||
results[0], | ||
entities[0], | ||
expected_position[0], | ||
expected_position[1], | ||
expected_score, | ||
) | ||
|
||
|
||
def test_list_length(): | ||
""" | ||
Tests for static counts of each metadata lists defined | ||
:return: True/False | ||
""" | ||
assert len(InVehicleRegistrationRecognizer.in_old_states) == 3 | ||
assert len(InVehicleRegistrationRecognizer.in_non_standard_state_or_ut) == 1 | ||
assert len(InVehicleRegistrationRecognizer.in_states) == 29 | ||
assert len(InVehicleRegistrationRecognizer.in_old_union_territories) == 2 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_wb) == 97 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_up) == 85 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_uk) == 20 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_ts) == 37 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_tr) == 8 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_tn) == 98 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_sk) == 8 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_rj) == 57 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_py) == 5 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_pb) == 98 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_or) == 30 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_od) == 34 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_nl) == 10 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_mz) == 8 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_mp) == 70 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_mn) == 7 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_ml) == 10 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_mh) == 50 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_ld) == 9 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_la) == 2 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_kl) == 98 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_ka) == 70 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_jh) == 23 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_hr) == 98 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_hp) == 98 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_gj) == 39 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_ga) == 12 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_dl) == 13 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_dn) == 1 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_dd) == 3 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_ch) == 4 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_cg) == 30 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_br) == 38 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_as) == 33 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_ar) == 20 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_ap) == 2 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_dist_an) == 1 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_diplomatic_codes) == 3 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_armed_forces_codes) == 11 | ||
assert len(InVehicleRegistrationRecognizer.in_vehicle_foreign_mission_codes) == 41 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters