Skip to content

Commit

Permalink
Merge pull request #4349 from osmandapp/arabic_norm
Browse files Browse the repository at this point in the history
Arabic normalizer
  • Loading branch information
alex-osm authored Feb 4, 2025
2 parents a488913 + e8b04d0 commit 19c803f
Show file tree
Hide file tree
Showing 5 changed files with 130 additions and 0 deletions.
6 changes: 6 additions & 0 deletions OsmAnd.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@
15CD7EA725B1C1A700BCB36A /* ic_small_time_start@2x.png in Resources */ = {isa = PBXBuildFile; fileRef = 15CD7EA425B1C1A700BCB36A /* ic_small_time_start@2x.png */; };
15CD7EAD25B1C1B900BCB36A /* ic_small_waypoints@3x.png in Resources */ = {isa = PBXBuildFile; fileRef = 15CD7EAA25B1C1B700BCB36A /* ic_small_waypoints@3x.png */; };
15CD7EAF25B1C1B900BCB36A /* ic_small_waypoints@2x.png in Resources */ = {isa = PBXBuildFile; fileRef = 15CD7EAC25B1C1B800BCB36A /* ic_small_waypoints@2x.png */; };
2C8E8A562D5104E600746A69 /* OAArabicNormalizer.mm in Sources */ = {isa = PBXBuildFile; fileRef = 2C8E8A552D5104E600746A69 /* OAArabicNormalizer.mm */; };
320076142BFC775100CDDDAF /* SpeedLimitWarningViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 320076132BFC775100CDDDAF /* SpeedLimitWarningViewController.swift */; };
320427D62BF4D5250085DCA1 /* SpeedometerWidgetSettingsViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 320427D52BF4D5250085DCA1 /* SpeedometerWidgetSettingsViewController.swift */; };
32048FC52A25E70200AA4B71 /* BaseOAuthHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 32048FC42A25E70200AA4B71 /* BaseOAuthHelper.swift */; };
Expand Down Expand Up @@ -3474,6 +3475,8 @@
15CD7EAA25B1C1B700BCB36A /* ic_small_waypoints@3x.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = "ic_small_waypoints@3x.png"; path = "Resources/Icons/ic_small_waypoints@3x.png"; sourceTree = "<group>"; };
15CD7EAC25B1C1B800BCB36A /* ic_small_waypoints@2x.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = "ic_small_waypoints@2x.png"; path = "Resources/Icons/ic_small_waypoints@2x.png"; sourceTree = "<group>"; };
2503BB823D105783DC4982C8 /* Pods-OsmAnd Maps.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-OsmAnd Maps.debug.xcconfig"; path = "Target Support Files/Pods-OsmAnd Maps/Pods-OsmAnd Maps.debug.xcconfig"; sourceTree = "<group>"; };
2C8E8A542D5104E600746A69 /* OAArabicNormalizer.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = OAArabicNormalizer.h; sourceTree = "<group>"; };
2C8E8A552D5104E600746A69 /* OAArabicNormalizer.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = OAArabicNormalizer.mm; sourceTree = "<group>"; };
320076132BFC775100CDDDAF /* SpeedLimitWarningViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SpeedLimitWarningViewController.swift; sourceTree = "<group>"; };
320427D52BF4D5250085DCA1 /* SpeedometerWidgetSettingsViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SpeedometerWidgetSettingsViewController.swift; sourceTree = "<group>"; };
32048FC42A25E70200AA4B71 /* BaseOAuthHelper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BaseOAuthHelper.swift; sourceTree = "<group>"; };
Expand Down Expand Up @@ -13075,6 +13078,8 @@
DA5A808E26C563A500F274C7 /* Helpers */ = {
isa = PBXGroup;
children = (
2C8E8A542D5104E600746A69 /* OAArabicNormalizer.h */,
2C8E8A552D5104E600746A69 /* OAArabicNormalizer.mm */,
FA5D71E02CE24C4E0062BC4D /* LockHelper.swift */,
FA45851C2C946046003A5AD7 /* SharedLibHelpers */,
32AB48672C9C50A1005EF1D4 /* DownloadingListHelper */,
Expand Down Expand Up @@ -16252,6 +16257,7 @@
4656BD362C4855D200B69928 /* ColorizationType.swift in Sources */,
DA5A816026C563A700F274C7 /* OAParkingPositionPlugin.mm in Sources */,
DA5A816826C563A700F274C7 /* OAWikiImageCard.mm in Sources */,
2C8E8A562D5104E600746A69 /* OAArabicNormalizer.mm in Sources */,
DA5A843226C563A800F274C7 /* OATransportDetailsTableViewController.mm in Sources */,
DAC849B426CF967C00018091 /* OADownloadMapWidget.mm in Sources */,
322B53952C78DD14006B48B0 /* OAIconsPaletteCell.m in Sources */,
Expand Down
8 changes: 8 additions & 0 deletions Sources/Common/OACollatorStringMatcher.m
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#import "OACollatorStringMatcher.h"
#import "OAUtilities.h"
#import "OAArabicNormalizer.h"

static NSStringCompareOptions comparisonOptions = NSCaseInsensitiveSearch | NSWidthInsensitiveSearch | NSDiacriticInsensitiveSearch;

Expand Down Expand Up @@ -46,6 +47,13 @@ - (BOOL) matches:(NSString *)name

+ (BOOL) cmatches:(NSString *)fullName part:(NSString *)part mode:(StringMatcherMode)mode
{
if ([OAArabicNormalizer isSpecialArabic:fullName]) {
fullName = [OAArabicNormalizer normalize:fullName];
}

if ([OAArabicNormalizer isSpecialArabic:part]) {
part = [OAArabicNormalizer normalize:part];
}
switch (mode)
{
case CHECK_CONTAINS:
Expand Down
8 changes: 8 additions & 0 deletions Sources/Helpers/OAArabicNormalizer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#import <Foundation/Foundation.h>

/// Helper for making Arabic text comparable during search: detects text that
/// carries diacritics, kashida or Arabic-Indic digits, and strips/maps them.
@interface OAArabicNormalizer : NSObject

/// Returns YES when `text` starts with a character in U+0600–U+06FF and contains
/// at least one diacritic (U+064B–U+0652), Arabic-Indic digit (U+0660–U+0669)
/// or kashida (U+0640). Returns NO for nil or empty text.
+ (BOOL)isSpecialArabic:(NSString *)text;
/// Returns `text` with diacritics (U+064B–U+0652) and kashida (U+0640) removed;
/// if what remains starts with a character in U+0600–U+06FF, Arabic-Indic digits
/// are also replaced by ASCII 0-9. Nil or empty input is returned as is.
+ (NSString *)normalize:(NSString *)text;

@end
100 changes: 100 additions & 0 deletions Sources/Helpers/OAArabicNormalizer.mm
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#import "OAArabicNormalizer.h"

/// Normalizes Arabic text for search matching by stripping diacritics and
/// kashida and mapping Arabic-Indic digits to ASCII digits.
@implementation OAArabicNormalizer

// Matches the Arabic diacritics U+064B..U+0652; built once in +initialize.
static NSRegularExpression *diacriticRegex;
static NSString *const kKashida = @"\u0640";

// Code-point boundaries used by the character predicates below.
static const unichar kArabicBlockFirst = 0x0600;
static const unichar kArabicBlockLast = 0x06FF;
static const unichar kDiacriticFirst = 0x064B;
static const unichar kDiacriticLast = 0x0652;
static const unichar kArabicIndicZero = 0x0660;
static const unichar kArabicIndicNine = 0x0669;
static const unichar kKashidaChar = 0x0640;

/// Compiles the diacritic regex exactly once.
+ (void)initialize {
    // +initialize is also sent on behalf of subclasses that do not override it;
    // dispatch_once keeps the regex from being rebuilt in that case.
    static dispatch_once_t onceToken;
    dispatch_once(&onceToken, ^{
        NSError *error = nil;
        diacriticRegex = [NSRegularExpression regularExpressionWithPattern:@"[\u064B-\u0652]"
                                                                   options:0
                                                                     error:&error];
        // Cocoa convention: failure is signalled by the nil result, not by `error`.
        if (diacriticRegex == nil) {
            NSLog(@"Error initializing regex: %@", error.localizedDescription);
        }
    });
}

/// Tells whether `text` needs normalization.
/// @param text The string to inspect; may be nil.
/// @return YES if the first character is in the Arabic block (U+0600–U+06FF) and
///         any character is a diacritic, an Arabic-Indic digit or a kashida.
+ (BOOL)isSpecialArabic:(NSString *)text {
    // Messaging nil yields 0, so this covers both nil and empty input.
    NSUInteger length = text.length;
    if (length == 0) {
        return NO;
    }

    if (![self isArabicCharacter:[text characterAtIndex:0]]) {
        return NO;
    }

    for (NSUInteger i = 0; i < length; i++) {
        unichar c = [text characterAtIndex:i];
        if ([self isDiacritic:c] || [self isArabicDigit:c] || [self isKashida:c]) {
            return YES;
        }
    }

    return NO;
}

/// Removes diacritics and kashida from `text`, then converts Arabic-Indic digits.
/// @param text The string to normalize; may be nil.
/// @return The normalized string. Nil or empty input is returned unchanged; input
///         consisting only of removable characters yields an empty string (never
///         nil), so callers can keep passing the result to string APIs.
+ (NSString *)normalize:(NSString *)text {
    if (text.length == 0) {
        return text;
    }

    // Remove diacritics. If the regex failed to compile, skip this step instead
    // of collapsing the whole result to nil.
    NSString *stripped = text;
    if (diacriticRegex != nil) {
        stripped = [diacriticRegex stringByReplacingMatchesInString:stripped
                                                            options:0
                                                              range:NSMakeRange(0, stripped.length)
                                                       withTemplate:@""];
    }

    // Remove Kashida
    stripped = [stripped stringByReplacingOccurrencesOfString:kKashida withString:@""];

    return [self replaceDigits:stripped];
}

/// Replaces Arabic-Indic digits (U+0660–U+0669) with ASCII 0-9.
/// The replacement only happens when `text` starts with an Arabic-block
/// character; otherwise `text` is returned untouched.
/// @param text The string to convert; may be nil or empty.
/// @return The converted string; nil or empty input is returned as is.
+ (NSString *)replaceDigits:(NSString *)text {
    NSUInteger length = text.length;
    if (length == 0) {
        // Return the (possibly empty) input rather than nil: an empty string is a
        // valid normalization result.
        return text;
    }

    if (![self isArabicCharacter:[text characterAtIndex:0]]) {
        return text;
    }

    // Work on the UTF-16 units directly: one pass, and surrogate pairs pass
    // through unchanged because they are outside the digit range.
    NSMutableData *storage = [NSMutableData dataWithLength:length * sizeof(unichar)];
    unichar *chars = (unichar *)storage.mutableBytes;
    [text getCharacters:chars range:NSMakeRange(0, length)];

    for (NSUInteger i = 0; i < length; i++) {
        if ([self isArabicDigit:chars[i]]) {
            chars[i] = (unichar)('0' + (chars[i] - kArabicIndicZero));
        }
    }

    return [NSString stringWithCharacters:chars length:length];
}

/// YES for the Arabic diacritics U+064B..U+0652.
+ (BOOL)isDiacritic:(unichar)c {
    return (c >= kDiacriticFirst && c <= kDiacriticLast);
}

/// YES for the Arabic-Indic digits U+0660..U+0669.
+ (BOOL)isArabicDigit:(unichar)c {
    return (c >= kArabicIndicZero && c <= kArabicIndicNine);
}

/// YES for the kashida (tatweel) character U+0640.
+ (BOOL)isKashida:(unichar)c {
    return (c == kKashidaChar);
}

/// YES for any code unit in the Arabic block U+0600..U+06FF.
+ (BOOL)isArabicCharacter:(unichar)c {
    return (c >= kArabicBlockFirst && c <= kArabicBlockLast);
}

@end
8 changes: 8 additions & 0 deletions Sources/Search/OASearchCoreFactory.mm
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#import "OAResultMatcher.h"
#import "OATopIndexFilter.h"
#import "OACollatorStringMatcher.h"
#import "OAArabicNormalizer.h"

#include <OsmAndCore.h>
#include <OsmAndCore/IObfsCollection.h>
Expand Down Expand Up @@ -478,6 +479,10 @@ - (void) searchByName:(OASearchPhrase *)phrase resultMatcher:(OASearchResultMatc
if ([phrase getRadiusLevel] > 1 || [phrase getUnknownWordToSearch].length > 3 || [phrase hasMoreThanOneUnknownSearchWord] || [phrase isSearchTypeAllowed:POSTCODE exclusive:YES])
{
NSString *wordToSearch = [phrase getUnknownWordToSearch];
if ([OAArabicNormalizer isSpecialArabic:wordToSearch]) {
wordToSearch = [OAArabicNormalizer normalize:wordToSearch];
}

if (wordToSearch.length == 0)
return;

Expand Down Expand Up @@ -721,6 +726,9 @@ - (BOOL) search:(OASearchPhrase *)phrase resultMatcher:(OASearchResultMatcher *)
NSMutableSet<NSString *> *ids = [NSMutableSet new];

NSString *searchWord = [phrase getUnknownWordToSearch];
if ([OAArabicNormalizer isSpecialArabic:searchWord]) {
searchWord = [OAArabicNormalizer normalize:searchWord];
}
OANameStringMatcher *nm = [phrase getMainUnknownNameStringMatcher];

QuadRect *bbox = [phrase getFileId] != nil ? [phrase getRadiusBBox31ToSearch:BBOX_RADIUS_POI_IN_CITY] : [phrase getRadiusBBox31ToSearch:BBOX_RADIUS_INSIDE];
Expand Down

0 comments on commit 19c803f

Please sign in to comment.