Skip to content

Commit

Permalink
Add matchLength-returning APIs to CompareInfo (#40752)
Browse files Browse the repository at this point in the history
  • Loading branch information
GrabYourPitchforks authored Aug 14, 2020
1 parent 1f36de1 commit baaf591
Show file tree
Hide file tree
Showing 17 changed files with 798 additions and 346 deletions.
4 changes: 2 additions & 2 deletions src/libraries/Common/src/Interop/Interop.Collation.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@ internal static partial class Globalization

[DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_StartsWith")]
[return: MarshalAs(UnmanagedType.Bool)]
internal static extern unsafe bool StartsWith(IntPtr sortHandle, char* target, int cwTargetLength, char* source, int cwSourceLength, CompareOptions options);
internal static extern unsafe bool StartsWith(IntPtr sortHandle, char* target, int cwTargetLength, char* source, int cwSourceLength, CompareOptions options, int* matchedLength);

[DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_EndsWith")]
[return: MarshalAs(UnmanagedType.Bool)]
internal static extern unsafe bool EndsWith(IntPtr sortHandle, char* target, int cwTargetLength, char* source, int cwSourceLength, CompareOptions options);
internal static extern unsafe bool EndsWith(IntPtr sortHandle, char* target, int cwTargetLength, char* source, int cwSourceLength, CompareOptions options, int* matchedLength);

[DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_StartsWith")]
[return: MarshalAs(UnmanagedType.Bool)]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,7 @@ int32_t GlobalizationNative_IndexOf(
result = GlobalizationNative_CompareString(pSortHandle, lpTarget, cwTargetLength, lpSource, cwSourceLength, options);
if (result == UCOL_EQUAL && pMatchedLength != NULL)
{
*pMatchedLength = cwTargetLength;
*pMatchedLength = cwSourceLength;
}

return (result == UCOL_EQUAL) ? 0 : -1;
Expand Down Expand Up @@ -551,7 +551,7 @@ int32_t GlobalizationNative_LastIndexOf(
result = GlobalizationNative_CompareString(pSortHandle, lpTarget, cwTargetLength, lpSource, cwSourceLength, options);
if (result == UCOL_EQUAL && pMatchedLength != NULL)
{
*pMatchedLength = cwTargetLength;
*pMatchedLength = cwSourceLength;
}

return (result == UCOL_EQUAL) ? 0 : -1;
Expand Down Expand Up @@ -689,13 +689,14 @@ static int32_t GetCollationElementMask(UColAttributeValue strength)
}
}

static int32_t inline SimpleAffix_Iterators(UCollationElements* pPatternIterator, UCollationElements* pSourceIterator, UColAttributeValue strength, int32_t forwardSearch)
static int32_t inline SimpleAffix_Iterators(UCollationElements* pPatternIterator, UCollationElements* pSourceIterator, UColAttributeValue strength, int32_t forwardSearch, int32_t* pCapturedOffset)
{
assert(strength >= UCOL_SECONDARY);

UErrorCode errorCode = U_ZERO_ERROR;
int32_t movePattern = TRUE, moveSource = TRUE;
int32_t patternElement = UCOL_IGNORABLE, sourceElement = UCOL_IGNORABLE;
int32_t capturedOffset = 0;

int32_t collationElementMask = GetCollationElementMask(strength);

Expand All @@ -707,6 +708,10 @@ static int32_t inline SimpleAffix_Iterators(UCollationElements* pPatternIterator
}
if (moveSource)
{
if (pCapturedOffset != NULL)
{
capturedOffset = ucol_getOffset(pSourceIterator); // need to capture offset before advancing iterator
}
sourceElement = forwardSearch ? ucol_next(pSourceIterator, &errorCode) : ucol_previous(pSourceIterator, &errorCode);
}
movePattern = TRUE; moveSource = TRUE;
Expand All @@ -715,19 +720,19 @@ static int32_t inline SimpleAffix_Iterators(UCollationElements* pPatternIterator
{
if (sourceElement == UCOL_NULLORDER)
{
return TRUE; // source is equal to pattern, we have reached both ends|beginnings at the same time
goto ReturnTrue; // source is equal to pattern, we have reached both ends|beginnings at the same time
}
else if (sourceElement == UCOL_IGNORABLE)
{
return TRUE; // the next|previous character in source is an ignorable character, an example: "o\u0000".StartsWith("o")
goto ReturnTrue; // the next|previous character in source is an ignorable character, an example: "o\u0000".StartsWith("o")
}
else if (forwardSearch && ((sourceElement & UCOL_PRIMARYORDERMASK) == 0) && (sourceElement & UCOL_SECONDARYORDERMASK) != 0)
{
return FALSE; // the next character in source text is a combining character, an example: "o\u0308".StartsWith("o")
}
else
{
return TRUE;
goto ReturnTrue;
}
}
else if (patternElement == UCOL_IGNORABLE)
Expand All @@ -743,9 +748,16 @@ static int32_t inline SimpleAffix_Iterators(UCollationElements* pPatternIterator
return FALSE;
}
}

ReturnTrue:
if (pCapturedOffset != NULL)
{
*pCapturedOffset = capturedOffset;
}
return TRUE;
}

static int32_t SimpleAffix(const UCollator* pCollator, UErrorCode* pErrorCode, const UChar* pPattern, int32_t patternLength, const UChar* pText, int32_t textLength, int32_t forwardSearch)
static int32_t SimpleAffix(const UCollator* pCollator, UErrorCode* pErrorCode, const UChar* pPattern, int32_t patternLength, const UChar* pText, int32_t textLength, int32_t forwardSearch, int32_t* pMatchedLength)
{
int32_t result = FALSE;

Expand All @@ -757,7 +769,15 @@ static int32_t SimpleAffix(const UCollator* pCollator, UErrorCode* pErrorCode, c
{
UColAttributeValue strength = ucol_getStrength(pCollator);

result = SimpleAffix_Iterators(pPatternIterator, pSourceIterator, strength, forwardSearch);
int32_t capturedOffset = 0;
result = SimpleAffix_Iterators(pPatternIterator, pSourceIterator, strength, forwardSearch, (pMatchedLength != NULL) ? &capturedOffset : NULL);

if (result && pMatchedLength != NULL)
{
// depending on whether we're searching forward or backward, the matching substring
// is [start of source string .. curIdx] or [curIdx .. end of source string]
*pMatchedLength = (forwardSearch) ? capturedOffset : (textLength - capturedOffset);
}

ucol_closeElements(pSourceIterator);
}
Expand All @@ -768,7 +788,7 @@ static int32_t SimpleAffix(const UCollator* pCollator, UErrorCode* pErrorCode, c
return result;
}

static int32_t ComplexStartsWith(const UCollator* pCollator, UErrorCode* pErrorCode, const UChar* pPattern, int32_t patternLength, const UChar* pText, int32_t textLength)
static int32_t ComplexStartsWith(const UCollator* pCollator, UErrorCode* pErrorCode, const UChar* pPattern, int32_t patternLength, const UChar* pText, int32_t textLength, int32_t* pMatchedLength)
{
int32_t result = FALSE;

Expand All @@ -786,6 +806,12 @@ static int32_t ComplexStartsWith(const UCollator* pCollator, UErrorCode* pErrorC
{
result = CanIgnoreAllCollationElements(pCollator, pText, idx);
}

if (result && pMatchedLength != NULL)
{
// adjust matched length to account for all the elements we implicitly consumed at beginning of string
*pMatchedLength = idx + usearch_getMatchedLength(pSearch);
}
}

usearch_close(pSearch);
Expand All @@ -803,9 +829,9 @@ int32_t GlobalizationNative_StartsWith(
int32_t cwTargetLength,
const UChar* lpSource,
int32_t cwSourceLength,
int32_t options)
int32_t options,
int32_t* pMatchedLength)
{

UErrorCode err = U_ZERO_ERROR;
const UCollator* pCollator = GetCollatorFromSortHandle(pSortHandle, options, &err);

Expand All @@ -815,15 +841,15 @@ int32_t GlobalizationNative_StartsWith(
}
else if (options > CompareOptionsIgnoreCase)
{
return ComplexStartsWith(pCollator, &err, lpTarget, cwTargetLength, lpSource, cwSourceLength);
return ComplexStartsWith(pCollator, &err, lpTarget, cwTargetLength, lpSource, cwSourceLength, pMatchedLength);
}
else
{
return SimpleAffix(pCollator, &err, lpTarget, cwTargetLength, lpSource, cwSourceLength, TRUE);
return SimpleAffix(pCollator, &err, lpTarget, cwTargetLength, lpSource, cwSourceLength, TRUE, pMatchedLength);
}
}

static int32_t ComplexEndsWith(const UCollator* pCollator, UErrorCode* pErrorCode, const UChar* pPattern, int32_t patternLength, const UChar* pText, int32_t textLength)
static int32_t ComplexEndsWith(const UCollator* pCollator, UErrorCode* pErrorCode, const UChar* pPattern, int32_t patternLength, const UChar* pText, int32_t textLength, int32_t* pMatchedLength)
{
int32_t result = FALSE;

Expand All @@ -846,6 +872,12 @@ static int32_t ComplexEndsWith(const UCollator* pCollator, UErrorCode* pErrorCod

result = CanIgnoreAllCollationElements(pCollator, pText + matchEnd, remainingStringLength);
}

if (result && pMatchedLength != NULL)
{
// adjust matched length to account for all the elements we implicitly consumed at end of string
*pMatchedLength = textLength - idx;
}
}

usearch_close(pSearch);
Expand All @@ -863,7 +895,8 @@ int32_t GlobalizationNative_EndsWith(
int32_t cwTargetLength,
const UChar* lpSource,
int32_t cwSourceLength,
int32_t options)
int32_t options,
int32_t* pMatchedLength)
{
UErrorCode err = U_ZERO_ERROR;
const UCollator* pCollator = GetCollatorFromSortHandle(pSortHandle, options, &err);
Expand All @@ -874,11 +907,11 @@ int32_t GlobalizationNative_EndsWith(
}
else if (options > CompareOptionsIgnoreCase)
{
return ComplexEndsWith(pCollator, &err, lpTarget, cwTargetLength, lpSource, cwSourceLength);
return ComplexEndsWith(pCollator, &err, lpTarget, cwTargetLength, lpSource, cwSourceLength, pMatchedLength);
}
else
{
return SimpleAffix(pCollator, &err, lpTarget, cwTargetLength, lpSource, cwSourceLength, FALSE);
return SimpleAffix(pCollator, &err, lpTarget, cwTargetLength, lpSource, cwSourceLength, FALSE, pMatchedLength);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,16 @@ PALEXPORT int32_t GlobalizationNative_StartsWith(SortHandle* pSortHandle,
int32_t cwTargetLength,
const UChar* lpSource,
int32_t cwSourceLength,
int32_t options);
int32_t options,
int32_t* pMatchedLength);

PALEXPORT int32_t GlobalizationNative_EndsWith(SortHandle* pSortHandle,
const UChar* lpTarget,
int32_t cwTargetLength,
const UChar* lpSource,
int32_t cwSourceLength,
int32_t options);
int32_t options,
int32_t* pMatchedLength);

PALEXPORT int32_t GlobalizationNative_GetSortKey(SortHandle* pSortHandle,
const UChar* lpStr,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
PER_FUNCTION_BLOCK(ucal_set, libicui18n) \
PER_FUNCTION_BLOCK(ucol_close, libicui18n) \
PER_FUNCTION_BLOCK(ucol_closeElements, libicui18n) \
PER_FUNCTION_BLOCK(ucol_getOffset, libicui18n) \
PER_FUNCTION_BLOCK(ucol_getRules, libicui18n) \
PER_FUNCTION_BLOCK(ucol_getSortKey, libicui18n) \
PER_FUNCTION_BLOCK(ucol_getStrength, libicui18n) \
Expand Down Expand Up @@ -199,6 +200,7 @@ FOR_ALL_ICU_FUNCTIONS
#define ucal_set(...) ucal_set_ptr(__VA_ARGS__)
#define ucol_close(...) ucol_close_ptr(__VA_ARGS__)
#define ucol_closeElements(...) ucol_closeElements_ptr(__VA_ARGS__)
#define ucol_getOffset(...) ucol_getOffset_ptr(__VA_ARGS__)
#define ucol_getRules(...) ucol_getRules_ptr(__VA_ARGS__)
#define ucol_getSortKey(...) ucol_getSortKey_ptr(__VA_ARGS__)
#define ucol_getStrength(...) ucol_getStrength_ptr(__VA_ARGS__)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,7 @@ UCalendar * ucal_open(const UChar * zoneID, int32_t len, const char * locale, UC
void ucal_set(UCalendar * cal, UCalendarDateFields field, int32_t value);
void ucol_close(UCollator * coll);
void ucol_closeElements(UCollationElements * elems);
int32_t ucol_getOffset(const UCollationElements *elems);
const UChar * ucol_getRules(const UCollator * coll, int32_t * length);
int32_t ucol_getSortKey(const UCollator * coll, const UChar * source, int32_t sourceLength, uint8_t * result, int32_t resultLength);
UCollationStrength ucol_getStrength(const UCollator * coll);
Expand Down
Loading

0 comments on commit baaf591

Please sign in to comment.