Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added logic to properly treat underscores, dashes and spaces. Fixes #318 #341

Merged
merged 7 commits into from
Oct 29, 2014
10 changes: 10 additions & 0 deletions src/Humanizer.Tests/StringHumanizeTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ public class StringHumanizeTests
[InlineData("10IsInTheBegining", "10 is in the begining")]
[InlineData("NumberIsAtTheEnd100", "Number is at the end 100")]
[InlineData("XIsFirstWordInTheSentence", "X is first word in the sentence")]
[InlineData("XIsFirstWordInTheSentence ThenThereIsASpace", "X is first word in the sentence then there is a space")]
public void CanHumanizeStringInPascalCase(string input, string expectedResult)
{
Assert.Equal(expectedResult, input.Humanize());
Expand All @@ -19,6 +20,14 @@ public void CanHumanizeStringInPascalCase(string input, string expectedResult)
[Theory]
[InlineData("Underscored_input_string_is_turned_into_sentence", "Underscored input string is turned into sentence")]
[InlineData("Underscored_input_String_is_turned_INTO_sentence", "Underscored input String is turned INTO sentence")]
[InlineData("TEST 1 - THIS IS A TEST", "TEST 1 THIS IS A TEST")]
[InlineData("TEST 1 -THIS IS A TEST", "TEST 1 THIS IS A TEST")]
[InlineData("TEST 1- THIS IS A TEST", "TEST 1 THIS IS A TEST")]
[InlineData("TEST 1_ THIS IS A TEST", "TEST 1 THIS IS A TEST")]
[InlineData("TEST 1 _THIS IS A TEST", "TEST 1 THIS IS A TEST")]
[InlineData("TEST 1 _ THIS IS A TEST", "TEST 1 THIS IS A TEST")]
[InlineData("TEST 1 - THIS_IS_A_TEST", "TEST 1 THIS IS A TEST")]
[InlineData("TEST 1 - THIS is A Test", "TEST 1 THIS is A test")]
public void CanHumanizeStringWithUnderscores(string input, string expectedReseult)
{
Assert.Equal(expectedReseult, input.Humanize());
Expand All @@ -28,6 +37,7 @@ public void CanHumanizeStringWithUnderscores(string input, string expectedReseul
[InlineData("HTML", "HTML")]
[InlineData("TheHTMLLanguage", "The HTML language")]
[InlineData("HTMLIsTheLanguage", "HTML is the language")]
[InlineData("TheLanguage IsHTML", "The language is HTML")]
[InlineData("TheLanguageIsHTML", "The language is HTML")]
[InlineData("HTML5", "HTML 5")]
[InlineData("1HTML", "1 HTML")]
Expand Down
16 changes: 12 additions & 4 deletions src/Humanizer/StringHumanizeExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,17 @@ static string FromPascalCase(string input)
(?# acronym to number)
(?<=[A-Z])(?=[0-9])|
(?# acronym to word)
(?<=[A-Z])(?=[A-Z][a-z])
(?<=[A-Z])(?=[A-Z][a-z])|
(?# words/acronyms/numbers separated by space)
(?<=[^\s])(?=[\s])
", RegexOptions.IgnorePatternWhitespace);

var result = pascalCaseWordBoundaryRegex
.Split(input)
.Select(word =>
word.ToCharArray().All(Char.IsUpper) && word.Length > 1
? word
: word.ToLower())
word.Trim().ToCharArray().All(Char.IsUpper) && word.Length > 1
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For performance reasons, can you call Trim() only once per word?

I know you didn't introduce it, but can you also remove ToCharArray()? It should work without it anyway.

? word.Trim()
: word.Trim().ToLower())
.Aggregate((res, word) => res + " " + word);

result = Char.ToUpper(result[0]) +
Expand All @@ -51,6 +53,12 @@ public static string Humanize(this string input)
if (input.ToCharArray().All(Char.IsUpper))
return input;

// if input contains a dash or underscore which precedes or follows a space (or both, e.g. free-standing)
// remove the dash/underscore and run it through FromPascalCase
Regex r = new Regex(@"[\s]{1}[-_][\s]{0}|[\s]{0}[-_][\s]{1}", RegexOptions.IgnoreCase);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can remove the [\s]{0} parts, as in this case they don't affect the functionality at all.

if (r.IsMatch(input))
return FromPascalCase(FromUnderscoreDashSeparatedWords(input));

if (input.Contains("_") || input.Contains("-"))
return FromUnderscoreDashSeparatedWords(input);

Expand Down