Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(internet): userName, email and slugify return only ascii #1554

Merged
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
405b129
fix(internet): use a ascii-only fallback for email addresses in some …
matthewmayer Nov 13, 2022
4b137b4
fix(internet): use a ascii-only fallback for email addresses in some …
matthewmayer Nov 13, 2022
bf18f4a
fix(internet): use a ascii-only fallback for email addresses in some …
matthewmayer Nov 13, 2022
ddbb19a
fix(internet): use a ascii-only fallback for email addresses in some …
matthewmayer Nov 13, 2022
219173d
Update src/modules/internet/index.ts
Nov 17, 2022
8560c5d
Merge branch 'next' into chore/fallback-ascii-email-addresses
Nov 17, 2022
c37ffcd
fix(internet): use a ascii-only fallback for email addresses in some …
matthewmayer Nov 20, 2022
d4b958b
fix(internet): make faker.internet.userName return ASCII only
matthewmayer Nov 25, 2022
83cd519
fix(internet): make faker.internet.userName return ASCII only - and u…
matthewmayer Nov 25, 2022
225de8d
fix(internet): add faker.internet.displayName
matthewmayer Nov 25, 2022
5280dd0
fix(internet): add displayName tests, add Greek
matthewmayer Nov 25, 2022
95fb1d8
Merge branch 'next' into chore/fallback-ascii-email-addresses
Nov 25, 2022
f16a53c
fix(internet): fix deprecations
matthewmayer Nov 25, 2022
82cb254
fix(internet): fix lint
matthewmayer Nov 25, 2022
9e66225
fix(internet): new source and add arabic,farsi,armenian
matthewmayer Nov 25, 2022
b717385
Update src/modules/internet/index.ts
Nov 27, 2022
4b0ee08
Update src/modules/internet/index.ts
Nov 27, 2022
2e1b5f6
Update test/internet.spec.ts
Nov 27, 2022
5b2757a
fix(internet): improve tests and docs
matthewmayer Nov 27, 2022
4884f27
fix(internet): improve tests and docs
matthewmayer Nov 27, 2022
bc636c5
Merge branch 'next' into chore/fallback-ascii-email-addresses
ST-DDT Nov 27, 2022
3505045
Update src/modules/internet/char-mappings.ts
Nov 29, 2022
8657c47
Merge branch 'next' into chore/fallback-ascii-email-addresses
ST-DDT Nov 30, 2022
c9caafc
fix: comment fixes
matthewmayer Dec 1, 2022
cdb5d4f
Merge branch 'next' into chore/fallback-ascii-email-addresses
ST-DDT Dec 1, 2022
d461e30
Merge branch 'next' into chore/fallback-ascii-email-addresses
ST-DDT Dec 2, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions src/modules/helpers/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,10 @@ export class HelpersModule {
*/
slugify(string: string = ''): string {
return string
.replace(/ /g, '-')
.replace(/[^\一-龠\ぁ-ゔ\ァ-ヴー\w\.\-]+/g, '');
.normalize('NFKD') // for example, è decomposes to e + U+0300 (combining grave accent)
.replace(/[\u0300-\u036f]/g, '') // removes combining marks
.replace(/ /g, '-') // replaces spaces with hyphens
.replace(/[^\w\.\-]+/g, ''); // removes all non-word characters except for dots and hyphens
}

/**
Expand Down
101 changes: 96 additions & 5 deletions src/modules/internet/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,7 @@ export class InternetModule {
this.faker.definitions.internet.free_email
);

let localPart: string = this.faker.helpers.slugify(
this.userName(firstName, lastName)
);

let localPart: string = this.userName(firstName, lastName);
if (options?.allowSpecialCharacters) {
const usernameChars: string[] = '._-'.split('');
const specialChars: string[] = ".!#$%&'*+-/=?^_`{|}~".split('');
Expand Down Expand Up @@ -125,7 +122,7 @@ export class InternetModule {
}

/**
* Generates a username using the given person's name as base.
* Generates a username using the given person's name as base. This will always return a plain ASCII string.
*
* @param firstName The optional first name to use. If not specified, a random one will be chosen.
* @param lastName The optional last name to use. If not specified, a random one will be chosen.
Expand Down Expand Up @@ -155,8 +152,102 @@ export class InternetModule {
])}${lastName}${this.faker.datatype.number(99)}`;
break;
}

ST-DDT marked this conversation as resolved.
Show resolved Hide resolved
// There may still be non-ASCII characters in the result.
// First, strip simple accents by decomposing and removing combining marks.
result = result
.normalize('NFKD') // for example, è decomposes to e + U+0300 (combining grave accent)
.replace(/[\u0300-\u036f]/g, ''); // removes combining marks

// Simple transliteration mapping for Cyrillic. FIXME: we could also do this for some other simple alphabets like Greek and Thai.
const mappings: { [key: string]: string } = {
Ё: 'YO',
Й: 'I',
Ц: 'TS',
У: 'U',
К: 'K',
Е: 'E',
Н: 'N',
Г: 'G',
Ш: 'SH',
Щ: 'SCH',
З: 'Z',
Х: 'H',
Ъ: "'",
ё: 'yo',
й: 'i',
ц: 'ts',
у: 'u',
к: 'k',
е: 'e',
н: 'n',
г: 'g',
ш: 'sh',
щ: 'sch',
з: 'z',
х: 'h',
ъ: "'",
Ф: 'F',
Ы: 'I',
В: 'V',
А: 'А',
П: 'P',
Р: 'R',
О: 'O',
Л: 'L',
Д: 'D',
Ж: 'ZH',
Э: 'E',
ф: 'f',
ы: 'i',
в: 'v',
а: 'a',
п: 'p',
р: 'r',
о: 'o',
л: 'l',
д: 'd',
ж: 'zh',
э: 'e',
Я: 'Ya',
Ч: 'CH',
С: 'S',
М: 'M',
И: 'I',
Т: 'T',
Ь: "'",
Б: 'B',
Ю: 'YU',
я: 'ya',
ч: 'ch',
с: 's',
м: 'm',
и: 'i',
т: 't',
ь: "'",
б: 'b',
ю: 'yu',
};
result = result
.split('')
.map(function (char) {
ST-DDT marked this conversation as resolved.
Show resolved Hide resolved
//if we have a mapping for this character, use it
if (mappings[char]) {
return mappings[char];
}
if (char.charCodeAt(0) < 0x80) {
//keep ascii characters
return char;
}
// For any remaining non-ASCII character (Chinese, Japanese, Korean, etc.), return the hex value of its UTF-16 code unit
return char.charCodeAt(0).toString(16);
})
.join('');

// Remove spaces and apostrophes (')
ST-DDT marked this conversation as resolved.
Show resolved Hide resolved
result = result.toString().replace(/'/g, '');
result = result.replace(/ /g, '');

return result;
}

Expand Down
16 changes: 14 additions & 2 deletions test/helpers.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,21 @@ describe('helpers', () => {
});

describe('slugify()', () => {
it('removes unwanted characters from URI string', () => {
expect(faker.helpers.slugify('Aiden.Harªann')).toBe('Aiden.Harann');
it('removes non-word characters from strings except . and -', () => {
expect(faker.helpers.slugify('foo bar')).toBe('foo-bar');
expect(faker.helpers.slugify('Faker is cool')).toBe('Faker-is-cool');
expect(faker.helpers.slugify('super*star')).toBe('superstar');
expect(faker.helpers.slugify("d'angelo.net")).toBe('dangelo.net');
expect(faker.helpers.slugify('hello你好')).toBe('hello');
});
});

describe('slugify()', () => {
matthewmayer marked this conversation as resolved.
Show resolved Hide resolved
it('strips simple diacritics from strings', () => {
expect(faker.helpers.slugify('Aiden.Harªann')).toBe('Aiden.Haraann');
expect(faker.helpers.slugify('Adèle.Argüello')).toBe(
'Adele.Arguello'
);
});
});

Expand Down
41 changes: 14 additions & 27 deletions test/internet.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,20 @@ describe('internet', () => {
expect(faker.definitions.internet.free_email).toContain(suffix);
});

it('should return a valid email in every locale', () => {
for (const locale of Object.keys(faker.locales)) {
faker.setLocale(locale);
const email = faker.internet.email();

expect(email).toBeTruthy();
expect(email).toBeTypeOf('string');
expect(email).toSatisfy(
validator.isEmail,
`locale: ${locale} has invalid email: ${email}`
);
}
});

it('should return an email with given firstName', () => {
const email = faker.internet.email('Aiden.Harann55');

Expand Down Expand Up @@ -130,19 +144,6 @@ describe('internet', () => {
expect(faker.definitions.internet.free_email).toContain(suffix);
});

it('should return an email with japanese characters', () => {
Shinigami92 marked this conversation as resolved.
Show resolved Hide resolved
const email = faker.internet.email('思源_唐3');

expect(email).toBeTruthy();
expect(email).toBeTypeOf('string');
expect(email).toSatisfy(validator.isEmail);

const [prefix, suffix] = email.split('@');

expect(prefix).toMatch(/^思源_唐3/);
expect(faker.definitions.internet.free_email).toContain(suffix);
});

it('should return an email with special characters', () => {
const email = faker.internet.email('Mike', 'Smith', null, {
allowSpecialCharacters: true,
Expand Down Expand Up @@ -203,20 +204,6 @@ describe('internet', () => {
expect(prefix).toMatch(/^Aiden([._]Harann)?\d*/);
});

it('should return an email with the example suffix and japanese characters', () => {
const email = faker.internet.exampleEmail('思源_唐3');

expect(email).toBeTruthy();
expect(email).toBeTypeOf('string');
expect(email).toSatisfy(validator.isEmail);

const [prefix, suffix] = email.split('@');

expect(suffix).toMatch(/^example\.(com|net|org)$/);
expect(faker.definitions.internet.example_email).toContain(suffix);
expect(prefix).toMatch(/^思源_唐3/);
});

it('should return an email with special characters', () => {
const email = faker.internet.exampleEmail('Mike', 'Smith', {
allowSpecialCharacters: true,
Expand Down