From 92a3c3a85a26f53227401fa2b80ccf633ef8546d Mon Sep 17 00:00:00 2001 From: Caleb Date: Fri, 23 Feb 2018 15:52:50 -0700 Subject: [PATCH 01/13] Add config option to strip Unicode from entry filenames. --- package.json | 1 + src/backends/backend.js | 12 +++-- src/lib/__tests__/urlHelper.spec.js | 44 +++++++++++++------ src/lib/urlHelper.js | 24 +++++++--- .../content/docs/configuration-options.md | 6 +++ yarn.lock | 4 ++ 6 files changed, 68 insertions(+), 23 deletions(-) diff --git a/package.json b/package.json index 411396bc65fc..a3b966449f58 100644 --- a/package.json +++ b/package.json @@ -130,6 +130,7 @@ "dependencies": { "classnames": "^2.2.5", "create-react-class": "^15.6.0", + "diacritics": "^1.3.0", "fuzzy": "^0.1.1", "gotrue-js": "^0.9.15", "gray-matter": "^3.0.6", diff --git a/src/backends/backend.js b/src/backends/backend.js index d6e633d074fc..a6ca0340946a 100644 --- a/src/backends/backend.js +++ b/src/backends/backend.js @@ -11,6 +11,7 @@ import { } from "Reducers/collections"; import { createEntry } from "ValueObjects/Entry"; import { sanitizeSlug } from "Lib/urlHelper"; +import diacritics from 'diacritics'; import TestRepoBackend from "./test-repo/implementation"; import GitHubBackend from "./github/implementation"; import GitGatewayBackend from "./git-gateway/implementation"; @@ -41,7 +42,7 @@ class LocalStorageAuthStore { } } -const slugFormatter = (template = "{{slug}}", entryData) => { +const slugFormatter = (template = "{{slug}}", entryData, slugType) => { const date = new Date(); const getIdentifier = (entryData) => { @@ -79,7 +80,12 @@ const slugFormatter = (template = "{{slug}}", entryData) => { // Replace periods and spaces with dashes. .replace(/[.\s]/g, '-'); - return sanitizeSlug(slug); + if (slugType === "latin") { + const latinSlug = diacritics.remove(slug); + return sanitizeSlug(latinSlug, { slugType: "ascii" }); + } + + return sanitizeSlug(slug, { slugType }); }; class Backend { @@ -242,7 +248,7 @@ class Backend { if (!selectAllowNewEntries(collection)) { throw (new Error("Not allowed to create new entries in this collection")); } - const slug = slugFormatter(collection.get("slug"), entryDraft.getIn(["entry", "data"])); + const slug = slugFormatter(collection.get("slug"), entryDraft.getIn(["entry", "data"]), config.get("slug_type")); const path = selectEntryPath(collection, slug); entryObj = { path, diff --git a/src/lib/__tests__/urlHelper.spec.js b/src/lib/__tests__/urlHelper.spec.js index 13ebb7dc560a..75dabceb94b3 100644 --- a/src/lib/__tests__/urlHelper.spec.js +++ b/src/lib/__tests__/urlHelper.spec.js @@ -1,52 +1,58 @@ -import { sanitizeIRI, sanitizeSlug } from '../urlHelper'; +import { sanitizeURI, sanitizeSlug } from '../urlHelper'; -describe('sanitizeIRI', () => { - // `sanitizeIRI` tests from RFC 3987 +describe('sanitizeURI', () => { + // `sanitizeURI` tests from RFC 3987 it('should keep valid URI chars (letters digits _ - . ~)', () => { expect( - sanitizeIRI("This, that-one_or.the~other 123!") + sanitizeURI("This, that-one_or.the~other 123!") ).toEqual('Thisthat-one_or.the~other123'); }); it('should not remove accents', () => { expect( - sanitizeIRI("ěščřžý") + sanitizeURI("ěščřžý") ).toEqual('ěščřžý'); }); it('should keep valid non-latin chars (ucschars in RFC 3987)', () => { expect( - sanitizeIRI("日本語のタイトル") + sanitizeURI("日本語のタイトル") ).toEqual('日本語のタイトル'); }); + it('should not keep valid non-latin chars (ucschars in RFC 3987) if set to ASCII mode', () => { + expect( + sanitizeURI("ěščřžý日本語のタイトル", { type: 'ascii' }) + ).toEqual(''); + }); + it('should not normalize Unicode strings', () => { expect( - sanitizeIRI('\u017F\u0323\u0307') + sanitizeURI('\u017F\u0323\u0307') ).toEqual('\u017F\u0323\u0307'); expect( - sanitizeIRI('\u017F\u0323\u0307') + sanitizeURI('\u017F\u0323\u0307') ).not.toEqual('\u1E9B\u0323'); }); it('should allow a custom replacement character', () => { expect( - sanitizeIRI("duck\\goose.elephant", { replacement: '-' }) + sanitizeURI("duck\\goose.elephant", { replacement: '-' }) ).toEqual('duck-goose.elephant'); }); it('should not allow an improper replacement character', () => { expect(() => { - sanitizeIRI("I! like! dollars!", { replacement: '$' }); + sanitizeURI("I! like! dollars!", { replacement: '$' }); }).toThrow(); }); it('should not actually URI-encode the characters', () => { expect( - sanitizeIRI("🎉") + sanitizeURI("🎉") ).toEqual('🎉'); expect( - sanitizeIRI("🎉") + sanitizeURI("🎉") ).not.toEqual("%F0%9F%8E%89"); }); }); @@ -81,6 +87,18 @@ describe('sanitizeSlug', ()=> { ).toEqual('This-that-one_or.the~other-123'); }); + it('should remove accents in "latin" mode', () => { + expect( + sanitizeSlug("ěščřžý", { slugType: 'latin' }) + ).toEqual('escrzy'); + }); + + it('should remove non-latin chars in "latin" mode', () => { + expect( + sanitizeSlug("ěščřžý日本語のタイトル", { slugType: 'latin' }) + ).toEqual('escrzy'); + }); + it('removes double replacements', () => { expect(sanitizeSlug('test--test')).toEqual('test-test'); expect(sanitizeSlug('test test')).toEqual('test-test'); @@ -94,4 +112,4 @@ describe('sanitizeSlug', ()=> { expect(sanitizeSlug('test test ', { replacement: '_' })).toEqual('test_test'); }); -}); \ No newline at end of file +}); diff --git a/src/lib/urlHelper.js b/src/lib/urlHelper.js index 9b860e3be95b..fd41c9a90cdb 100644 --- a/src/lib/urlHelper.js +++ b/src/lib/urlHelper.js @@ -34,27 +34,37 @@ export function stripProtocol(url) { */ const uriChars = /[\w\-.~]/i; const ucsChars = /[\xA0-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}]/u; +const validURIChar = (char) => (uriChars.test(char)); const validIRIChar = (char) => (uriChars.test(char) || ucsChars.test(char)); -// `sanitizeIRI` does not actually URI-encode the chars (that is the browser's and server's job), just removes the ones that are not allowed. -export function sanitizeIRI(str, { replacement = "" } = {}) { +// `sanitizeURI` does not actually URI-encode the chars (that is the browser's and server's job), just removes the ones that are not allowed. +export function sanitizeURI(str, { replacement = "", type = "iri" } = {}) { if (!isString(str)) throw "The input slug must be a string."; if (!isString(replacement)) throw "`options.replacement` must be a string."; + + let validChar; + if (type === "iri") { + validChar = validIRIChar; + } else if (type === "ascii") { + validChar = validURIChar; + } else { + throw '`options.type` must be "iri" or "ascii".'; + } // Check and make sure the replacement character is actually a safe char itself. - if (!Array.from(replacement).every(validIRIChar)) throw "The replacement character(s) (options.replacement) is itself unsafe."; + if (!Array.from(replacement).every(validChar)) throw "The replacement character(s) (options.replacement) is itself unsafe."; // `Array.from` must be used instead of `String.split` because // `split` converts things like emojis into UTF-16 surrogate pairs. - return Array.from(str).map(char => (validIRIChar(char) ? char : replacement)).join(''); + return Array.from(str).map(char => (validChar(char) ? char : replacement)).join(''); } -export function sanitizeSlug(str, { replacement = '-' } = {}) { +export function sanitizeSlug(str, { replacement = '-', slugType } = {}) { if (!isString(str)) throw "The input slug must be a string."; if (!isString(replacement)) throw "`options.replacement` must be a string."; - // Sanitize as IRI (i18n URI) and as filename. + // Sanitize as URI and as filename. const sanitize = flow([ - partialRight(sanitizeIRI, { replacement }), + partialRight(sanitizeURI, { replacement, type: slugType }), partialRight(sanitizeFilename, { replacement }), ]); const sanitizedSlug = sanitize(str); diff --git a/website/site/content/docs/configuration-options.md b/website/site/content/docs/configuration-options.md index fe844b7a82af..1ab083b91955 100644 --- a/website/site/content/docs/configuration-options.md +++ b/website/site/content/docs/configuration-options.md @@ -60,6 +60,12 @@ public_folder: "/images/uploads" Based on the settings above, if a user used an image widget field called `avatar` to upload and select an image called `philosoraptor.png`, the image would be saved to the repository at `/static/images/uploads/philosoraptor.png`, and the `avatar` field for the file would be set to `/images/uploads/philosoraptor.png`. +## Slug Type + +By default, filenames (slugs) for entries created in the CMS are sanitized according to RFC3987 and the WHATWG URL spec. This spec allows non-ASCII (or non-Latin) characters to exist in URLs. However, for maximum compatibility, you can also set a different slugification option: +- `iri` (default): Keeps Unicode characters in slugs, according the the IRI draft spec (RFC3987) and the WHATWG URL spec. +- `latin`: Removes accents/diacritics from slug, then strips out all non-valid URL characters and periods (see `ascii` below). +- `ascii`: Strips out all characters except valid URI chars (RFC3986) or periods (0-9, a-z, A-Z, `_`, `-`, `~`). ## Collections diff --git a/yarn.lock b/yarn.lock index 43bd0a379cca..71f89ee67bd2 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2333,6 +2333,10 @@ detect-node@^2.0.3: version "2.0.3" resolved "https://registry.yarnpkg.com/detect-node/-/detect-node-2.0.3.tgz#a2033c09cc8e158d37748fbde7507832bd6ce127" +diacritics@^1.3.0: + version "1.3.0" + resolved "https://registry.yarnpkg.com/diacritics/-/diacritics-1.3.0.tgz#3efa87323ebb863e6696cebb0082d48ff3d6f7a1" + diff@^3.2.0: version "3.4.0" resolved "https://registry.yarnpkg.com/diff/-/diff-3.4.0.tgz#b1d85507daf3964828de54b37d0d73ba67dda56c" From 8403b9aca1749760f4a382fa0020c5be2ce2ceec Mon Sep 17 00:00:00 2001 From: Caleb Date: Fri, 23 Feb 2018 16:09:00 -0700 Subject: [PATCH 02/13] Remove unneeded code (in sanitizeSlug). --- src/backends/backend.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/backends/backend.js b/src/backends/backend.js index a6ca0340946a..7c730c87a9ca 100644 --- a/src/backends/backend.js +++ b/src/backends/backend.js @@ -77,8 +77,8 @@ const slugFormatter = (template = "{{slug}}", entryData, slugType) => { // Convert slug to lower-case .toLocaleLowerCase() - // Replace periods and spaces with dashes. - .replace(/[.\s]/g, '-'); + // Replace periods with dashes. + .replace(/[.]/g, '-'); if (slugType === "latin") { const latinSlug = diacritics.remove(slug); From 7ab0664bbe9501f63114f86e77af5137a6d45112 Mon Sep 17 00:00:00 2001 From: Caleb Date: Fri, 23 Feb 2018 16:22:03 -0700 Subject: [PATCH 03/13] Slugify media files. --- src/actions/mediaLibrary.js | 6 ++++-- src/backends/backend.js | 6 ------ src/lib/urlHelper.js | 16 ++++++++++++++-- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/src/actions/mediaLibrary.js b/src/actions/mediaLibrary.js index 4ba512a53e3f..2af3386779f9 100644 --- a/src/actions/mediaLibrary.js +++ b/src/actions/mediaLibrary.js @@ -4,6 +4,7 @@ import { createAssetProxy } from 'ValueObjects/AssetProxy'; import { getAsset, selectIntegration } from 'Reducers'; import { getIntegrationProvider } from 'Integrations'; import { addAsset } from './media'; +import { sanitizeSlug } from "Lib/urlHelper"; const { notifSend } = notifActions; @@ -79,7 +80,8 @@ export function persistMedia(file, opts = {}) { const backend = currentBackend(state.config); const integration = selectIntegration(state, null, 'assetStore'); const files = state.mediaLibrary.get('files'); - const existingFile = files.find(existingFile => existingFile.name.toLowerCase() === file.name.toLowerCase()); + const fileName = sanitizeSlug(file.name.toLowerCase(), { slugType: state.config.get('slug_type') }); + const existingFile = files.find(existingFile => existingFile.name.toLowerCase() === fileName); /** * Check for existing files of the same name before persisting. If no asset @@ -98,7 +100,7 @@ export function persistMedia(file, opts = {}) { dispatch(mediaPersisting()); try { - const assetProxy = await createAssetProxy(file.name.toLowerCase(), file, false, privateUpload); + const assetProxy = await createAssetProxy(fileName, file, false, privateUpload); dispatch(addAsset(assetProxy)); if (!integration) { const asset = await backend.persistMedia(assetProxy); diff --git a/src/backends/backend.js b/src/backends/backend.js index 7c730c87a9ca..72e1b564c5ce 100644 --- a/src/backends/backend.js +++ b/src/backends/backend.js @@ -11,7 +11,6 @@ import { } from "Reducers/collections"; import { createEntry } from "ValueObjects/Entry"; import { sanitizeSlug } from "Lib/urlHelper"; -import diacritics from 'diacritics'; import TestRepoBackend from "./test-repo/implementation"; import GitHubBackend from "./github/implementation"; import GitGatewayBackend from "./git-gateway/implementation"; @@ -80,11 +79,6 @@ const slugFormatter = (template = "{{slug}}", entryData, slugType) => { // Replace periods with dashes. .replace(/[.]/g, '-'); - if (slugType === "latin") { - const latinSlug = diacritics.remove(slug); - return sanitizeSlug(latinSlug, { slugType: "ascii" }); - } - return sanitizeSlug(slug, { slugType }); }; diff --git a/src/lib/urlHelper.js b/src/lib/urlHelper.js index fd41c9a90cdb..a13d555f6e5d 100644 --- a/src/lib/urlHelper.js +++ b/src/lib/urlHelper.js @@ -1,4 +1,5 @@ import url from 'url'; +import diacritics from 'diacritics'; import sanitizeFilename from 'sanitize-filename'; import { isString, escapeRegExp, flow, partialRight } from 'lodash'; @@ -61,12 +62,23 @@ export function sanitizeURI(str, { replacement = "", type = "iri" } = {}) { export function sanitizeSlug(str, { replacement = '-', slugType } = {}) { if (!isString(str)) throw "The input slug must be a string."; if (!isString(replacement)) throw "`options.replacement` must be a string."; - + // Sanitize as URI and as filename. - const sanitize = flow([ + let sanitize = flow([ partialRight(sanitizeURI, { replacement, type: slugType }), partialRight(sanitizeFilename, { replacement }), ]); + + // For `latin` slug type, strip diacritics and use ASCII URL. + if (slugType === "latin") { + sanitize = flow([ + diacritics.remove, + partialRight(sanitizeURI, { replacement, type: 'ascii' }), + partialRight(sanitizeFilename, { replacement }), + ]); + } + + // Run sanitizers. const sanitizedSlug = sanitize(str); // Remove any doubled or trailing replacement characters (that were added in the sanitizers). From e6e40d19039f9f090c4efa2cd1811b518256aef9 Mon Sep 17 00:00:00 2001 From: Caleb Date: Fri, 23 Feb 2018 17:45:38 -0700 Subject: [PATCH 04/13] Update configuration-options.md --- website/site/content/docs/configuration-options.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/website/site/content/docs/configuration-options.md b/website/site/content/docs/configuration-options.md index 1ab083b91955..ebc52859bc86 100644 --- a/website/site/content/docs/configuration-options.md +++ b/website/site/content/docs/configuration-options.md @@ -63,10 +63,17 @@ Based on the settings above, if a user used an image widget field called `avatar ## Slug Type By default, filenames (slugs) for entries created in the CMS are sanitized according to RFC3987 and the WHATWG URL spec. This spec allows non-ASCII (or non-Latin) characters to exist in URLs. However, for maximum compatibility, you can also set a different slugification option: + - `iri` (default): Keeps Unicode characters in slugs, according the the IRI draft spec (RFC3987) and the WHATWG URL spec. - `latin`: Removes accents/diacritics from slug, then strips out all non-valid URL characters and periods (see `ascii` below). - `ascii`: Strips out all characters except valid URI chars (RFC3986) or periods (0-9, a-z, A-Z, `_`, `-`, `~`). +**Example** + +``` yaml +slug_type: "latin" +``` + ## Collections *This setting is required.* From 81c8c553d455f2e01cd347637ac9950eea8d93da Mon Sep 17 00:00:00 2001 From: Caleb Date: Sun, 25 Feb 2018 09:10:36 -0700 Subject: [PATCH 05/13] Update configuration-options.md --- website/site/content/docs/configuration-options.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/site/content/docs/configuration-options.md b/website/site/content/docs/configuration-options.md index ebc52859bc86..90156db99c88 100644 --- a/website/site/content/docs/configuration-options.md +++ b/website/site/content/docs/configuration-options.md @@ -62,9 +62,9 @@ Based on the settings above, if a user used an image widget field called `avatar ## Slug Type -By default, filenames (slugs) for entries created in the CMS are sanitized according to RFC3987 and the WHATWG URL spec. This spec allows non-ASCII (or non-Latin) characters to exist in URLs. However, for maximum compatibility, you can also set a different slugification option: +By default, filenames (slugs) for entries created in the CMS are sanitized according to [RFC3987](https://tools.ietf.org/html/rfc3987) and the [WHATWG URL spec](https://url.spec.whatwg.org/). This spec allows non-ASCII (or non-Latin) characters to exist in URLs. However, for maximum compatibility, you can also set a different slugification option: -- `iri` (default): Keeps Unicode characters in slugs, according the the IRI draft spec (RFC3987) and the WHATWG URL spec. +- `iri` (default): Keeps Unicode characters in slugs, according the the IRI draft spec ([RFC3987](https://tools.ietf.org/html/rfc3987)) and the [WHATWG URL spec](https://url.spec.whatwg.org/). - `latin`: Removes accents/diacritics from slug, then strips out all non-valid URL characters and periods (see `ascii` below). - `ascii`: Strips out all characters except valid URI chars (RFC3986) or periods (0-9, a-z, A-Z, `_`, `-`, `~`). From b8bade2babb33135289717f248ee60dea240375a Mon Sep 17 00:00:00 2001 From: Benaiah Mischenko Date: Mon, 26 Feb 2018 17:57:04 -0800 Subject: [PATCH 06/13] Cleanup urlHelper.js --- src/lib/urlHelper.js | 65 +++++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 31 deletions(-) diff --git a/src/lib/urlHelper.js b/src/lib/urlHelper.js index a13d555f6e5d..e99f4350676f 100644 --- a/src/lib/urlHelper.js +++ b/src/lib/urlHelper.js @@ -3,8 +3,8 @@ import diacritics from 'diacritics'; import sanitizeFilename from 'sanitize-filename'; import { isString, escapeRegExp, flow, partialRight } from 'lodash'; -function getUrl(url, direct) { - return `${ direct ? '/#' : '' }${ url }`; +function getUrl(urlString, direct) { + return `${ direct ? '/#' : '' }${ urlString }`; } export function getCollectionUrl(collectionName, direct) { @@ -21,9 +21,9 @@ export function addParams(urlString, params) { return url.format(parsedUrl); } -export function stripProtocol(url) { - const protocolEndIndex = url.indexOf('//'); - return protocolEndIndex > -1 ? url.slice(protocolEndIndex + 2) : url; +export function stripProtocol(urlString) { + const protocolEndIndex = urlString.indexOf('//'); + return protocolEndIndex > -1 ? urlString.slice(protocolEndIndex + 2) : url; } /* See https://www.w3.org/International/articles/idn-and-iri/#path. @@ -35,12 +35,16 @@ export function stripProtocol(url) { */ const uriChars = /[\w\-.~]/i; const ucsChars = /[\xA0-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}]/u; -const validURIChar = (char) => (uriChars.test(char)); -const validIRIChar = (char) => (uriChars.test(char) || ucsChars.test(char)); +const validURIChar = char => uriChars.test(char); +const validIRIChar = char => uriChars.test(char) || ucsChars.test(char); // `sanitizeURI` does not actually URI-encode the chars (that is the browser's and server's job), just removes the ones that are not allowed. export function sanitizeURI(str, { replacement = "", type = "iri" } = {}) { - if (!isString(str)) throw "The input slug must be a string."; - if (!isString(replacement)) throw "`options.replacement` must be a string."; + if (!isString(str)) { + throw new Error("The input slug must be a string."); + } + if (!isString(replacement)) { + throw new Error("`options.replacement` must be a string."); + } let validChar; if (type === "iri") { @@ -48,11 +52,13 @@ export function sanitizeURI(str, { replacement = "", type = "iri" } = {}) { } else if (type === "ascii") { validChar = validURIChar; } else { - throw '`options.type` must be "iri" or "ascii".'; + throw new Error('`options.type` must be "iri" or "ascii".'); } // Check and make sure the replacement character is actually a safe char itself. - if (!Array.from(replacement).every(validChar)) throw "The replacement character(s) (options.replacement) is itself unsafe."; + if (!Array.from(replacement).every(validChar)) { + throw new Error("The replacement character(s) (options.replacement) is itself unsafe."); + } // `Array.from` must be used instead of `String.split` because // `split` converts things like emojis into UTF-16 surrogate pairs. @@ -60,30 +66,27 @@ export function sanitizeURI(str, { replacement = "", type = "iri" } = {}) { } export function sanitizeSlug(str, { replacement = '-', slugType } = {}) { - if (!isString(str)) throw "The input slug must be a string."; - if (!isString(replacement)) throw "`options.replacement` must be a string."; - - // Sanitize as URI and as filename. - let sanitize = flow([ - partialRight(sanitizeURI, { replacement, type: slugType }), - partialRight(sanitizeFilename, { replacement }), - ]); - - // For `latin` slug type, strip diacritics and use ASCII URL. - if (slugType === "latin") { - sanitize = flow([ - diacritics.remove, - partialRight(sanitizeURI, { replacement, type: 'ascii' }), - partialRight(sanitizeFilename, { replacement }), - ]); + if (!isString(str)) { + throw new Error("The input slug must be a string."); + } + if (!isString(replacement)) { + throw new Error("`options.replacement` must be a string."); } - // Run sanitizers. - const sanitizedSlug = sanitize(str); + // For `latin` slug type, strip diacritics and use ASCII URL. + const [stripDiacritics, type] = (slugType === "latin") + ? [true, 'ascii'] + : [false, slugType]; + const sanitizedSlug = flow([ + ...(stripDiacritics ? [diacritics.remove] : []), + partialRight(sanitizeURI, { replacement, type }), + partialRight(sanitizeFilename, { replacement }), + ])(str); + // Remove any doubled or trailing replacement characters (that were added in the sanitizers). - const doubleReplacement = new RegExp('(?:' + escapeRegExp(replacement) + ')+', 'g'); - const trailingReplacment = new RegExp(escapeRegExp(replacement) + '$'); + const doubleReplacement = new RegExp(`(?:${ escapeRegExp(replacement) })+`, 'g'); + const trailingReplacment = new RegExp(`${ escapeRegExp(replacement) }$`); const normalizedSlug = sanitizedSlug .replace(doubleReplacement, replacement) .replace(trailingReplacment, ''); From 31fcb3cd78c3517560a441eb8e0b3aee92e095a9 Mon Sep 17 00:00:00 2001 From: Benaiah Mischenko Date: Mon, 26 Feb 2018 18:01:06 -0800 Subject: [PATCH 07/13] Fix missed rename in last commit --- src/lib/urlHelper.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/urlHelper.js b/src/lib/urlHelper.js index e99f4350676f..1b009244328f 100644 --- a/src/lib/urlHelper.js +++ b/src/lib/urlHelper.js @@ -23,7 +23,7 @@ export function addParams(urlString, params) { export function stripProtocol(urlString) { const protocolEndIndex = urlString.indexOf('//'); - return protocolEndIndex > -1 ? urlString.slice(protocolEndIndex + 2) : url; + return protocolEndIndex > -1 ? urlString.slice(protocolEndIndex + 2) : urlString; } /* See https://www.w3.org/International/articles/idn-and-iri/#path. From deba8ad2b85576c4bf6fa78e5f790200ec914cc3 Mon Sep 17 00:00:00 2001 From: Caleb Date: Sat, 3 Mar 2018 12:53:22 -0700 Subject: [PATCH 08/13] Support multiple slug options. --- src/actions/mediaLibrary.js | 2 +- src/backends/backend.js | 6 +++--- src/lib/__tests__/urlHelper.spec.js | 24 ++++++++++----------- src/lib/functionHelper.js | 24 +++++++++++++++++++++ src/lib/urlHelper.js | 33 +++++++++++++---------------- 5 files changed, 55 insertions(+), 34 deletions(-) create mode 100644 src/lib/functionHelper.js diff --git a/src/actions/mediaLibrary.js b/src/actions/mediaLibrary.js index 2af3386779f9..8f4c129b573b 100644 --- a/src/actions/mediaLibrary.js +++ b/src/actions/mediaLibrary.js @@ -80,7 +80,7 @@ export function persistMedia(file, opts = {}) { const backend = currentBackend(state.config); const integration = selectIntegration(state, null, 'assetStore'); const files = state.mediaLibrary.get('files'); - const fileName = sanitizeSlug(file.name.toLowerCase(), { slugType: state.config.get('slug_type') }); + const fileName = sanitizeSlug(file.name.toLowerCase(), state.config.get('slug')); const existingFile = files.find(existingFile => existingFile.name.toLowerCase() === fileName); /** diff --git a/src/backends/backend.js b/src/backends/backend.js index 72e1b564c5ce..1e60da7f84c0 100644 --- a/src/backends/backend.js +++ b/src/backends/backend.js @@ -41,7 +41,7 @@ class LocalStorageAuthStore { } } -const slugFormatter = (template = "{{slug}}", entryData, slugType) => { +const slugFormatter = (template = "{{slug}}", entryData, slugConfig) => { const date = new Date(); const getIdentifier = (entryData) => { @@ -79,7 +79,7 @@ const slugFormatter = (template = "{{slug}}", entryData, slugType) => { // Replace periods with dashes. .replace(/[.]/g, '-'); - return sanitizeSlug(slug, { slugType }); + return sanitizeSlug(slug, slugConfig); }; class Backend { @@ -242,7 +242,7 @@ class Backend { if (!selectAllowNewEntries(collection)) { throw (new Error("Not allowed to create new entries in this collection")); } - const slug = slugFormatter(collection.get("slug"), entryDraft.getIn(["entry", "data"]), config.get("slug_type")); + const slug = slugFormatter(collection.get("slug"), entryDraft.getIn(["entry", "data"]), config.get("slug")); const path = selectEntryPath(collection, slug); entryObj = { path, diff --git a/src/lib/__tests__/urlHelper.spec.js b/src/lib/__tests__/urlHelper.spec.js index 75dabceb94b3..a6a4b563fd6c 100644 --- a/src/lib/__tests__/urlHelper.spec.js +++ b/src/lib/__tests__/urlHelper.spec.js @@ -71,14 +71,14 @@ describe('sanitizeSlug', ()=> { }); it('throws an error for non-string replacements', () => { - expect(() => sanitizeSlug('test', { replacement: {} })).toThrowError("`options.replacement` must be a string."); - expect(() => sanitizeSlug('test', { replacement: [] })).toThrowError("`options.replacement` must be a string."); - expect(() => sanitizeSlug('test', { replacement: false })).toThrowError("`options.replacement` must be a string."); - expect(() => sanitizeSlug('test', { replacement: null } )).toThrowError("`options.replacement` must be a string."); - expect(() => sanitizeSlug('test', { replacement: 11232 })).toThrowError("`options.replacement` must be a string."); + expect(() => sanitizeSlug('test', { filter_replacement: {} })).toThrowError("`options.replacement` must be a string."); + expect(() => sanitizeSlug('test', { filter_replacement: [] })).toThrowError("`options.replacement` must be a string."); + expect(() => sanitizeSlug('test', { filter_replacement: false })).toThrowError("`options.replacement` must be a string."); + expect(() => sanitizeSlug('test', { filter_replacement: null } )).toThrowError("`options.replacement` must be a string."); + expect(() => sanitizeSlug('test', { filter_replacement: 11232 })).toThrowError("`options.replacement` must be a string."); // do not test undefined for this variant since a default is set in the cosntructor. - //expect(() => sanitizeSlug('test', { replacement: undefined })).toThrowError("`options.replacement` must be a string."); - expect(() => sanitizeSlug('test', { replacement: ()=>{} })).toThrowError("`options.replacement` must be a string."); + //expect(() => sanitizeSlug('test', { filter_replacement: undefined })).toThrowError("`options.replacement` must be a string."); + expect(() => sanitizeSlug('test', { filter_replacement: ()=>{} })).toThrowError("`options.replacement` must be a string."); }); it('should keep valid URI chars (letters digits _ - . ~)', () => { @@ -87,15 +87,15 @@ describe('sanitizeSlug', ()=> { ).toEqual('This-that-one_or.the~other-123'); }); - it('should remove accents in "latin" mode', () => { + it('should remove accents if set', () => { expect( - sanitizeSlug("ěščřžý", { slugType: 'latin' }) + sanitizeSlug("ěščřžý", { strip_accents: true }) ).toEqual('escrzy'); }); - it('should remove non-latin chars in "latin" mode', () => { + it('should remove non-latin chars in "ascii" mode', () => { expect( - sanitizeSlug("ěščřžý日本語のタイトル", { slugType: 'latin' }) + sanitizeSlug("ěščřžý日本語のタイトル", { filter: 'ascii' }) ).toEqual('escrzy'); }); @@ -109,7 +109,7 @@ describe('sanitizeSlug', ()=> { }); it('uses alternate replacements', () => { - expect(sanitizeSlug('test test ', { replacement: '_' })).toEqual('test_test'); + expect(sanitizeSlug('test test ', { filter_replacement: '_' })).toEqual('test_test'); }); }); diff --git a/src/lib/functionHelper.js b/src/lib/functionHelper.js new file mode 100644 index 000000000000..a3fed68ed423 --- /dev/null +++ b/src/lib/functionHelper.js @@ -0,0 +1,24 @@ +import { isImmutable } from 'immutable'; +import { isString } from 'lodash'; + +/** + * Turns a string or Immutable object into a JS object. + * Useful for function options. + * + * The first parameter should be the object key + * that you want set if the option is a string. + * + * Example Usage: + * const { bob = true, cat = false } = stringOptions('cat', options); + */ +export function stringOptions(defaultKey, options) { + if (options === undefined) { + return {}; + } else if (isString(options)) { + return { [defaultKey]: options }; + } else if (isImmutable(options)) { + return options.toJS(); + } else { + return options; + } +} \ No newline at end of file diff --git a/src/lib/urlHelper.js b/src/lib/urlHelper.js index 1b009244328f..fe05020213da 100644 --- a/src/lib/urlHelper.js +++ b/src/lib/urlHelper.js @@ -2,6 +2,7 @@ import url from 'url'; import diacritics from 'diacritics'; import sanitizeFilename from 'sanitize-filename'; import { isString, escapeRegExp, flow, partialRight } from 'lodash'; +import { stringOptions } from './functionHelper'; function getUrl(urlString, direct) { return `${ direct ? '/#' : '' }${ urlString }`; @@ -38,7 +39,7 @@ const ucsChars = /[\xA0-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1 const validURIChar = char => uriChars.test(char); const validIRIChar = char => uriChars.test(char) || ucsChars.test(char); // `sanitizeURI` does not actually URI-encode the chars (that is the browser's and server's job), just removes the ones that are not allowed. -export function sanitizeURI(str, { replacement = "", type = "iri" } = {}) { +export function sanitizeURI(str, { replacement = "", filter = "unicode" } = {}) { if (!isString(str)) { throw new Error("The input slug must be a string."); } @@ -47,12 +48,12 @@ export function sanitizeURI(str, { replacement = "", type = "iri" } = {}) { } let validChar; - if (type === "iri") { + if (filter === "unicode") { validChar = validIRIChar; - } else if (type === "ascii") { + } else if (filter === "ascii") { validChar = validURIChar; } else { - throw new Error('`options.type` must be "iri" or "ascii".'); + throw new Error('`options.filter` must be "unicode" or "ascii".'); } // Check and make sure the replacement character is actually a safe char itself. @@ -65,22 +66,18 @@ export function sanitizeURI(str, { replacement = "", type = "iri" } = {}) { return Array.from(str).map(char => (validChar(char) ? char : replacement)).join(''); } -export function sanitizeSlug(str, { replacement = '-', slugType } = {}) { - if (!isString(str)) { - throw new Error("The input slug must be a string."); - } - if (!isString(replacement)) { - throw new Error("`options.replacement` must be a string."); - } - - // For `latin` slug type, strip diacritics and use ASCII URL. - const [stripDiacritics, type] = (slugType === "latin") - ? [true, 'ascii'] - : [false, slugType]; +export function sanitizeSlug(str, options) { + const { + filter = 'unicode', + strip_accents: stripAccents = false, + filter_replacement: replacement = '-', + } = stringOptions('filter', options); + + if (!isString(str)) { throw new Error("The input slug must be a string."); } const sanitizedSlug = flow([ - ...(stripDiacritics ? [diacritics.remove] : []), - partialRight(sanitizeURI, { replacement, type }), + ...(stripAccents ? [diacritics.remove] : []), + partialRight(sanitizeURI, { replacement, filter }), partialRight(sanitizeFilename, { replacement }), ])(str); From 03c7ad90bcf44a531ce73a215d01dfaaf910ca3b Mon Sep 17 00:00:00 2001 From: Caleb Date: Sat, 17 Mar 2018 14:07:01 -0600 Subject: [PATCH 09/13] Update for multiple options. --- src/lib/__tests__/urlHelper.spec.js | 2 +- src/lib/functionHelper.js | 24 ------------------- src/lib/urlHelper.js | 10 ++++---- .../content/docs/configuration-options.md | 15 ++++++++---- 4 files changed, 15 insertions(+), 36 deletions(-) delete mode 100644 src/lib/functionHelper.js diff --git a/src/lib/__tests__/urlHelper.spec.js b/src/lib/__tests__/urlHelper.spec.js index a6a4b563fd6c..1fe6b999bdc8 100644 --- a/src/lib/__tests__/urlHelper.spec.js +++ b/src/lib/__tests__/urlHelper.spec.js @@ -89,7 +89,7 @@ describe('sanitizeSlug', ()=> { it('should remove accents if set', () => { expect( - sanitizeSlug("ěščřžý", { strip_accents: true }) + sanitizeSlug("ěščřžý", { clean_accents: true }) ).toEqual('escrzy'); }); diff --git a/src/lib/functionHelper.js b/src/lib/functionHelper.js deleted file mode 100644 index a3fed68ed423..000000000000 --- a/src/lib/functionHelper.js +++ /dev/null @@ -1,24 +0,0 @@ -import { isImmutable } from 'immutable'; -import { isString } from 'lodash'; - -/** - * Turns a string or Immutable object into a JS object. - * Useful for function options. - * - * The first parameter should be the object key - * that you want set if the option is a string. - * - * Example Usage: - * const { bob = true, cat = false } = stringOptions('cat', options); - */ -export function stringOptions(defaultKey, options) { - if (options === undefined) { - return {}; - } else if (isString(options)) { - return { [defaultKey]: options }; - } else if (isImmutable(options)) { - return options.toJS(); - } else { - return options; - } -} \ No newline at end of file diff --git a/src/lib/urlHelper.js b/src/lib/urlHelper.js index fe05020213da..e6b7432c5ab1 100644 --- a/src/lib/urlHelper.js +++ b/src/lib/urlHelper.js @@ -67,16 +67,14 @@ export function sanitizeURI(str, { replacement = "", filter = "unicode" } = {}) } export function sanitizeSlug(str, options) { - const { - filter = 'unicode', - strip_accents: stripAccents = false, - filter_replacement: replacement = '-', - } = stringOptions('filter', options); + const filter = options.get('filter', 'unicode'); + const stripDiacritics = options.get('clean_accents', false); + const replacement = options.get('filter_replacement', '-'); if (!isString(str)) { throw new Error("The input slug must be a string."); } const sanitizedSlug = flow([ - ...(stripAccents ? [diacritics.remove] : []), + ...(stripDiacritics ? [diacritics.remove] : []), partialRight(sanitizeURI, { replacement, filter }), partialRight(sanitizeFilename, { replacement }), ])(str); diff --git a/website/site/content/docs/configuration-options.md b/website/site/content/docs/configuration-options.md index 90156db99c88..b6ae6e8eab71 100644 --- a/website/site/content/docs/configuration-options.md +++ b/website/site/content/docs/configuration-options.md @@ -62,16 +62,21 @@ Based on the settings above, if a user used an image widget field called `avatar ## Slug Type -By default, filenames (slugs) for entries created in the CMS are sanitized according to [RFC3987](https://tools.ietf.org/html/rfc3987) and the [WHATWG URL spec](https://url.spec.whatwg.org/). This spec allows non-ASCII (or non-Latin) characters to exist in URLs. However, for maximum compatibility, you can also set a different slugification option: +The `slug` option allows you to change how filenames for entries are created and sanitized. For modifying the actual data in a slug, see the per-collection option below. -- `iri` (default): Keeps Unicode characters in slugs, according the the IRI draft spec ([RFC3987](https://tools.ietf.org/html/rfc3987)) and the [WHATWG URL spec](https://url.spec.whatwg.org/). -- `latin`: Removes accents/diacritics from slug, then strips out all non-valid URL characters and periods (see `ascii` below). -- `ascii`: Strips out all characters except valid URI chars (RFC3986) or periods (0-9, a-z, A-Z, `_`, `-`, `~`). +`slug` accepts multiple options: + +- `encoding` + - `unicode` (default): Sanitize filenames (slugs) according to [RFC3987](https://tools.ietf.org/html/rfc3987) and the [WHATWG URL spec](https://url.spec.whatwg.org/). This spec allows non-ASCII (or non-Latin) characters to exist in URLs. + - `ascii`: Sanitize filenames (slugs) according to [RFC3986](https://tools.ietf.org/html/rfc3986). The only allowed characters are (0-9, a-z, A-Z, `_`, `-`, `~`). +- `clean_accents`: Set to `true` to remove diacritics from slug characters before sanitizing. This is often helpful when using `ascii` encoding. **Example** ``` yaml -slug_type: "latin" +slug: + encoding: "ascii" + clean_accents: true ``` ## Collections From 2f65f1188ca2f07d395f5cee745bcf846f56a313 Mon Sep 17 00:00:00 2001 From: Caleb Date: Sat, 17 Mar 2018 14:52:28 -0600 Subject: [PATCH 10/13] Convert filter to encoding. --- src/lib/__tests__/urlHelper.spec.js | 18 +++++++++--------- src/lib/urlHelper.js | 14 +++++++------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/lib/__tests__/urlHelper.spec.js b/src/lib/__tests__/urlHelper.spec.js index 1fe6b999bdc8..c80542c9974e 100644 --- a/src/lib/__tests__/urlHelper.spec.js +++ b/src/lib/__tests__/urlHelper.spec.js @@ -71,14 +71,14 @@ describe('sanitizeSlug', ()=> { }); it('throws an error for non-string replacements', () => { - expect(() => sanitizeSlug('test', { filter_replacement: {} })).toThrowError("`options.replacement` must be a string."); - expect(() => sanitizeSlug('test', { filter_replacement: [] })).toThrowError("`options.replacement` must be a string."); - expect(() => sanitizeSlug('test', { filter_replacement: false })).toThrowError("`options.replacement` must be a string."); - expect(() => sanitizeSlug('test', { filter_replacement: null } )).toThrowError("`options.replacement` must be a string."); - expect(() => sanitizeSlug('test', { filter_replacement: 11232 })).toThrowError("`options.replacement` must be a string."); + expect(() => sanitizeSlug('test', { sanitize_replacement: {} })).toThrowError("`options.replacement` must be a string."); + expect(() => sanitizeSlug('test', { sanitize_replacement: [] })).toThrowError("`options.replacement` must be a string."); + expect(() => sanitizeSlug('test', { sanitize_replacement: false })).toThrowError("`options.replacement` must be a string."); + expect(() => sanitizeSlug('test', { sanitize_replacement: null } )).toThrowError("`options.replacement` must be a string."); + expect(() => sanitizeSlug('test', { sanitize_replacement: 11232 })).toThrowError("`options.replacement` must be a string."); // do not test undefined for this variant since a default is set in the cosntructor. - //expect(() => sanitizeSlug('test', { filter_replacement: undefined })).toThrowError("`options.replacement` must be a string."); - expect(() => sanitizeSlug('test', { filter_replacement: ()=>{} })).toThrowError("`options.replacement` must be a string."); + //expect(() => sanitizeSlug('test', { sanitize_replacement: undefined })).toThrowError("`options.replacement` must be a string."); + expect(() => sanitizeSlug('test', { sanitize_replacement: ()=>{} })).toThrowError("`options.replacement` must be a string."); }); it('should keep valid URI chars (letters digits _ - . ~)', () => { @@ -95,7 +95,7 @@ describe('sanitizeSlug', ()=> { it('should remove non-latin chars in "ascii" mode', () => { expect( - sanitizeSlug("ěščřžý日本語のタイトル", { filter: 'ascii' }) + sanitizeSlug("ěščřžý日本語のタイトル", { encoding: 'ascii' }) ).toEqual('escrzy'); }); @@ -109,7 +109,7 @@ describe('sanitizeSlug', ()=> { }); it('uses alternate replacements', () => { - expect(sanitizeSlug('test test ', { filter_replacement: '_' })).toEqual('test_test'); + expect(sanitizeSlug('test test ', { sanitize_replacement: '_' })).toEqual('test_test'); }); }); diff --git a/src/lib/urlHelper.js b/src/lib/urlHelper.js index e6b7432c5ab1..86aaafc22391 100644 --- a/src/lib/urlHelper.js +++ b/src/lib/urlHelper.js @@ -39,7 +39,7 @@ const ucsChars = /[\xA0-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1 const validURIChar = char => uriChars.test(char); const validIRIChar = char => uriChars.test(char) || ucsChars.test(char); // `sanitizeURI` does not actually URI-encode the chars (that is the browser's and server's job), just removes the ones that are not allowed. -export function sanitizeURI(str, { replacement = "", filter = "unicode" } = {}) { +export function sanitizeURI(str, { replacement = "", encoding = "unicode" } = {}) { if (!isString(str)) { throw new Error("The input slug must be a string."); } @@ -48,12 +48,12 @@ export function sanitizeURI(str, { replacement = "", filter = "unicode" } = {}) } let validChar; - if (filter === "unicode") { + if (encoding === "unicode") { validChar = validIRIChar; - } else if (filter === "ascii") { + } else if (encoding === "ascii") { validChar = validURIChar; } else { - throw new Error('`options.filter` must be "unicode" or "ascii".'); + throw new Error('`options.encoding` must be "unicode" or "ascii".'); } // Check and make sure the replacement character is actually a safe char itself. @@ -67,15 +67,15 @@ export function sanitizeURI(str, { replacement = "", filter = "unicode" } = {}) } export function sanitizeSlug(str, options) { - const filter = options.get('filter', 'unicode'); + const encoding = options.get('encoding', 'unicode'); const stripDiacritics = options.get('clean_accents', false); - const replacement = options.get('filter_replacement', '-'); + const replacement = options.get('sanitize_replacement', '-'); if (!isString(str)) { throw new Error("The input slug must be a string."); } const sanitizedSlug = flow([ ...(stripDiacritics ? [diacritics.remove] : []), - partialRight(sanitizeURI, { replacement, filter }), + partialRight(sanitizeURI, { replacement, encoding }), partialRight(sanitizeFilename, { replacement }), ])(str); From 202e985713260d875da2470b5dd4b570476a8c5c Mon Sep 17 00:00:00 2001 From: Caleb Date: Sat, 17 Mar 2018 15:22:32 -0600 Subject: [PATCH 11/13] Add validation. --- src/actions/config.js | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/actions/config.js b/src/actions/config.js index becbe783cfa7..12144a8d430d 100644 --- a/src/actions/config.js +++ b/src/actions/config.js @@ -1,6 +1,6 @@ import yaml from "js-yaml"; import { Map, List, fromJS } from "immutable"; -import { trimStart, flow } from "lodash"; +import { trimStart, flow, isBoolean } from "lodash"; import { authenticateUser } from "Actions/auth"; import * as publishModes from "Constants/publishModes"; @@ -43,6 +43,13 @@ export function validateConfig(config) { if (typeof config.get('media_folder') !== 'string') { throw new Error("Error in configuration file: Your `media_folder` must be a string. Check your config.yml file."); } + const slug_encoding = config.getIn(['slug', 'encoding'], "unicode"); + if (slug_encoding !== "unicode" && slug_encoding !== "ascii") { + throw new Error("Error in configuration file: Your `slug.encoding` must be either `unicode` or `ascii`. Check your config.yml file.") + } + if (!isBoolean(config.getIn(['slug', 'clean_accents'], false))) { + throw new Error("Error in configuration file: Your `slug.clean_accents` must be a boolean. Check your config.yml file."); + } if (!config.get('collections')) { throw new Error("Error in configuration file: A `collections` wasn\'t found. Check your config.yml file."); } From 7cdf7d7ee554592dff0cda0f7e5aba4ee8bb4aa9 Mon Sep 17 00:00:00 2001 From: Caleb Date: Sat, 17 Mar 2018 16:19:12 -0600 Subject: [PATCH 12/13] Fix tests. --- src/lib/__tests__/urlHelper.spec.js | 27 +++++++++++++++++---------- src/lib/urlHelper.js | 4 ++-- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/lib/__tests__/urlHelper.spec.js b/src/lib/__tests__/urlHelper.spec.js index c80542c9974e..6ffaf795587c 100644 --- a/src/lib/__tests__/urlHelper.spec.js +++ b/src/lib/__tests__/urlHelper.spec.js @@ -1,3 +1,4 @@ +import { Map } from 'immutable'; import { sanitizeURI, sanitizeSlug } from '../urlHelper'; describe('sanitizeURI', () => { @@ -22,7 +23,7 @@ describe('sanitizeURI', () => { it('should not keep valid non-latin chars (ucschars in RFC 3987) if set to ASCII mode', () => { expect( - sanitizeURI("ěščřžý日本語のタイトル", { type: 'ascii' }) + sanitizeURI("ěščřžý日本語のタイトル", { encoding: 'ascii' }) ).toEqual(''); }); @@ -71,14 +72,14 @@ describe('sanitizeSlug', ()=> { }); it('throws an error for non-string replacements', () => { - expect(() => sanitizeSlug('test', { sanitize_replacement: {} })).toThrowError("`options.replacement` must be a string."); - expect(() => sanitizeSlug('test', { sanitize_replacement: [] })).toThrowError("`options.replacement` must be a string."); - expect(() => sanitizeSlug('test', { sanitize_replacement: false })).toThrowError("`options.replacement` must be a string."); - expect(() => sanitizeSlug('test', { sanitize_replacement: null } )).toThrowError("`options.replacement` must be a string."); - expect(() => sanitizeSlug('test', { sanitize_replacement: 11232 })).toThrowError("`options.replacement` must be a string."); + expect(() => sanitizeSlug('test', Map({ sanitize_replacement: {} }))).toThrowError("`options.replacement` must be a string."); + expect(() => sanitizeSlug('test', Map({ sanitize_replacement: [] }))).toThrowError("`options.replacement` must be a string."); + expect(() => sanitizeSlug('test', Map({ sanitize_replacement: false }))).toThrowError("`options.replacement` must be a string."); + expect(() => sanitizeSlug('test', Map({ sanitize_replacement: null } ))).toThrowError("`options.replacement` must be a string."); + expect(() => sanitizeSlug('test', Map({ sanitize_replacement: 11232 }))).toThrowError("`options.replacement` must be a string."); // do not test undefined for this variant since a default is set in the cosntructor. //expect(() => sanitizeSlug('test', { sanitize_replacement: undefined })).toThrowError("`options.replacement` must be a string."); - expect(() => sanitizeSlug('test', { sanitize_replacement: ()=>{} })).toThrowError("`options.replacement` must be a string."); + expect(() => sanitizeSlug('test', Map({ sanitize_replacement: ()=>{} }))).toThrowError("`options.replacement` must be a string."); }); it('should keep valid URI chars (letters digits _ - . ~)', () => { @@ -89,13 +90,19 @@ describe('sanitizeSlug', ()=> { it('should remove accents if set', () => { expect( - sanitizeSlug("ěščřžý", { clean_accents: true }) + sanitizeSlug("ěščřžý", Map({ clean_accents: true })) ).toEqual('escrzy'); }); it('should remove non-latin chars in "ascii" mode', () => { expect( - sanitizeSlug("ěščřžý日本語のタイトル", { encoding: 'ascii' }) + sanitizeSlug("ěščřžý日本語のタイトル", Map({ encoding: 'ascii' })) + ).toEqual(''); + }); + + it('should clean accents and strip non-latin chars in "ascii" mode with `clean_accents` set', () => { + expect( + sanitizeSlug("ěščřžý日本語のタイトル", Map({ encoding: 'ascii', clean_accents: true })) ).toEqual('escrzy'); }); @@ -109,7 +116,7 @@ describe('sanitizeSlug', ()=> { }); it('uses alternate replacements', () => { - expect(sanitizeSlug('test test ', { sanitize_replacement: '_' })).toEqual('test_test'); + expect(sanitizeSlug('test test ', Map({ sanitize_replacement: '_' }))).toEqual('test_test'); }); }); diff --git a/src/lib/urlHelper.js b/src/lib/urlHelper.js index 86aaafc22391..4ef79b8d21bf 100644 --- a/src/lib/urlHelper.js +++ b/src/lib/urlHelper.js @@ -2,7 +2,7 @@ import url from 'url'; import diacritics from 'diacritics'; import sanitizeFilename from 'sanitize-filename'; import { isString, escapeRegExp, flow, partialRight } from 'lodash'; -import { stringOptions } from './functionHelper'; +import { Map } from 'immutable'; function getUrl(urlString, direct) { return `${ direct ? '/#' : '' }${ urlString }`; @@ -66,7 +66,7 @@ export function sanitizeURI(str, { replacement = "", encoding = "unicode" } = {} return Array.from(str).map(char => (validChar(char) ? char : replacement)).join(''); } -export function sanitizeSlug(str, options) { +export function sanitizeSlug(str, options = Map()) { const encoding = options.get('encoding', 'unicode'); const stripDiacritics = options.get('clean_accents', false); const replacement = options.get('sanitize_replacement', '-'); From 0d12e6be77b82bf764becb6cd6b0b6c7d8935a5e Mon Sep 17 00:00:00 2001 From: Caleb Date: Tue, 27 Mar 2018 09:39:07 -0600 Subject: [PATCH 13/13] Update urlHelper.spec.js --- src/lib/__tests__/urlHelper.spec.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/__tests__/urlHelper.spec.js b/src/lib/__tests__/urlHelper.spec.js index 6ffaf795587c..de27c31c5f59 100644 --- a/src/lib/__tests__/urlHelper.spec.js +++ b/src/lib/__tests__/urlHelper.spec.js @@ -88,7 +88,7 @@ describe('sanitizeSlug', ()=> { ).toEqual('This-that-one_or.the~other-123'); }); - it('should remove accents if set', () => { + it('should remove accents with `clean_accents` set', () => { expect( sanitizeSlug("ěščřžý", Map({ clean_accents: true })) ).toEqual('escrzy');