From 24946ec4e3eb7757d88cdcb6afff03a149429e33 Mon Sep 17 00:00:00 2001 From: "Val Neekman (AvidCoder)" Date: Thu, 25 Jan 2024 11:24:52 -0500 Subject: [PATCH 1/4] pre normalize --- CHANGELOG.md | 5 +++-- slugify/__version__.py | 2 +- slugify/slugify.py | 9 ++++++--- test.py | 4 ++++ 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 395e538..015c5bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,8 @@ ## Work in progress -- Added typing to API and expose `py.typed`. -- Formally support 3.12 +## 8.0.2 + +- Normalize text before converting to unicode. (chuckyblack - thx) ## 8.0.1 diff --git a/slugify/__version__.py b/slugify/__version__.py index a558d9b..dbbff9f 100644 --- a/slugify/__version__.py +++ b/slugify/__version__.py @@ -5,4 +5,4 @@ __url__ = 'https://github.com/un33k/python-slugify' __license__ = 'MIT' __copyright__ = 'Copyright 2022 Val Neekman @ Neekware Inc.' -__version__ = '8.0.1' +__version__ = '8.0.2' diff --git a/slugify/slugify.py b/slugify/slugify.py index 21bdaeb..9242e3e 100644 --- a/slugify/slugify.py +++ b/slugify/slugify.py @@ -118,8 +118,11 @@ def slugify( # replace quotes with dashes - pre-process text = QUOTE_PATTERN.sub(DEFAULT_SEPARATOR, text) - # decode unicode - if not allow_unicode: + # normalize text, convert to unicode if required + if allow_unicode: + text = unicodedata.normalize('NFKC', text) + else: + text = unicodedata.normalize('NFKD', text) text = unidecode.unidecode(text) # ensure text is still in unicode @@ -144,7 +147,7 @@ def slugify( except Exception: pass - # translate + # re normalize text if allow_unicode: text = unicodedata.normalize('NFKC', text) else: diff --git a/test.py b/test.py index 931f38f..995affa 100644 --- a/test.py +++ b/test.py @@ -36,6 +36,10 @@ def test_phonetic_conversion_of_eastern_scripts(self): self.assertEqual(r, "ying-shi-ma") def test_accented_text(self): + txt = '𝐚́́𝕒́àéé' + r = slugify(txt) + self.assertEqual(r, "aaaee") + txt = 'C\'est déjà l\'été.' r = slugify(txt) self.assertEqual(r, "c-est-deja-l-ete") From 3b61126a72c07b6fd7378aeba2f58fe787aa011a Mon Sep 17 00:00:00 2001 From: "Val Neekman (AvidCoder)" Date: Thu, 25 Jan 2024 11:30:32 -0500 Subject: [PATCH 2/4] upversion node support --- .github/workflows/ci.yml | 4 ++-- .github/workflows/dev.yml | 4 ++-- .github/workflows/main.yml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 36959b0..71bc219 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,9 +17,9 @@ jobs: python: [3.7, 3.8, 3.9, "3.10", 3.11, 3.12, pypy3.8] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: setup python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} - name: Install dependencies diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index c12b80a..88791a7 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -17,9 +17,9 @@ jobs: python: [3.7, 3.8, 3.9, "3.10", 3.11, 3.12, pypy3.8] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: setup python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} - name: Install dependencies diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index eb66dc5..7a9c77e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -16,9 +16,9 @@ jobs: python: [3.7, 3.8, 3.9, "3.10", 3.11, 3.12, pypy3.8] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: setup python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} - name: Install dependencies From ea42704bf74c8a914de49fce0e5543481c5ef39c Mon Sep 17 00:00:00 2001 From: "Val Neekman (AvidCoder)" Date: Thu, 25 Jan 2024 11:35:46 -0500 Subject: [PATCH 3/4] format fix --- test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test.py b/test.py index 995affa..4f960fa 100644 --- a/test.py +++ b/test.py @@ -39,7 +39,7 @@ def test_accented_text(self): txt = '𝐚́́𝕒́àéé' r = slugify(txt) self.assertEqual(r, "aaaee") - + txt = 'C\'est déjà l\'été.' r = slugify(txt) self.assertEqual(r, "c-est-deja-l-ete") From 237ccbd93244a0d435bfdb56f94ae4008a7b85b9 Mon Sep 17 00:00:00 2001 From: "Val Neekman (AvidCoder)" Date: Thu, 25 Jan 2024 11:39:22 -0500 Subject: [PATCH 4/4] more test --- CHANGELOG.md | 2 +- test.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 015c5bb..eb60bee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ ## 8.0.2 -- Normalize text before converting to unicode. (chuckyblack - thx) +- Normalize text before converting to unicode. (@chuckyblack - thx) ## 8.0.1 diff --git a/test.py b/test.py index 4f960fa..2534499 100644 --- a/test.py +++ b/test.py @@ -36,9 +36,9 @@ def test_phonetic_conversion_of_eastern_scripts(self): self.assertEqual(r, "ying-shi-ma") def test_accented_text(self): - txt = '𝐚́́𝕒́àéé' + txt = '𝐚́́𝕒́àáâäãąā' r = slugify(txt) - self.assertEqual(r, "aaaee") + self.assertEqual(r, "aaaaaaaaa") txt = 'C\'est déjà l\'été.' r = slugify(txt)