Skip to content

Commit

Permalink
Add Wikipedia sanitizer as per #434
Browse files Browse the repository at this point in the history
  • Loading branch information
jmbreuer committed Oct 24, 2024
1 parent 67bfcde commit 15cf278
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ import com.svenjacobs.app.leon.core.domain.sanitizer.theguardian.TheGuardianSani
import com.svenjacobs.app.leon.core.domain.sanitizer.threads.ThreadsSanitizer
import com.svenjacobs.app.leon.core.domain.sanitizer.tiktok.TiktokSanitizer
import com.svenjacobs.app.leon.core.domain.sanitizer.webtrekk.WebtrekkSanitizer
import com.svenjacobs.app.leon.core.domain.sanitizer.wikipedia.WikipediaSanitizer
import com.svenjacobs.app.leon.core.domain.sanitizer.x.XSanitizer
import com.svenjacobs.app.leon.core.domain.sanitizer.yahoo.YahooSearchSanitizer
import com.svenjacobs.app.leon.core.domain.sanitizer.yandex.YandexSanitizer
Expand Down Expand Up @@ -113,6 +114,7 @@ class ContainerInitializer : DistinctInitializer<Unit> {
ThreadsSanitizer(),
TiktokSanitizer(),
WebtrekkSanitizer(),
WikipediaSanitizer(),
XSanitizer(),
YahooSearchSanitizer(),
YandexSanitizer(),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* Léon - The URL Cleaner
* Copyright (C) 2023 Sven Jacobs
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package com.svenjacobs.app.leon.core.domain.sanitizer.wikipedia

import android.content.Context
import com.svenjacobs.app.leon.core.common.domain.matchesDomainRegex
import com.svenjacobs.app.leon.core.common.regex.RegexFactory
import com.svenjacobs.app.leon.core.domain.R
import com.svenjacobs.app.leon.core.domain.sanitizer.RegexSanitizer
import com.svenjacobs.app.leon.core.domain.sanitizer.Sanitizer
import com.svenjacobs.app.leon.core.domain.sanitizer.SanitizerId

/**
 * Sanitizer that removes the `wprov` query parameter from Wikipedia URLs,
 * e.g. `https://en.wikipedia.org/wiki/Kerosene?wprov=sfla1` becomes
 * `https://en.wikipedia.org/wiki/Kerosene`.
 *
 * The removal regex comes from [RegexFactory.ofParameter]; this class only supplies
 * the parameter name, the sanitizer id, the display name and the domain restriction.
 */
class WikipediaSanitizer :
    RegexSanitizer(
        regex = RegexFactory.ofParameter("wprov"),
    ) {

    override val id = SanitizerId("wikipedia")

    /** Returns metadata whose name is read from `R.string.sanitizer_wikipedia_name`. */
    override fun getMetadata(context: Context) = Sanitizer.Metadata(
        name = context.getString(R.string.sanitizer_wikipedia_name),
    )

    /**
     * Checks [input] against `wikipedia.org` with an optional subdomain prefix
     * (for example `en.` or `de.m.`).
     *
     * Both dots of the domain are escaped so that they only match a literal `.`;
     * an unescaped dot would also accept look-alike hosts such as `wikipediaXorg`.
     */
    override fun matchesDomain(input: String) = input.matchesDomainRegex(
        domain = "(.*\\.)?wikipedia\\.org",
    )
}
1 change: 1 addition & 0 deletions core-domain/src/main/res/values/strings.xml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
<string name="sanitizer_threads" translatable="false">Threads.net</string>
<string name="sanitizer_tiktok_name" translatable="false">TikTok</string>
<string name="sanitizer_webtrekk_name" translatable="false">Webtrekk</string>
<string name="sanitizer_wikipedia_name" translatable="false">Wikipedia</string>
<string name="sanitizer_x_name" translatable="false">X (Twitter)</string>
<string name="sanitizer_yahoo_search_name">Yahoo Search</string>
<string name="sanitizer_yandex" translatable="false">Yandex</string>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*
* Léon - The URL Cleaner
* Copyright (C) 2023 Sven Jacobs
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package com.svenjacobs.app.leon.core.domain.sanitizer.wikipedia

import io.kotest.core.spec.style.WordSpec
import io.kotest.matchers.shouldBe

/**
 * Tests for [WikipediaSanitizer]: removal of the `wprov` parameter and domain matching
 * for the bare domain, a language subdomain and a mobile subdomain.
 */
class WikipediaSanitizerTest :
    WordSpec(
        {
            val sanitizer = WikipediaSanitizer()

            "invoke" should {

                "clean en.wikipedia.org URLs" {
                    sanitizer("https://en.wikipedia.org/wiki/Kerosene?wprov=sfla1") shouldBe
                        "https://en.wikipedia.org/wiki/Kerosene"
                }
            }

            "matchesDomain" should {

                "match wikipedia.org" {
                    sanitizer.matchesDomain("https://wikipedia.org") shouldBe true
                }

                "match en.wikipedia.org" {
                    sanitizer.matchesDomain("https://en.wikipedia.org") shouldBe true
                }

                // Test name now states the host that is actually asserted on.
                "match de.m.wikipedia.org" {
                    sanitizer.matchesDomain("https://de.m.wikipedia.org") shouldBe true
                }

                "don't match google.com" {
                    sanitizer.matchesDomain("https://google.com") shouldBe false
                }
            }
        },
    )

0 comments on commit 15cf278

Please sign in to comment.