diff --git a/config.template.toml b/config.template.toml index 3061cd3..c4ba980 100644 --- a/config.template.toml +++ b/config.template.toml @@ -10,6 +10,9 @@ pythonista = '' # optional key [DATABASE] dsn = 'postgres://pythonistabot:pythonistabot@database:5432/pythonistabot' # assumed default +[BADBIN] +domains = ["pastebin.com", "hastebin.com"] + # 50 = CRITICAL # 40 = ERROR # 30 = WARNING diff --git a/modules/moderation.py b/modules/moderation.py index 50cf6aa..ff9bfd0 100644 --- a/modules/moderation.py +++ b/modules/moderation.py @@ -26,11 +26,13 @@ import base64 import binascii import datetime +import logging import re from textwrap import shorten from typing import TYPE_CHECKING, Any, Self, TypeAlias import discord +import mystbin import yarl from discord.ext import commands @@ -45,6 +47,9 @@ ModLogType: TypeAlias = PythonistaAPIWebsocketPayload[ModLogPayload] +logger = logging.getLogger(__name__) + +BASE_BADBIN_RE = r"https://(?P{domains})/(?P[a-zA-Z0-9]+)[.]?(?P[a-z]{{1,8}})?" TOKEN_RE = re.compile(r"[a-zA-Z0-9_-]{23,28}\.[a-zA-Z0-9_-]{6,7}\.[a-zA-Z0-9_-]{27}") PROSE_LOOKUP = { 1: "banned", @@ -105,6 +110,12 @@ def __init__(self, bot: core.Bot, /) -> None: self.dpy_mod_cache: dict[int, discord.User | discord.Member] = {} self._req_lock = asyncio.Lock() + domains = core.CONFIG["BADBIN"]["domains"] + formatted = BASE_BADBIN_RE.format(domains="|".join(domains)) + + self.BADBIN_RE = re.compile(formatted) + logger.info("Badbin initialized with following domains: %s", ", ".join(domains)) + async def github_request( self, method: str, @@ -195,6 +206,48 @@ async def find_discord_tokens(self, message: discord.Message) -> None: ) await message.reply(msg) + async def pull_badbin_content(self, site: str, slug: str, *, fail_hard: bool = True) -> str: + async with self.bot.session.get(f"https://{site}/raw/{slug}") as f: + if 200 > f.status > 299: + if fail_hard: + f.raise_for_status() + else: + err = f"Could not read {slug} from {site}. Details: {f.status}\n\n{await f.read()}" + logger.error(err) + return err # if we don't fail hard, we'll return the error message in the new paste. + + return (await f.read()).decode() + + async def post_mystbin_content(self, contents: list[tuple[str, str]]) -> tuple[str, str | None]: + response = await self.bot.mb_client.create_paste( + files=[mystbin.File(filename=a, content=b, attachment_url=None) for a, b in contents] + ) + return response.id, response.notice or None + + @commands.Cog.listener("on_message") + async def find_badbins(self, message: discord.Message) -> None: + matches = self.BADBIN_RE.findall(message.content) + + if matches: + contents: list[tuple[str, str]] = [] + + for match in matches: + site, slug, ext = match + + if site is None or slug is None: + continue + + contents.append((await self.pull_badbin_content(site, slug), f"migrated.{ext or 'txt'}")) + + if contents: + key, notice = await self.post_mystbin_content(contents) + msg = f"I've detected a badbin and have uploaded your pastes here: https://mystb.in/{key}" + + if notice: + msg += "\nnotice: " + notice + + await message.reply(msg, mention_author=False) + @commands.Cog.listener() async def on_papi_dpy_modlog(self, payload: ModLogType, /) -> None: moderation_payload = payload["payload"] diff --git a/types_/config.py b/types_/config.py index 7379684..2ed9d0e 100644 --- a/types_/config.py +++ b/types_/config.py @@ -27,6 +27,10 @@ class Snekbox(TypedDict): url: str +class BadBin(TypedDict): + domains: list[str] + + class Config(TypedDict): prefix: str owner_ids: NotRequired[list[int]] @@ -34,3 +38,4 @@ class Config(TypedDict): DATABASE: Database LOGGING: Logging SNEKBOX: NotRequired[Snekbox] + BADBIN: BadBin