Skip to content

Commit

Permalink
Strip IRC formatting from message before extracting URLs from it
Browse files (browse the repository at this point in the history)
  • Loading branch information
impredicative committed Aug 16, 2020
1 parent af9aae9 commit 049a156
Showing 1 changed file with 2 additions and 0 deletions.
2 changes: 2 additions & 0 deletions ircurltitlebot/bot.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -11,6 +11,7 @@
from typing import Dict, List, NoReturn, Optional, Tuple
from urllib.parse import urlparse

import ircstyle
import miniirc
import urlextract

Expand Down Expand Up @@ -223,6 +224,7 @@ def _handle_privmsg(irc: miniirc.IRC, hostmask: Tuple[str, str, str], args: List
return

# Extract URLs
msg = ircstyle.unstyle(msg)
# words = [word for word in msg.split() if not word.isalnum()] # Filter out several non-URL words.
try:
urls = url_extractor.find_urls(msg, only_unique=False) # Assumes returned URLs have same order as in message.
Expand Down

0 comments on commit 049a156

Please sign in to comment.