Skip to content

Commit

Permalink
core: configure schemes to trigger URL Callbacks on
Browse files Browse the repository at this point in the history
  • Loading branch information
Exirel committed Apr 20, 2019
1 parent 5be90d9 commit 3317c0b
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 5 deletions.
6 changes: 6 additions & 0 deletions sopel/config/core_section.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,12 @@ class CoreSection(StaticSection):
May not apply, depending on ``auth_method``."""

auto_url_schemes = ListAttribute(
'auto_url_schemes',
strip=True,
default=['http', 'https', 'ftp'])
"""List of URL schemes that will trigger URL callbacks."""

bind_host = ValidatedAttribute('bind_host')
"""Bind the connection to a specific IP"""

Expand Down
5 changes: 3 additions & 2 deletions sopel/coretasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -800,16 +800,17 @@ def track_topic(bot, trigger):
bot.channels[channel].topic = trigger.args[-1]


@sopel.module.rule(r'(?u).*(https?://\S+).*')
@sopel.module.rule(r'(?u).*(.+://\S+).*')
@sopel.module.unblockable
def handle_url_callbacks(bot, trigger):
"""Dispatch callbacks on URLs
For each URL found in the trigger, trigger the URL callback registered by
the ``@url`` decorator.
"""
schemes = bot.config.core.auto_url_schemes
# find URLs in the trigger
for url in sopel.web.search_urls(trigger):
for url in sopel.web.search_urls(trigger, schemes=schemes):
# find callbacks for said URL
for function, match in bot.search_url_callbacks(url):
# trigger callback defined by the `@url` decorator
Expand Down
9 changes: 6 additions & 3 deletions sopel/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,10 +252,13 @@ def trim_url(url):
return url


def search_urls(text, exclusion_char=None, clean=False):
re_url = r'((?:http|https|ftp)(?::\/\/\S+))'
def search_urls(text, exclusion_char=None, clean=False, schemes=None):
schemes = schemes or ['http', 'https', 'ftp']
schemes_patterns = '|'.join(re.escape(scheme) for scheme in schemes)
re_url = r'((?:%s)(?::\/\/\S+))' % schemes_patterns
if exclusion_char is not None:
re_url = r'((?<!%s)(?:http|https|ftp)(?::\/\/\S+))' % (exclusion_char)
re_url = r'((?<!%s)(?:%s)(?::\/\/\S+))' % (
exclusion_char, schemes_patterns)

r = re.compile(re_url, re.IGNORECASE | re.UNICODE)

Expand Down
24 changes: 24 additions & 0 deletions test/test_web.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,30 @@ def test_search_urls_exclusion_char_only_once():
assert 'http://b.com' in urls


def test_search_urls_default_schemes():
    """Without ``schemes``, the http, ftp and https URLs are all found."""
    text = 'http://a.com ftp://b.com https://c.com'
    found = list(search_urls(text))

    assert len(found) == 3, 'Must find all three URLs'
    for url in ('http://a.com', 'ftp://b.com', 'https://c.com'):
        assert url in found


@pytest.mark.parametrize('scheme', ['http', 'https', 'ftp', 'steam'])
def test_search_urls_defined_schemes(scheme):
    """With ``schemes=[scheme]``, only the URL using that scheme is found."""
    # The one URL in the searched text that uses each scheme.
    url_by_scheme = {
        'http': 'http://a.com',
        'https': 'https://c.com',
        'ftp': 'ftp://b.com',
        'steam': 'steam://portal2',
    }
    expected = url_by_scheme.get(scheme)
    text = 'http://a.com ftp://b.com https://c.com steam://portal2'

    found = list(search_urls(text, schemes=[scheme]))

    assert len(found) == 1, 'Only %s URLs must be found' % scheme
    assert expected in found


# Punctuation marks, expanded into a list of single-character strings.
# NOTE(review): presumably consumed by tests further down the file (not
# visible in this hunk) -- confirm against the full test module.
TRAILING_CHARS = list('.,?!\'":;')
# (opening, closing) bracket character pairs.
ENCLOSING_PAIRS = [('(', ')'), ('[', ']'), ('{', '}'), ('<', '>')]

Expand Down

0 comments on commit 3317c0b

Please sign in to comment.