Skip to content

Commit

Permalink
[reddit] some small fixes
Browse files Browse the repository at this point in the history
- filter or complete some URLs (skip fragment-only '#...' links; prefix relative '/...' links with 'https://www.reddit.com')
- remove the 'nofollow:' scheme before printing URLs
- (#15)
  • Loading branch information
mikf committed May 23, 2017
1 parent a22892f commit e425243
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 7 deletions.
6 changes: 3 additions & 3 deletions gallery_dl/extractor/recursive.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

# Copyright 2015, 2016 Mike Fährmann
# Copyright 2015-2017 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
Expand All @@ -14,9 +14,9 @@


class RecursiveExtractor(Extractor):

"""Extractor that fetches URLs from a remote or local source"""
category = "recursive"
pattern = ["r(?:ecursive)?:(.+)"]
pattern = [r"r(?:ecursive)?:(.+)"]
test = [("recursive:https://pastebin.com/raw/FLwrCYsT", {
"url": "eee86d65c346361b818e8f4b2b307d9429f136a2",
})]
Expand Down
9 changes: 7 additions & 2 deletions gallery_dl/extractor/reddit.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,11 @@ def items(self):
)
)
for url in urls:
if regex.match(url):
if url[0] == "#":
continue
elif url[0] == "/":
url = "nofollow:https://www.reddit.com" + url
elif regex.match(url):
url = "nofollow:" + url
yield Message.Queue, url

Expand All @@ -61,7 +65,8 @@ class RedditSubmissionExtractor(RedditExtractor):
"""Extractor for images from a submission on reddit.com"""
subcategory = "subreddit"
pattern = [(r"(?:https?://)?(?:m\.|www\.)?reddit\.com/r/[^/]+"
r"/comments/([^/]+)")]
r"/comments/([a-z0-9]+)"),
(r"(?:https?://)?redd\.it/([a-z0-9]+)")]

def __init__(self, match):
RedditExtractor.__init__(self)
Expand Down
11 changes: 9 additions & 2 deletions gallery_dl/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,9 +214,10 @@ def __init__(self, url, depth=1):
Job.__init__(self, url)
self.depth = depth
if depth == self.maxdepth:
self.handle_queue = print
self.handle_queue = self._print

def handle_url(self, url, _):
@staticmethod
def handle_url(url, _):
print(url)

def handle_queue(self, url):
Expand All @@ -225,6 +226,12 @@ def handle_queue(self, url):
except exception.NoExtractorError:
pass

# Print a URL after removing a leading "nofollow:" scheme, if present.
# Assigned to `self.handle_queue` in place of the builtin `print` once
# `depth == self.maxdepth`, so queued URLs are printed without the marker.
@staticmethod
def _print(url):
if url.startswith("nofollow:"):
# 9 == len("nofollow:"); drop only the scheme prefix, keep the real URL
url = url[9:]
print(url)


class TestJob(DownloadJob):
"""Generate test-results for extractor runs"""
Expand Down

0 comments on commit e425243

Please sign in to comment.