[reddit] some small fixes

- filter or complete some URLs
- remove the 'nofollow:' scheme before printing URLs
- (#15)
mikf · May 23, 2017 · e425243 · e425243
1 parent a22892f
commit e425243
Show file tree

Hide file tree

Showing 3 changed files with 19 additions and 7 deletions.
diff --git a/gallery_dl/extractor/recursive.py b/gallery_dl/extractor/recursive.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2015, 2016 Mike Fährmann
+# Copyright 2015-2017 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -14,9 +14,9 @@
 
 
 class RecursiveExtractor(Extractor):
-
+    """Extractor that fetches URLs from a remote or local source"""
     category = "recursive"
-    pattern = ["r(?:ecursive)?:(.+)"]
+    pattern = [r"r(?:ecursive)?:(.+)"]
     test = [("recursive:https://pastebin.com/raw/FLwrCYsT", {
         "url": "eee86d65c346361b818e8f4b2b307d9429f136a2",
     })]

diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
@@ -34,7 +34,11 @@ def items(self):
                 )
             )
             for url in urls:
-                if regex.match(url):
+                if url[0] == "#":
+                    continue
+                elif url[0] == "/":
+                    url = "nofollow:https://www.reddit.com" + url
+                elif regex.match(url):
                     url = "nofollow:" + url
                 yield Message.Queue, url
 
@@ -61,7 +65,8 @@ class RedditSubmissionExtractor(RedditExtractor):
     """Extractor for images from a submission on reddit.com"""
     subcategory = "subreddit"
     pattern = [(r"(?:https?://)?(?:m\.|www\.)?reddit\.com/r/[^/]+"
-                r"/comments/([^/]+)")]
+                r"/comments/([a-z0-9]+)"),
+               (r"(?:https?://)?redd\.it/([a-z0-9]+)")]
 
     def __init__(self, match):
         RedditExtractor.__init__(self)

diff --git a/gallery_dl/job.py b/gallery_dl/job.py
@@ -214,9 +214,10 @@ def __init__(self, url, depth=1):
         Job.__init__(self, url)
         self.depth = depth
         if depth == self.maxdepth:
-            self.handle_queue = print
+            self.handle_queue = self._print
 
-    def handle_url(self, url, _):
+    @staticmethod
+    def handle_url(url, _):
         print(url)
 
     def handle_queue(self, url):
@@ -225,6 +226,12 @@ def handle_queue(self, url):
         except exception.NoExtractorError:
             pass
 
+    @staticmethod
+    def _print(url):
+        if url.startswith("nofollow:"):
+            url = url[9:]
+        print(url)
+
 
 class TestJob(DownloadJob):
     """Generate test-results for extractor runs"""