Skip to content

Commit

Permalink
[reddit] support filtering by timestamp (#15)
Browse files Browse the repository at this point in the history
- Added the 'extractor.reddit.date-min' and
  'extractor.reddit.date-max' config options. Both values should be
  UTC Unix timestamps.
- Submissions whose creation time T does not satisfy
  date-min <= T <= date-max are ignored.

- Fixed the limit parameter for submission comments by setting
  it to its apparent max value (500).
  • Loading branch information
mikf committed Jun 3, 2017
1 parent 4e80e0c commit 5f05543
Showing 1 changed file with 15 additions and 12 deletions.
27 changes: 15 additions & 12 deletions gallery_dl/extractor/reddit.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class RedditExtractor(Extractor):

def __init__(self):
Extractor.__init__(self)
self.api = RedditAPI(self.session, self.log)
self.api = RedditAPI(self)
self.max_depth = int(self.config("recursion", 0))
self._visited = set()

Expand Down Expand Up @@ -101,24 +101,25 @@ def submissions(self):

class RedditAPI():
"""Minimal interface for the reddit API"""
def __init__(self, session, log, client_id="6N9uN0krSDE-ig"):
self.session = session
self.log = log
def __init__(self, extractor, client_id="6N9uN0krSDE-ig"):
self.session = extractor.session
self.date_min = int(extractor.config("date-min", 0))
# 253402210800 == datetime.max.timestamp()
self.date_max = int(extractor.config("date-max", 253402210800))
self.client_id = client_id
session.headers["User-Agent"] = "Python:gallery-dl:0.8.4 (by /u/mikf1)"
self.session.headers["User-Agent"] = ("Python:gallery-dl:0.8.4"
" (by /u/mikf1)")

def submission(self, submission_id):
"""Fetch the (submission, comments)-tuple for a submission id"""
endpoint = "/comments/" + submission_id + "/.json"
params = {"raw_json": 1, "limit": 100}
submission, comments = self._call(endpoint, params)
submission, comments = self._call(endpoint, {"limit": 500})
return (submission["data"]["children"][0]["data"],
self._unfold(comments))

def submissions_subreddit(self, subreddit, params):
"""Collect all (submission, comments)-tuples of a subreddit"""
endpoint = "/r/" + subreddit + "/.json"
params["raw_json"] = 1
params["limit"] = 100
return self._pagination(endpoint, params)

Expand All @@ -142,6 +143,7 @@ def _authenticate_impl(self, client_id):

def _call(self, endpoint, params):
url = "https://oauth.reddit.com" + endpoint
params["raw_json"] = 1
self.authenticate()
data = self.session.get(url, params=params).json()
if "error" in data:
Expand All @@ -158,10 +160,11 @@ def _pagination(self, endpoint, params, _empty=()):

for submission in data["children"]:
submission = submission["data"]
if submission["num_comments"]:
yield self.submission(submission["id"])
else:
yield submission, _empty
if self.date_min <= submission["created_utc"] <= self.date_max:
if submission["num_comments"]:
yield self.submission(submission["id"])
else:
yield submission, _empty

if not data["after"]:
return
Expand Down

0 comments on commit 5f05543

Please sign in to comment.