Skip to content

Commit

Permalink
add generalized extractors for Mastodon instances (#144)
Browse files Browse the repository at this point in the history
Extractors for Mastodon instances can now be dynamically generated,
based on the instance names in the 'extractor.mastodon.*' config path.

Example:
{
    "extractor": {
        "mastodon": {
            "pawoo.net": { ... },
            "mastodon.xyz": { ... },
            "tabletop.social": { ... },
            ...
        }
    }
}

Each entry requires an 'access-token' value, which can be generated with
'gallery-dl oauth:mastodon:<instance URL>'.
An 'access-token' (as well as a 'client-id' and 'client-secret') for
pawoo.net is always available, but can be overwritten as necessary.
  • Loading branch information
mikf committed Jan 19, 2019
1 parent 4b441c1 commit b8fed34
Show file tree
Hide file tree
Showing 7 changed files with 269 additions and 150 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Changelog

## Unreleased

## 1.6.3 - 2019-01-18
- Added `metadata` post-processor to write image metadata to an external file ([#135](https://github.com/mikf/gallery-dl/issues/135))
- Added option to reverse chapter order of manga extractors ([#149](https://github.com/mikf/gallery-dl/issues/149))
Expand Down
4 changes: 2 additions & 2 deletions gallery_dl/extractor/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

# Copyright 2015-2018 Mike Fährmann
# Copyright 2015-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
Expand Down Expand Up @@ -67,7 +67,6 @@
"nijie",
"nyafuu",
"paheal",
"pawoo",
"piczel",
"pinterest",
"pixiv",
Expand Down Expand Up @@ -95,6 +94,7 @@
"yandere",
"xvideos",
"yuki",
"mastodon",
"imagehosts",
"directlink",
"recursive",
Expand Down
175 changes: 175 additions & 0 deletions gallery_dl/extractor/mastodon.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
# -*- coding: utf-8 -*-

# Copyright 2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for mastodon instances"""

from .common import Extractor, Message
from .. import text, config, exception
import re


class MastodonExtractor(Extractor):
    """Base class for mastodon extractors"""
    basecategory = "mastodon"
    directory_fmt = ["mastodon", "{category}", "{account[username]}"]
    filename_fmt = "{category}_{id}_{media[id]}.{extension}"
    archive_fmt = "{media[id]}"
    instance = None

    def __init__(self, match):
        """Store the instance name (first match group) and set up the API"""
        Extractor.__init__(self)
        instance = match.group(1)
        self.instance = instance
        self.api = MastodonAPI(self, instance)

    def config(self, key, default=None):
        """Interpolate 'key' along the mastodon-specific config path"""
        path = ("extractor", "mastodon", self.category, self.subcategory, key)
        return config.interpolate(path, default)

    def items(self):
        """Yield one Directory message per status and one Url message
        per media attachment of that status
        """
        yield Message.Version, 1
        for status in self.statuses():
            media_list = self.prepare(status)
            yield Message.Directory, status
            for media in media_list:
                # the status dict doubles as metadata for the current file
                status["media"] = media
                media_url = media["url"]
                metadata = text.nameext_from_url(media_url, status)
                yield Message.Url, media_url, metadata

    def statuses(self):
        """Return an iterable containing all relevant Status-objects"""
        return ()

    @staticmethod
    def prepare(status):
        """Detach and return the 'media_attachments' list of a status"""
        return status.pop("media_attachments")


class MastodonUserExtractor(MastodonExtractor):
    """Extractor for all images of an account/user"""
    subcategory = "user"

    def __init__(self, match):
        """Remember the account name taken from the second match group"""
        MastodonExtractor.__init__(self, match)
        self.account_name = match.group(2)

    def statuses(self):
        """Return the statuses of the account named 'self.account_name'

        Raises exception.NotFoundError if the account search yields no
        entry whose 'username' equals the requested name.
        """
        wanted = self.account_name
        for account in self.api.account_search("@" + wanted, 1):
            if account["username"] == wanted:
                return self.api.account_statuses(account["id"])
        raise exception.NotFoundError("account")


class MastodonStatusExtractor(MastodonExtractor):
    """Extractor for images from a status"""
    subcategory = "status"

    def __init__(self, match):
        """Remember the status ID taken from the second match group"""
        MastodonExtractor.__init__(self, match)
        self.status_id = match.group(2)

    def statuses(self):
        """Return a 1-tuple holding the requested Status-object"""
        status = self.api.status(self.status_id)
        return (status,)


class MastodonAPI():
    """Minimal interface for the Mastodon API

    https://github.com/tootsuite/mastodon
    https://github.com/tootsuite/documentation/blob/master/Using-the-API/API.md
    """

    def __init__(self, extractor, instance, access_token=None):
        """Set up API access for 'instance'

        extractor:    object providing config() and request()
        instance:     host name of the Mastodon instance
        access_token: fallback used when the extractor's configuration
                      has no 'access-token' value
        """
        self.instance = instance
        self.extractor = extractor

        token = extractor.config("access-token", access_token)
        # Without a token, send no Authorization header at all instead of
        # the meaningless literal "Bearer None".
        if token:
            self.headers = {"Authorization": "Bearer {}".format(token)}
        else:
            self.headers = {}

    def account_search(self, query, limit=40):
        """Search for accounts matching 'query'; return the parsed response"""
        params = {"q": query, "limit": limit}
        return self._call("accounts/search", params)

    def account_statuses(self, account_id):
        """Get an account's statuses (media-only), following pagination"""
        endpoint = "accounts/{}/statuses".format(account_id)
        params = {"only_media": "1"}
        return self._pagination(endpoint, params)

    def status(self, status_id):
        """Fetch a Status"""
        return self._call("statuses/{}".format(status_id))

    def _url(self, endpoint):
        """Build the full API v1 URL for 'endpoint'"""
        return "https://{}/api/v1/{}".format(self.instance, endpoint)

    def _call(self, endpoint, params=None):
        """Send a single request to 'endpoint' and return the parsed JSON"""
        response = self.extractor.request(
            self._url(endpoint), params=params, headers=self.headers)
        return self._parse(response)

    def _pagination(self, endpoint, params):
        """Yield all items of 'endpoint', following 'next' links"""
        url = self._url(endpoint)
        while url:
            response = self.extractor.request(
                url, params=params, headers=self.headers)
            yield from self._parse(response)
            url = response.links.get("next", {}).get("url")
            # The 'next' URL already carries its own query string; sending
            # 'params' again would append duplicate query parameters.
            params = None

    @staticmethod
    def _parse(response):
        """Parse an API response

        Raises exception.NotFoundError for a 404 status code.
        """
        if response.status_code == 404:
            raise exception.NotFoundError()
        return response.json()


def generate_extractors():
    """Dynamically generate Extractor classes for Mastodon instances

    For every dict-valued entry below the 'extractor.mastodon' config path
    (plus a built-in entry for 'pawoo.net' if none is configured), a user
    and a status extractor class are created, named after the instance,
    and stored in this module's global namespace.
    """
    namespace = globals()
    instances = config.get(("extractor", "mastodon")) or {}

    # NOTE(review): if 'extractor.mastodon' is not configured at all, the
    # dict used here is a fresh local object - confirm that the pawoo.net
    # defaults below are still reachable through later config lookups.
    instances.setdefault("pawoo.net", {
        "access-token" : "286462927198d0cf3e24683e91c8259a"
                         "ac4367233064e0570ca18df2ac65b226",
        "client-id"    : "97b142b6904abf97a1068d51a7bc2f2f"
                         "cf9323cef81f13cb505415716dba7dac",
        "client-secret": "e45bef4bad45b38abf7d9ef88a646b73"
                         "75e7fb2532c31a026327a93549236481",
    })

    for instance, info in instances.items():

        # ignore config values that are not per-instance settings
        if not isinstance(info, dict):
            continue

        host = re.escape(instance)
        user_pattern = (
            r"(?:https?://)?({})/@([^/?&#]+)(?:/media)?/?$".format(host))
        status_pattern = r"(?:https?://)?({})/@[^/?&#]+/(\d+)".format(host)

        class UserExtractor(MastodonUserExtractor):
            pattern = [user_pattern]

        class StatusExtractor(MastodonStatusExtractor):
            pattern = [status_pattern]

        # class name prefix: only the ASCII letters of the instance name
        prefix = "".join(re.findall(r"[A-Za-z]+", instance)).capitalize()

        for extr in (UserExtractor, StatusExtractor):
            base_doc = extr.__base__.__doc__
            extr.category = instance
            extr.__name__ = prefix + extr.__name__
            extr.__doc__ = "{} on {}".format(base_doc, instance)
            namespace[extr.__name__] = extr


# Create the per-instance extractor classes when this module is imported.
generate_extractors()
90 changes: 85 additions & 5 deletions gallery_dl/extractor/oauth.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

# Copyright 2017-2018 Mike Fährmann
# Copyright 2017-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
Expand All @@ -10,7 +10,8 @@

from .common import Extractor, Message
from . import deviantart, flickr, reddit, smugmug, tumblr
from .. import text, oauth, config
from .. import text, oauth, config, exception
from ..cache import cache
import os
import urllib.parse

Expand Down Expand Up @@ -82,7 +83,6 @@ def _oauth1_authorization_flow(
data = self.open(authorize_url, params)

# exchange the request token for an access token
# self.session.token = data["oauth_token"]
data = self.session.get(access_token_url, params=data).text

data = text.parse_query(data)
Expand All @@ -94,7 +94,8 @@ def _oauth1_authorization_flow(

def _oauth2_authorization_code_grant(
self, client_id, client_secret, auth_url, token_url,
scope="read", key="refresh_token", auth=True):
scope="read", key="refresh_token", auth=True,
message_template=None):
"""Perform an OAuth2 authorization code grant"""

state = "gallery-dl_{}_{}".format(
Expand Down Expand Up @@ -147,11 +148,15 @@ def _oauth2_authorization_code_grant(

# display token
part = key.partition("_")[0]
self.send(OAUTH2_MSG_TEMPLATE.format(
template = message_template or OAUTH2_MSG_TEMPLATE
self.send(template.format(
category=self.subcategory,
key=part,
Key=part.capitalize(),
token=data[key],
instance=getattr(self, "instance", ""),
client_id=client_id,
client_secret=client_secret,
))


Expand Down Expand Up @@ -254,6 +259,55 @@ def items(self):
)


# OAuth helper that obtains an access token for a single Mastodon instance.
class OAuthMastodon(OAuthBase):
    subcategory = "mastodon"
    pattern = ["oauth:mastodon:(?:https?://)?([^/?&#]+)"]

    def __init__(self, match):
        """Remember the instance name taken from the first match group"""
        OAuthBase.__init__(self, match)
        self.instance = match.group(1)

    def items(self):
        """Run the OAuth2 authorization code grant for the instance"""
        yield Message.Version, 1

        instance = self.instance
        # use configured application credentials if there are any,
        # otherwise register a new application with the instance
        application = self.oauth_config(instance) or self._register(instance)

        authorize_url = "https://{}/oauth/authorize".format(instance)
        token_url = "https://{}/oauth/token".format(instance)
        self._oauth2_authorization_code_grant(
            application["client-id"],
            application["client-secret"],
            authorize_url,
            token_url,
            key="access_token",
            message_template=MASTODON_MSG_TEMPLATE,
        )

    @cache(maxage=10*365*24*60*60, keyarg=1)
    def _register(self, instance):
        """Register a new application on 'instance'

        Returns the instance's JSON response with 'client_id' and
        'client_secret' renamed to 'client-id' and 'client-secret'.
        Raises exception.StopExtraction if either value is missing.
        """
        self.log.info("Registering application for '%s'", instance)

        payload = {
            "client_name": "gdl:" + oauth.nonce(8),
            "redirect_uris": self.redirect_uri,
            "scopes": "read",
        }
        endpoint = "https://{}/api/v1/apps".format(instance)
        data = self.session.post(endpoint, data=payload).json()

        if not ("client_id" in data and "client_secret" in data):
            self.log.error("Failed to register new application: '%s'", data)
            raise exception.StopExtraction()

        # switch to the key names used in the configuration file
        data["client-id"] = data.pop("client_id")
        data["client-secret"] = data.pop("client_secret")

        self.log.info("client-id:\n%s", data["client-id"])
        self.log.info("client-secret:\n%s", data["client-secret"])

        return data


OAUTH1_MSG_TEMPLATE = """
Your Access Token and Access Token Secret are
Expand Down Expand Up @@ -293,3 +347,29 @@ def items(self):
}}
}}
"""


# Message template for a finished Mastodon authorization; filled in with
# str.format() using the fields Key, key, token, instance, client_id and
# client_secret. Doubled braces are literal braces of the JSON example.
MASTODON_MSG_TEMPLATE = """
Your {Key} Token is
{token}
Put this value into your configuration file as
'extractor.mastodon.{instance}.{key}-token'.
You can also add your 'client-id' and 'client-secret' values
if you want to register another account in the future.
Example:
{{
"extractor": {{
"mastodon": {{
"{instance}": {{
"{key}-token": "{token}",
"client-id": "{client_id}",
"client-secret": "{client_secret}"
}}
}}
}}
}}
"""
Loading

0 comments on commit b8fed34

Please sign in to comment.