Skip to content

Commit

Permalink
[unsplash] add extractors (#1197)
Browse files Browse the repository at this point in the history
for
- single photos  (/photos/ID)
- user profiles  (/@user)
- user likes     (/@USER/likes)
- search results (/s/photos/SEARCH)
  • Loading branch information
mikf committed Jan 19, 2021
1 parent 1fc16cb commit 534194b
Show file tree
Hide file tree
Showing 3 changed files with 186 additions and 0 deletions.
2 changes: 2 additions & 0 deletions docs/supportedsites.rst
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ The /b/ Archive https://thebarchive.com/ Boards, Search Results,
Tsumino https://www.tsumino.com/ Galleries, Search Results Supported
Tumblr https://www.tumblr.com/ Likes, Posts, Tag Searches, User Profiles `OAuth <https://github.com/mikf/gallery-dl#oauth>`__
Twitter https://twitter.com/ |twitter-C| Supported
Unsplash https://unsplash.com/ |unsplash-C|
VSCO https://vsco.co/ Collections, individual Images, User Profiles
Wallhaven https://wallhaven.cc/ individual Images, Search Results `API Key <configuration.rst#extractorwallhavenapi-key>`__
Warosu https://warosu.org/ Threads
Expand Down Expand Up @@ -166,5 +167,6 @@ Turboimagehost https://www.turboimagehost.com/ individual Images
.. |reddit-C| replace:: individual Images, Submissions, Subreddits, User Profiles
.. |smugmug-C| replace:: Albums, individual Images, Images from Users and Folders
.. |twitter-C| replace:: Bookmarks, Likes, Lists, List Members, Media Timelines, Search Results, Timelines, Tweets
.. |unsplash-C| replace:: Favorites, individual Images, Search Results, User Profiles
.. |wikiart-C| replace:: Artists, Artist Listings, Artworks, individual Images
.. |yuki-S| replace:: yuki.la 4chan archive
1 change: 1 addition & 0 deletions gallery_dl/extractor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@
"tsumino",
"tumblr",
"twitter",
"unsplash",
"vanillarock",
"vsco",
"wallhaven",
Expand Down
183 changes: 183 additions & 0 deletions gallery_dl/extractor/unsplash.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
# -*- coding: utf-8 -*-

# Copyright 2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://unsplash.com/"""

from .common import Extractor, Message
from .. import text, util

# Common URL prefix for the extractor patterns in this module:
# an optional "http://" or "https://" scheme followed by the
# literal host "unsplash.com".
BASE_PATTERN = r"(?:https?://)?unsplash\.com"


class UnsplashExtractor(Extractor):
    """Shared behaviour of all unsplash extractors

    Subclasses supply a photos() method returning an iterable of
    photo dicts; items() turns each of them into download messages.
    """
    category = "unsplash"
    directory_fmt = ("{category}", "{user[username]}")
    filename_fmt = "{id}.{extension}"
    archive_fmt = "{id}"
    root = "https://unsplash.com"

    def __init__(self, match):
        Extractor.__init__(self, match)
        # first capture group of the subclass pattern
        # (photo ID, username, or search term)
        self.item = match.group(1)

    def items(self):
        """Yield a Directory and a Url message for every photo"""
        # keys handed to util.delete_items() for each photo dict
        unwanted = ("", "related_collections")

        for entry in self.photos():
            util.delete_items(entry, unwanted)

            raw_url = entry["urls"]["raw"]
            text.nameext_from_url(raw_url, entry)

            # set after nameext_from_url() so the extension is always "jpg"
            entry["extension"] = "jpg"
            entry["date"] = text.parse_datetime(entry["created_at"])
            if "tags" in entry:
                # keep only the tag titles
                entry["tags"] = [tag["title"] for tag in entry["tags"]]

            yield Message.Directory, entry
            yield Message.Url, raw_url, entry

    def _pagination(self, url, params, results=False):
        """Yield all photo objects from a paginated API endpoint

        'params' is updated in place with the paging parameters.
        If 'results' is true, the photo list is taken from the
        "results" key of each response instead of the response itself.
        """
        params["per_page"] = "20"
        params["page"] = 1

        while True:
            data = self.request(url, params=params).json()
            batch = data["results"] if results else data
            yield from batch

            # a page with fewer than 20 entries is the last one
            if len(batch) < 20:
                break
            params["page"] = params["page"] + 1


class UnsplashImageExtractor(UnsplashExtractor):
    """Extractor for a single unsplash photo"""
    subcategory = "image"
    # Capture the whole path segment after /photos/ instead of only \w+:
    # photo IDs may contain (or start with) '-', which \w does not match,
    # so such URLs were either rejected or truncated to a wrong ID.
    pattern = BASE_PATTERN + r"/photos/([^/?#]+)"
    test = ("https://unsplash.com/photos/lsoogGC_5dg", {
        "url": "00accb0a64d5a0df0db911f8b425892718dce524",
        "keyword": {
            "alt_description": "re:silhouette of trees near body of water ",
            "blur_hash": "LZP4uQS4jboe%#o0WCa}2doJNaaz",
            "categories": list,
            "color": "#f3c08c",
            "created_at": "2020-04-08T08:29:42-04:00",
            "date": "dt:2020-04-08 12:29:42",
            "description": "The Island",
            "downloads": int,
            "exif": {
                "aperture": "11",
                "exposure_time": "30",
                "focal_length": "70.0",
                "iso": 200,
                "make": "Canon",
                "model": "Canon EOS 5D Mark IV"
            },
            "extension": "jpg",
            "filename": "photo-1586348943529-beaae6c28db9",
            "height": 6272,
            "id": "lsoogGC_5dg",
            "liked_by_user": False,
            "likes": int,
            "location": {
                "city": "Beaver Dam",
                "country": "United States",
                "name": "Beaver Dam, WI 53916, USA",
                "position": {
                    "latitude": 43.457769,
                    "longitude": -88.837329
                },
                "title": "Beaver Dam, WI 53916, USA"
            },
            "promoted_at": "2020-04-08T11:12:03-04:00",
            "sponsorship": None,
            "tags": list,
            "updated_at": "2021-01-13T07:15:42-05:00",
            "user": {
                "accepted_tos": True,
                "bio": str,
                "first_name": "Dave",
                "id": "uMJXuywXLiU",
                "instagram_username": "just_midwest_rock",
                "last_name": "Hoefler",
                "location": "Madison, WI",
                "name": "Dave Hoefler",
                "portfolio_url": str,
                "total_collections": 1,
                "total_likes": 178,
                "total_photos": 687,
                "twitter_username": None,
                "updated_at": "2021-01-13T21:50:35-05:00",
                "username": "johnwestrock"
            },
            "views": int,
            "width": 4480,
        },
    })

    def photos(self):
        """Return a 1-tuple holding the API object of the requested photo"""
        url = "{}/napi/photos/{}".format(self.root, self.item)
        # wrapped in a tuple so items() can iterate over it
        # like any other photos() result
        return (self.request(url).json(),)


class UnsplashUserExtractor(UnsplashExtractor):
    """Extractor for every photo uploaded by an unsplash user"""
    subcategory = "user"
    pattern = BASE_PATTERN + r"/@(\w+)/?$"
    test = ("https://unsplash.com/@johnwestrock", {
        "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
                   r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
        "range": "1-30",
        "count": 30,
    })

    def photos(self):
        """Return a generator over the user's photos, latest first"""
        endpoint = "{}/napi/users/{}/photos".format(self.root, self.item)
        return self._pagination(endpoint, {"order_by": "latest"})


class UnsplashFavoriteExtractor(UnsplashExtractor):
    """Extractor for every photo liked by an unsplash user"""
    subcategory = "favorite"
    pattern = BASE_PATTERN + r"/@(\w+)/likes"
    test = ("https://unsplash.com/@johnwestrock/likes", {
        "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
                   r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
        "range": "1-30",
        "count": 30,
    })

    def photos(self):
        """Return a generator over the user's likes, latest first"""
        endpoint = "{}/napi/users/{}/likes".format(self.root, self.item)
        return self._pagination(endpoint, {"order_by": "latest"})


class UnsplashSearchExtractor(UnsplashExtractor):
    """Extractor for the photos of an unsplash search"""
    subcategory = "search"
    pattern = BASE_PATTERN + r"/s/photos/([^/?#]+)(?:\?([^/?#]+))?"
    test = ("https://unsplash.com/s/photos/nature", {
        "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
                   r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
        "range": "1-30",
        "count": 30,
    })

    def __init__(self, match):
        UnsplashExtractor.__init__(self, match)
        # optional query string following the search term (None if absent)
        self.query = match.group(2)

    def photos(self):
        """Return a generator over all photos matching the search"""
        search_params = {"query": text.unquote(self.item)}
        # parameters from the URL's query string are merged on top
        # of the search term
        if self.query:
            search_params.update(text.parse_query(self.query))

        endpoint = self.root + "/napi/search/photos"
        # search responses keep their photo list under "results"
        return self._pagination(endpoint, search_params, True)

0 comments on commit 534194b

Please sign in to comment.