Skip to content

Commit

Permalink
[gelbooru_v01] add 'favorite' extractor (#2546)
Browse files Browse the repository at this point in the history
  • Loading branch information
mikf committed May 2, 2022
1 parent 5b7423d commit 52b47c3
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 17 deletions.
12 changes: 6 additions & 6 deletions docs/supportedsites.md
Original file line number Diff line number Diff line change
Expand Up @@ -930,37 +930,37 @@ Consider all sites to be NSFW unless otherwise known.
<tr>
<td>The /co/llection</td>
<td>https://the-collection.booru.org/</td>
<td>Posts, Tag Searches</td>
<td>Favorites, Posts, Tag Searches</td>
<td></td>
</tr>
<tr>
<td>Illusion Game Cards</td>
<td>https://illusioncards.booru.org/</td>
<td>Posts, Tag Searches</td>
<td>Favorites, Posts, Tag Searches</td>
<td></td>
</tr>
<tr>
<td>All girl</td>
<td>https://allgirl.booru.org/</td>
<td>Posts, Tag Searches</td>
<td>Favorites, Posts, Tag Searches</td>
<td></td>
</tr>
<tr>
<td>Draw Friends</td>
<td>https://drawfriends.booru.org/</td>
<td>Posts, Tag Searches</td>
<td>Favorites, Posts, Tag Searches</td>
<td></td>
</tr>
<tr>
<td>/v/idyart</td>
<td>https://vidyart.booru.org/</td>
<td>Posts, Tag Searches</td>
<td>Favorites, Posts, Tag Searches</td>
<td></td>
</tr>
<tr>
<td>The Loud Booru</td>
<td>https://tlb.booru.org/</td>
<td>Posts, Tag Searches</td>
<td>Favorites, Posts, Tag Searches</td>
<td></td>
</tr>

Expand Down
60 changes: 49 additions & 11 deletions gallery_dl/extractor/gelbooru_v01.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,21 @@ def _parse_post(self, post_id):

return post

def _pagination(self, url, begin, end):
    """Yield a parsed post for every post ID found on consecutive pages

    'url' is the listing URL up to and including its trailing 'pid='
    parameter; the current offset is appended to it for each request.
    'begin' and 'end' are the markers surrounding a post ID in the
    page text.
    """
    offset = self.page_start

    while True:
        html = self.request(url + str(offset)).text

        # 'found' ends up as the number of post IDs on this page
        found = 0
        post_ids = text.extract_iter(html, begin, end)
        for found, post_id in enumerate(post_ids, 1):
            yield self._parse_post(post_id)

        # a page with fewer than 'per_page' results is the last one
        if found < self.per_page:
            return
        offset += self.per_page


BASE_PATTERN = GelbooruV01Extractor.update({
"thecollection" : {"root": "https://the-collection.booru.org"},
Expand Down Expand Up @@ -88,20 +103,43 @@ def metadata(self):
def posts(self):
url = "{}/index.php?page=post&s=list&tags={}&pid=".format(
self.root, self.tags)
pid = self.page_start
return self._pagination(url, 'class="thumb"><a id="p', '"')

while True:
page = self.request(url + str(pid)).text

cnt = 0
for post_id in text.extract_iter(
page, 'class="thumb"><a id="p', '"'):
yield self._parse_post(post_id)
cnt += 1
# Extractor for the posts listed on a favorites page, i.e. URLs of the
# form /index.php?page=favorites&s=view&id=<digits>
class GelbooruV01FavoriteExtractor(GelbooruV01Extractor):
subcategory = "favorite"
# 'favorite_id' in both format strings is the key returned by metadata()
directory_fmt = ("{category}", "favorites", "{favorite_id}")
archive_fmt = "f_{favorite_id}_{id}"
# page size; _pagination() reads it as the 'pid' step between requests
# and as the threshold below which a page counts as the last one
per_page = 50
# the capture group added here is the numeric favorites ID
pattern = BASE_PATTERN + r"/index\.php\?page=favorites&s=view&id=(\d+)"
# example URLs, one per supported site
# NOTE(review): "count" is presumably the expected number of results
# for that URL -- confirm against the test runner
test = (
(("https://the-collection.booru.org"
"/index.php?page=favorites&s=view&id=1166"), {
"count": 2,
}),
(("https://illusioncards.booru.org"
"/index.php?page=favorites&s=view&id=84887"), {
"count": 2,
}),
("https://allgirl.booru.org/index.php?page=favorites&s=view&id=380", {
"count": 4,
}),
("https://drawfriends.booru.org/index.php?page=favorites&s=view&id=1"),
("https://vidyart.booru.org/index.php?page=favorites&s=view&id=1"),
("https://tlb.booru.org/index.php?page=favorites&s=view&id=1"),
)

if cnt < self.per_page:
return
pid += self.per_page
def __init__(self, match):
    """Initialize the base extractor and store the favorites ID

    'match' is the regex match object for the URL; the ID is taken
    from its last matched capture group and kept as a string.
    """
    GelbooruV01Extractor.__init__(self, match)
    group_index = match.lastindex
    self.favorite_id = match.group(group_index)

def metadata(self):
    """Return a dict holding the favorites ID parsed by text.parse_int()"""
    favorite_id = text.parse_int(self.favorite_id)
    return {"favorite_id": favorite_id}

def posts(self):
    """Return the paginated posts of this favorites list

    Builds the favorites URL up to its trailing 'pid=' parameter and
    hands it to _pagination() together with the markers that enclose
    a post ID ('posts[' and ']').
    """
    template = "{}/index.php?page=favorites&s=view&id={}&pid="
    url = template.format(self.root, self.favorite_id)
    begin, end = "posts[", "]"
    return self._pagination(url, begin, end)


class GelbooruV01PostExtractor(GelbooruV01Extractor):
Expand Down

0 comments on commit 52b47c3

Please sign in to comment.