diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 7f55b04f8f..c84ea80fac 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -930,37 +930,37 @@ Consider all sites to be NSFW unless otherwise known. The /co/llection https://the-collection.booru.org/ - Posts, Tag Searches + Favorites, Posts, Tag Searches Illusion Game Cards https://illusioncards.booru.org/ - Posts, Tag Searches + Favorites, Posts, Tag Searches All girl https://allgirl.booru.org/ - Posts, Tag Searches + Favorites, Posts, Tag Searches Draw Friends https://drawfriends.booru.org/ - Posts, Tag Searches + Favorites, Posts, Tag Searches /v/idyart https://vidyart.booru.org/ - Posts, Tag Searches + Favorites, Posts, Tag Searches The Loud Booru https://tlb.booru.org/ - Posts, Tag Searches + Favorites, Posts, Tag Searches diff --git a/gallery_dl/extractor/gelbooru_v01.py b/gallery_dl/extractor/gelbooru_v01.py index 541f454b36..f0f50cceb5 100644 --- a/gallery_dl/extractor/gelbooru_v01.py +++ b/gallery_dl/extractor/gelbooru_v01.py @@ -42,6 +42,21 @@ def _parse_post(self, post_id): return post + def _pagination(self, url, begin, end): + pid = self.page_start + + while True: + page = self.request(url + str(pid)).text + + cnt = 0 + for post_id in text.extract_iter(page, begin, end): + yield self._parse_post(post_id) + cnt += 1 + + if cnt < self.per_page: + return + pid += self.per_page + BASE_PATTERN = GelbooruV01Extractor.update({ "thecollection" : {"root": "https://the-collection.booru.org"}, @@ -88,20 +103,43 @@ def metadata(self): def posts(self): url = "{}/index.php?page=post&s=list&tags={}&pid=".format( self.root, self.tags) - pid = self.page_start + return self._pagination(url, 'class="thumb">