-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmanga_dl.py
174 lines (161 loc) · 8.38 KB
/
manga_dl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
from pathlib import Path
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.firefox.options import Options
from bs4 import BeautifulSoup
from urllib.parse import quote
import subprocess
import time
import re
import shutil
import os
# --- User configuration and shared module-level settings ---
p = print  # short alias used for every console message in this script
storage = Path("D:/Mangas")  # path to main root directory to store/manage files in - make sure there is enough storage in directory to DL files
driver_location = Service(Path("D:/geckodriver.exe"))  # path to Firefox geckodriver executable
kcc = Path("D:/kcc/kcc-c2e.py")  # path to KindleComicConverter script
mangasee_base = "https://mangasee123.com"  # main URL for MangaSee
engine = "https://duckduckgo.com/?q="  # default search engine is DuckDuckGo
mangasee_dir = mangasee_base + "/directory"  # MangaSee directory for all listings
storage_subdirectory = storage / "mangasee"  # subdirectory that Gallery-dl generates for the specific URL
options = Options()
# Request headless Firefox via the command-line flag to reduce resource use.
# The older `options.headless = True` property setter was deprecated in
# Selenium 4 and later removed; on releases without it the assignment just
# creates an unused attribute and the browser opens with a visible window.
options.add_argument("-headless")
def search():  # searches for query in all links inside the URL directory
    """Ask for a search term, list matching directory links, return the pick.

    The directory page is loaded in the shared ``driver`` and every ``<a>``
    whose text or ``href`` contains the query (case-insensitively) is offered
    as a numbered result. The prompt/search cycle repeats until the user
    supplies a number that is inside the result range.

    Returns:
        tuple: ``(index, matched_name, matched_url)`` where ``index`` is the
        zero-based position of the chosen result, ``matched_name`` is the
        list of link contents and ``matched_url`` the list of absolute URLs.
    """
    # Loop instead of recursing: the previous recursive retry dropped the
    # nested call's return value, so a retried search returned None.
    while True:
        query = input("\nEnter the content to search for: ")
        p("\nNavigating to URL... ")
        driver.get(mangasee_dir)  # go to URL directory
        time.sleep(1)  # brief pause before reading the DOM
        # Read the live DOM through JavaScript rather than the raw response.
        # NOTE(review): presumably the listing is rendered client-side - confirm.
        html = driver.execute_script("return document.getElementsByTagName('html')[0].innerHTML")
        p(f"\nSearching for {query}... ")
        soup = BeautifulSoup(html, "html.parser")

        needle = query.casefold()
        matched_name = []
        matched_url = []
        for link in soup.find_all("a", href=True):
            text = link.decode_contents()
            if needle in text.casefold() or needle in link["href"].casefold():
                matched_name.append(text)
                matched_url.append(mangasee_base + link["href"])

        p("\nResults:\n")
        for n, match in enumerate(matched_name, start=1):
            p(f"{n}) {match}")
        if not matched_name:
            p("No matches found.")
            continue

        # default is to search again; a number picks directly, "y" asks for the number
        confirm = input(f"\nDo you see what you're looking for? (Y/N/[1-{len(matched_name)}]) [N]: ").casefold() or "n"
        if "y" in confirm:
            confirm = input(f"\nNumber of choice [1-{len(matched_name)}]: ")
        elif "n" in confirm:
            continue

        try:
            choice = int(confirm)
        except ValueError:
            # Anything that is not a number is treated like "no": search again.
            continue
        if 1 <= choice <= len(matched_name):
            return choice - 1, matched_name, matched_url
def selection(number, matched_name, matched_url):  # find all <span> tags inside choice URL, find latest chapter for % completion and specific choice details
    """Open the chosen title's page and read its latest chapter number.

    Args:
        number: zero-based index into ``matched_name`` / ``matched_url``.
        matched_name: list of result names.
        matched_url: list of result URLs, parallel to ``matched_name``.

    Returns:
        tuple: ``(url, latest_chapter, name)`` for the chosen title, with
        ``latest_chapter`` as an int.
    """
    name, url = matched_name[number], matched_url[number]
    driver.get(url)
    page = BeautifulSoup(driver.page_source, "html.parser")

    # One list per <span class="ng-binding">: its contents split on tabs and
    # newlines with the empty pieces discarded.
    span_parts = [
        [piece for piece in re.split("\t|\n", span.decode_contents()) if piece]
        for span in page.find_all("span", class_="ng-binding")
    ]

    # The chapter number is the second piece of the first span; when that
    # lookup is out of range, the third span is used instead.
    try:
        latest_chapter = int(float(span_parts[0][1]))
    except IndexError:
        latest_chapter = int(float(span_parts[2][1]))

    p(f"\nSelection: {name} | Total number of chapters: {latest_chapter}")
    return url, latest_chapter, name
def gallery_dl(url, total, name):  # passes url to Gallery-dl and gives an interactive output to user instead of default no response
    """Download ``url`` with gallery-dl into ``storage`` and show progress.

    Each output line of gallery-dl is scanned for a ``..._c<chapter>_...``
    segment; the chapter number found there is reported as a percentage of
    ``total``. The function returns only after gallery-dl has exited.

    Args:
        url: page URL handed to gallery-dl.
        total: number of chapters used as the 100% mark.
        name: title shown in the completion message.
    """
    # Argument list instead of one command string, so a storage path or URL
    # containing spaces is passed through intact.
    command = ["gallery-dl", url, "-d", str(storage)]
    p()
    with subprocess.Popen(command, stderr=subprocess.STDOUT, stdout=subprocess.PIPE) as process:
        # Iterating the pipe ends at EOF, i.e. when gallery-dl closes its output.
        for line in process.stdout:
            try:
                current = int(float(line.split(b"_")[-2].strip(b"c")))
            except (IndexError, ValueError):
                # Not a recognisable file line (warning, blank line, ...):
                # skip it instead of abandoning progress tracking.
                continue
            if total:  # avoid dividing by zero when the chapter count is unknown
                prcnt = round(100 * (current / total), 2)
                p(f"Percentage complete: {prcnt}%", end="\r")
        returncode = process.wait()
    if returncode:
        p(f"\n\ngallery-dl exited with code {returncode}; the download may be incomplete.")
    p(f"\n\nDone downloading: {name}")
def vol_search(term):  # searches for, title of content + "list of chapters", to find first Wikipedia page to volume/chapter information - WIP!
    """Look up how many chapters each volume of ``term`` contains.

    Searches the configured engine for ``"<term> list of chapters"``, follows
    the first result link whose URL contains "wikipedia" (if any), and counts
    ``<li>`` items per nested table inside each ``wikitable``.

    Args:
        term: title to look up.

    Returns:
        list[int]: one chapter count per nested table, in page order.
    """
    p(f"\nLooking up {term} in search engine... ")
    driver.get(engine + quote(term + " list of chapters"))
    results = BeautifulSoup(driver.page_source, "html.parser")

    # Follow the first Wikipedia hit; with no hit the search page itself is parsed below.
    wiki_link = next(
        (a["href"] for a in results.find_all("a", href=True) if "wikipedia" in a["href"]),
        None,
    )
    if wiki_link is not None:
        driver.get(wiki_link)

    article = BeautifulSoup(driver.page_source, "html.parser")
    chapters_per_volume = []
    for outer_table in article.find_all("table", class_="wikitable"):
        # Each table nested inside a wikitable is counted as one volume.
        for volume_table in outer_table.find_all("table"):
            chapter_total = sum(len(ol.find_all("li")) for ol in volume_table.find_all("ol"))
            chapters_per_volume.append(chapter_total)
    return chapters_per_volume
# strips folders of non-useful characters and places chapters in volumes as well as side-chapters corresponding to main chapter number
def sorter(chapter, name):
    """Rename downloaded chapter folders and group them into volume folders.

    Steps, all inside ``storage_subdirectory / name``:

    1. Rename every entry from its ``c<number>`` form to the float form of
       that number (``c001`` -> ``1.0``, ``c001.5`` -> ``1.5``).
    2. Move whole-number chapters into ``Volume <i>`` folders, filling each
       volume with as many consecutive chapters as ``chapter`` lists for it.
    3. Move side chapters (fractional numbers) next to their main chapter.
    4. Delete every numbered folder still left at the top level, i.e.
       chapters that were not assigned to any volume.

    Args:
        chapter: sequence of ints, the number of chapters in each volume in
            volume order.
        name: folder name of the title under ``storage_subdirectory``.
    """
    root = storage_subdirectory / name

    p("\nCleaning up chapter names into a usable format... ")
    # Snapshot the listing: entries are renamed inside the loop, and renaming
    # while the directory scan is still running can skip or revisit entries.
    for folder in list(root.iterdir()):
        # float() accepts leading zeros, so no manual zero-stripping is needed
        # (stripping them turned "c000" into "" and crashed the conversion).
        c_num = float(folder.name.strip("c"))
        shutil.move(folder, folder.parent / f"{c_num}")

    p("\nMoving chapters into their respective volumes... ")
    first = 1  # number of the first chapter belonging to the current volume
    for i, count in enumerate(chapter, start=1):
        current_volume = root / f"Volume {i}"
        p(f"\nMoving chapters related to Volume {i}... ", end="\r")
        for k in range(first, first + count):
            src = root / f"{float(k)}"
            if not src.exists():
                # Listed for this volume but not present on disk: keep sorting the rest.
                p(f"\nChapter {k} not found in download; skipping. ")
                continue
            # Make sure the volume folder exists so the chapter can be moved into it.
            current_volume.mkdir(parents=True, exist_ok=True)
            shutil.move(src, current_volume / f"{k}")
        first += count

    p("\n\nSorting side chapters (if any) into their respective volumes... ")
    # Map each sorted main chapter number to the folder that now contains it.
    volume_of = {s.name: s.parent for s in root.glob("*/*") if s.name[0].isdigit()}
    for f in list(root.iterdir()):
        if "." not in f.name:
            continue
        deci_name = f.name.split(".")
        if deci_name[-1] == "0":
            continue  # whole-number chapter, not a side chapter
        target = volume_of.get(deci_name[0])
        if target is not None:
            shutil.move(f, target / f.name)

    p("\nDeleting residual, non-volume-sorted chapters... ")
    for g in list(root.iterdir()):
        if g.name[0].isdigit():
            shutil.rmtree(g)
def converter(name):  # archives the sorted/cleaned directory into .CBZ, and uses KCC to convert -> .epub -> .MOBI files
    """Zip the sorted title folder as a .cbz and run KindleComicConverter on it.

    The zip is created under the current working directory as
    ``<name>.zip`` and moved to ``storage / "<name>.cbz"``; the KCC script
    is then run on ``<name>.cbz`` and waited for.

    Args:
        name: folder name of the title under ``storage_subdirectory``.
    """
    p("\nArchiving directory for KindleComicConverter... ")
    source_dir = storage_subdirectory / name
    shutil.make_archive(name, "zip", source_dir)
    cbz_target = storage / f"{name}.cbz"
    shutil.move(f"{name}.zip", cbz_target)

    p("\nRunning KindleComicConverter... ")
    # can edit flags to change kcc options
    kcc_command = ["python", kcc, name + ".cbz", "-m", "-u", "-s", "-b", "2", "-o", storage]
    subprocess.Popen(kcc_command).communicate()  # block until KCC has finished
if __name__ == "__main__":  # initialise program
    # Pipeline: search -> pick title -> download -> look up volume layout ->
    # sort chapters into volumes -> convert -> clean up.
    os.chdir(storage)  # change directory to user-defined storage location
    p("\nOpening headless Firefox temporarily... ")
    # `driver` is deliberately a module-level name: the functions above use it as a global.
    driver = webdriver.Firefox(service=driver_location, options=options)  # opens Firefox with Geckodriver path and Headless option
    try:
        manga_choice, name_list, url_list = search()  # user-selected content of choice, names of matching query, URLs of matching query
        url, latest, name = selection(manga_choice, name_list, url_list)  # specific content DL link, latest chapter available to DL, name of content
        gallery_dl(url, latest, name)
        chapter_info = vol_search(name)
    finally:
        # Always end the browser session, even if a step above failed, and
        # use quit() rather than close(): close() only closes the window.
        p("\nClosing headless Firefox... ")
        driver.quit()
    sorter(chapter_info, name)
    converter(name)
    os.remove(name + ".cbz")  # NOTE (original author): currently broken as executes at same time as subprocess Gallery-dl
    p("\nDeleting original downloaded files... ")
    shutil.rmtree(storage_subdirectory)  # delete mangasee subdirectory by default
    # uncomment below this line to only remove the specific content's folder not the whole subdirectory if there is other content within subdirectory
    #shutil.rmtree(storage_subdirectory / name)
    p("\nDone!\n")