-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathpyside2dash.py
186 lines (150 loc) · 5.8 KB
/
pyside2dash.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# -*- coding: utf-8 -*-
import os
import shutil
import logging
import sqlite3
import plistlib
from multiprocessing import Pool
from bs4 import BeautifulSoup
# Root directory of the generated docset bundle; it is deleted and rebuilt by
# generate_docset() and receives the search index from write_to_sqlite().
docset_folder = "./Qt_for_Python.docset"
# Number of worker processes handed to multiprocessing.Pool in get_functions().
num_worker = 16
# guides_1
def get_guides_1():
    """Yield index entries for the first-level items of the index page's TOC.

    Parses the local copy of https://doc.qt.io/qtforpython/index.html and
    inspects every ``<li class="toctree-l1">`` element.  The first child of
    each list item is treated as the link.

    Yields:
        ``(name, "Guide", path)`` tuples, where ``name`` is the link text and
        ``path`` is the link's ``href`` joined onto ``"qtforpython"``.  Links
        whose ``href`` starts with ``https://doc.qt.io`` are skipped.
    """
    # Build the tree inside ``with`` so the file handle is closed right after
    # parsing instead of being leaked.
    with open("./doc.qt.io/qtforpython/index.html") as page:
        soup = BeautifulSoup(page, "lxml")
    for entry in soup.find_all("li", class_="toctree-l1"):
        link = entry.contents[0]
        if not str(link["href"]).startswith("https://doc.qt.io"):
            yield link.get_text(), "Guide", os.path.join("qtforpython", link["href"])
# guides_2
def get_guides_2():
    """Yield index entries for the second-level items of the index page's TOC.

    Parses the local copy of https://doc.qt.io/qtforpython/index.html and
    inspects every ``<li class="toctree-l2">`` element.  The first child of
    each list item is treated as the link.

    Yields:
        ``(name, "Guide", path)`` tuples, where ``name`` is the link text and
        ``path`` is the link's ``href`` joined onto ``"qtforpython"``.  Links
        whose ``href`` starts with ``https://doc.qt.io`` are skipped.
    """
    # Build the tree inside ``with`` so the file handle is closed right after
    # parsing instead of being leaked.
    with open("./doc.qt.io/qtforpython/index.html") as page:
        soup = BeautifulSoup(page, "lxml")
    for entry in soup.find_all("li", class_="toctree-l2"):
        link = entry.contents[0]
        if not str(link["href"]).startswith("https://doc.qt.io"):
            yield link.get_text(), "Guide", os.path.join("qtforpython", link["href"])
# guides_3
def get_guides_3():
    """Yield index entries for third-level TOC items that point at tutorials.

    Parses the local copy of https://doc.qt.io/qtforpython/index.html and
    inspects every ``<li class="toctree-l3">`` element.  The first child of
    each list item is treated as the link.

    Yields:
        ``(name, "Guide", path)`` tuples for links whose ``href`` contains
        ``"tutorials"`` and does not start with ``https://doc.qt.io``.
        ``path`` is the ``href`` joined onto ``"qtforpython"``.
    """
    # Build the tree inside ``with`` so the file handle is closed right after
    # parsing instead of being leaked.
    with open("./doc.qt.io/qtforpython/index.html") as page:
        soup = BeautifulSoup(page, "lxml")
    for entry in soup.find_all("li", class_="toctree-l3"):
        link = entry.contents[0]
        href = link["href"]
        if not str(href).startswith("https://doc.qt.io") and "tutorials" in href:
            yield link.get_text(), "Guide", os.path.join("qtforpython", href)
# modules
def get_modules():
    """Yield index entries for third-level TOC items that are not tutorials.

    Parses the local copy of https://doc.qt.io/qtforpython/index.html and
    inspects every ``<li class="toctree-l3">`` element.  The first child of
    each list item is treated as the link.

    Yields:
        ``(name, "Module", path)`` tuples for links whose ``href`` neither
        contains ``"tutorials"`` nor starts with ``https://doc.qt.io``.
        ``path`` is the ``href`` joined onto ``"qtforpython"``.
    """
    # Build the tree inside ``with`` so the file handle is closed right after
    # parsing instead of being leaked.
    with open("./doc.qt.io/qtforpython/index.html") as page:
        soup = BeautifulSoup(page, "lxml")
    for entry in soup.find_all("li", class_="toctree-l3"):
        link = entry.contents[0]
        href = link["href"]
        if not str(href).startswith("https://doc.qt.io") and "tutorials" not in href:
            yield link.get_text(), "Module", os.path.join("qtforpython", href)
# classes
def get_classes():
    """Yield index entries for the fourth-level items of the index page's TOC.

    Parses the local copy of https://doc.qt.io/qtforpython/index.html and
    inspects every ``<li class="toctree-l4">`` element.  The first child of
    each list item is treated as the link.

    Yields:
        ``(name, "Class", path)`` tuples, where ``name`` is the link text and
        ``path`` is the link's ``href`` joined onto ``"qtforpython"``.  Links
        whose ``href`` starts with ``https://doc.qt.io`` are skipped.
    """
    # Build the tree inside ``with`` so the file handle is closed right after
    # parsing instead of being leaked.
    with open("./doc.qt.io/qtforpython/index.html") as page:
        soup = BeautifulSoup(page, "lxml")
    for entry in soup.find_all("li", class_="toctree-l4"):
        link = entry.contents[0]
        if not str(link["href"]).startswith("https://doc.qt.io"):
            yield link.get_text(), "Class", os.path.join("qtforpython", link["href"])
# function
def functions_single(c):
    """Collect index entries for the members listed in one class page.

    The page's element with ``id="synopsis"`` is scanned; every descendant tag
    that carries an ``id`` attribute is treated as a section, and each
    ``<a class="reference internal">`` inside it becomes one entry.

    Args:
        c: A sequence whose last element is the page path relative to
            ``./doc.qt.io`` (e.g. one of the tuples yielded by get_classes).

    Returns:
        A list of ``(name, type, path)`` tuples.  ``type`` is looked up from
        the section id and falls back to ``"Function"`` (with a warning) for
        unknown ids.  ``path`` is the link's ``href`` joined onto the
        directory of the class page.  The list is empty when the page has no
        synopsis.
    """
    # Section id -> entry type stored in the search index.
    section_types = {
        "functions": "Function",
        "static-functions": "Function",
        "virtual-functions": "Function",
        "signals": "Provider",
        "slots": "Operator",
        "id3": "Function"
    }
    page_path = c[-1]
    # Close the file as soon as it is parsed; this runs once per class page in
    # every worker process, so leaked handles would pile up quickly.
    with open(os.path.join("./doc.qt.io", page_path)) as page:
        soup = BeautifulSoup(page, "lxml")

    # A page without <body> (e.g. an empty file) is handled like a page
    # without a synopsis instead of raising AttributeError on None.
    body = soup.find("body")
    synopsis = body.find(id="synopsis") if body is not None else None
    if not synopsis:
        logging.warning("{} don't have synopsis".format(c))
        return []

    entries = []
    page_dir = os.path.dirname(page_path)
    for tag in synopsis.find_all(True):
        if not tag.has_attr("id"):
            continue
        entry_type = section_types.get(tag["id"])
        if entry_type is None:
            entry_type = "Function"
            logging.warning("ID {} don't exist in records.".format(tag["id"]))
        for link in tag.find_all("a", class_="reference internal"):
            # Skip absolute doc.qt.io links (the original comment says
            # "handle 404"; presumably those targets are not in the local copy).
            if not str(link["href"]).startswith("https://doc.qt.io"):
                entries.append((link.get_text(), entry_type, os.path.join(page_dir, link["href"])))
    return entries
# deduplicate
def deduplicate(items: list, name: str):
    """Return ``items`` without duplicates, keeping first-seen order.

    A summary line with the counts before and after is printed.

    Args:
        items: Hashable entries (the script passes ``(name, type, path)``
            tuples).
        name: Label used in the printed summary.

    Returns:
        A new list containing each distinct entry once, in the order of its
        first occurrence in ``items``.
    """
    # dict.fromkeys keeps insertion order (guaranteed since Python 3.7), so
    # the result is reproducible from run to run.  list(set(...)) follows hash
    # order, which changes between runs for strings.
    unique_items = list(dict.fromkeys(items))
    print('The number of {} is {}, and the number is {} after removing duplicated items.'
          .format(name, len(items), len(unique_items)))
    return unique_items
def get_functions(classes: list):
    """Run functions_single over every class entry in a process pool.

    Args:
        classes: Entries to hand to functions_single, one per call.

    Returns:
        One flat list holding the entries of all per-class result lists, in
        the order of ``classes``.
    """
    flattened = []
    with Pool(num_worker) as workers:
        # map() blocks until every page is processed and returns the
        # per-class lists in input order.
        for per_class in workers.map(functions_single, classes):
            flattened.extend(per_class)
    return flattened
def generate_docset():
    """Create the docset directory skeleton from the local documentation copy.

    Any existing ``docset_folder`` is removed first.  ``./doc.qt.io`` is then
    copied to ``Contents/Resources/Documents`` inside it, and an
    ``Info.plist`` describing the docset is written to ``Contents``.
    """
    # Always rebuild from scratch; copytree needs a destination that does not
    # exist yet.
    if os.path.exists(docset_folder):
        shutil.rmtree(docset_folder)

    documents_dir = os.path.join(docset_folder, "Contents/Resources/Documents")
    shutil.copytree("./doc.qt.io", documents_dir)

    plist_entries = {
        "CFBundleIdentifier": "qtforpython",
        "CFBundleName": "Qt for Python",
        "DocSetPlatformFamily": "qtforpython",
        "isDashDocset": True,
        "dashIndexFilePath": "qtforpython/index.html",
        "isJavaScriptEnabled": True,
    }
    plist_path = os.path.join(docset_folder, "Contents/Info.plist")
    with open(plist_path, "wb") as plist_file:
        plistlib.dump(plist_entries, plist_file)
def write_to_sqlite(doc_set: list):
    """Write the search index into the docset's SQLite database.

    The ``searchIndex`` table in ``Contents/Resources/docSet.dsidx`` under
    ``docset_folder`` is dropped if present and recreated with a unique index
    over ``(name, type, path)``.  Each entry is then inserted and echoed to
    stdout; rows that violate the unique index are ignored.

    Args:
        doc_set: ``(name, type, path)`` entries to insert.

    Raises:
        sqlite3.Error: If the database cannot be opened or a statement fails.
    """
    print('Writing to sqlite.... It may take seconds... Please wait...')
    conn = sqlite3.connect(os.path.join(docset_folder, 'Contents/Resources/docSet.dsidx'))
    try:
        cur = conn.cursor()
        # IF EXISTS covers the "no table yet" case without also swallowing
        # unrelated OperationalErrors such as a locked database.
        cur.execute('DROP TABLE IF EXISTS searchIndex;')
        cur.execute('CREATE TABLE searchIndex (id INTEGER PRIMARY KEY, name TEXT, type TEXT, path TEXT);')
        cur.execute('CREATE UNIQUE INDEX anchor ON searchIndex (name, type, path);')
        for item in doc_set:
            cur.execute('INSERT OR IGNORE INTO searchIndex (name, type, path) VALUES (?,?,?)', item)
            print(item, end='\n')
        conn.commit()
    finally:
        # Close the connection even when a statement above raised.
        conn.close()
if __name__ == '__main__':
    # Guides are gathered from three levels of the index page's table of
    # contents and merged before removing duplicates.
    guides = deduplicate(
        list(get_guides_1()) + list(get_guides_2()) + list(get_guides_3()),
        'guides',
    )
    modules = deduplicate(list(get_modules()), 'modules')
    classes = deduplicate(list(get_classes()), 'classes')
    # Member entries are extracted from each class page found above.
    functions = deduplicate(get_functions(classes), 'functions')

    docs = guides + modules + classes + functions

    # Build the docset directory first; the database lives inside it.
    generate_docset()
    write_to_sqlite(docs)
    print('Okay! Done! Have fun coding')