-
Notifications
You must be signed in to change notification settings - Fork 0
/
script.py
33 lines (25 loc) · 908 Bytes
/
script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import requests
import threading
from bs4 import BeautifulSoup
def download_pdf(name: str):
    """Download one PDF from the OSTEP site into the current directory.

    Args:
        name: Path of the PDF relative to the OSTEP base URL. The same
            string is used as the local file name.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server responds with an HTTP error status.
        OSError: If the local file cannot be opened or written.
    """
    BASE_URL = "https://pages.cs.wisc.edu/~remzi/OSTEP/"
    BOOK_DIR = './'
    url = BASE_URL + name
    thread_id = threading.get_ident()
    print(f"thread {thread_id}: downloading {url}...")
    # requests applies no timeout unless one is given; without it a stalled
    # connection would block this thread indefinitely.
    res = requests.get(url, timeout=60)
    # Fail loudly on 4xx/5xx rather than saving an HTML error page as a ".pdf".
    res.raise_for_status()
    file_path = BOOK_DIR + name
    with open(file_path, 'wb') as f:
        f.write(res.content)
    print(f"thread {thread_id}: download complete for {name}")
if __name__ == '__main__':
    # Scrape the OSTEP index page for every linked PDF and download each one
    # on its own thread.
    OSTEP_URL = "https://pages.cs.wisc.edu/~remzi/OSTEP/"
    # Explicit timeout and status check: do not hang on a stalled connection
    # and do not try to parse an HTTP error page as the index.
    res = requests.get(OSTEP_URL, timeout=60)
    res.raise_for_status()
    # Name the parser explicitly so parsing does not depend on which optional
    # parsers happen to be installed (and to avoid GuessedAtParserWarning).
    soup = BeautifulSoup(res.text, 'html.parser')
    hrefs = (a.get('href') for a in soup.find_all('a'))
    # dict.fromkeys de-duplicates while preserving page order, so the same
    # PDF linked twice is not written by two threads at the same time.
    pdf_names = list(dict.fromkeys(
        href for href in hrefs if href and href.endswith('.pdf')
    ))
    threads = [
        threading.Thread(target=download_pdf, args=(name,))
        for name in pdf_names
    ]
    for thread in threads:
        thread.start()
    # Wait for every download explicitly before the script exits.
    for thread in threads:
        thread.join()