-
Notifications
You must be signed in to change notification settings - Fork 0
/
NasaSpaceflight.py
36 lines (32 loc) · 1.33 KB
/
NasaSpaceflight.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import subprocess
import os
# https://forum.nasaspaceflight.com/index.php?topic=51332
# wget -e robots=off -r -k -np --accept-regex 'action=dlattach' --content-disposition -U Mozilla -w 3 'https://forum.nasaspaceflight.com/index.php?topic=51332'
# wget -U Mozilla -w 3 -O index.html 'https://forum.nasaspaceflight.com/index.php?topic=51332'
# Variables
topic = "51332"     # id of the forum topic to mirror
pages = 156         # 1-based number of the last page to download
page_start = 1      # 1-based number of the first page (raise it to resume a run)
# ---------
URL = "https://forum.nasaspaceflight.com/index.php?topic="
complete_URL = URL + topic
# A page is addressed as "<topic>.<offset>"; the offset grows by this step for
# every page (presumably the forum's posts-per-page setting -- TODO confirm).
PAGE_STEP = 20


def page_dir_name(page, total_pages=pages):
    """Return the output directory name for zero-based *page*.

    The name is the 1-based page number, zero-padded to the number of digits
    in *total_pages* so that every directory name has the same width.
    """
    return str(page + 1).zfill(len(str(total_pages)))


def page_url(page, base_url=complete_URL):
    """Return the URL of zero-based *page*: *base_url* + "." + offset."""
    return base_url + "." + str(page * PAGE_STEP)


def image_wget_args(url):
    """Return the wget command that fetches the attachments linked from *url*.

    The download is recursive but restricted to links matching
    ``action=dlattach``; robots.txt is ignored, a browser-like user agent is
    sent, and wget waits 2 seconds between requests.
    """
    return ['wget', '-e', 'robots=off', '-r', '-k', '-np',
            '--accept-regex', 'action=dlattach', '--content-disposition',
            '-U', 'Mozilla', '-w', '2', url]


def html_wget_args(url):
    """Return the wget command that saves *url* itself as ``index.html``."""
    return ['wget', '-U', 'Mozilla', '-w', '2', '-O', 'index.html', url]


def run_wget(args, cwd):
    """Run the command *args* inside directory *cwd*.

    The child gets *cwd* as its working directory, so this process never has
    to change (and later restore) its own directory. The child's stdout is
    discarded; its stderr is inherited, so wget's log output stays visible.

    Returns True when the command exits with status 0, False otherwise.
    Raises FileNotFoundError when the executable is not installed.
    """
    result = subprocess.run(args, cwd=cwd, stdout=subprocess.DEVNULL)
    return result.returncode == 0


def download_page(page):
    """Download the attachments and the HTML of zero-based *page*.

    Everything is stored in a directory named after the page number, created
    on demand. Returns True when both wget runs succeeded.
    """
    print("Downloading page " + str(page + 1))
    # make dir; exist_ok lets an interrupted run be restarted without
    # crashing on the directories that were already created
    target_dir = page_dir_name(page)
    os.makedirs(target_dir, exist_ok=True)
    url = page_url(page)

    # images
    print(url)
    images_ok = run_wget(image_wget_args(url), target_dir)
    if images_ok:
        print("Images Downloaded")
    else:
        print("WARNING: wget reported errors while downloading images from "
              + url)

    # html page
    html_ok = run_wget(html_wget_args(url), target_dir)
    if html_ok:
        print("HTML downloaded")
    else:
        print("WARNING: wget failed to download " + url)

    return images_ok and html_ok


def main():
    """Download every page from ``page_start`` up to and including ``pages``."""
    print(complete_URL)
    for page in range(page_start - 1, pages):
        download_page(page)


if __name__ == "__main__":
    main()