-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_final_download_url.py
47 lines (39 loc) · 1.33 KB
/
get_final_download_url.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import random
import sys

import requests
from bs4 import BeautifulSoup
def get_ip_list(url, headers, timeout=10):
    """Scrape a proxy listing page and return its entries as 'ip:port' strings.

    The page at *url* is fetched and parsed with the lxml parser. Every
    ``<tr>`` after the first one is treated as a proxy entry: the text of its
    second and third ``<td>`` cells is joined with a colon (presumably the IP
    address and the port -- verify against the page layout).

    Args:
        url: Address of the listing page.
        headers: HTTP request headers passed through to ``requests.get``.
        timeout: Seconds to wait for the server before giving up, so an
            unresponsive host cannot block the script indefinitely.

    Returns:
        A list of strings, one per table row that has at least three cells.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched.
    """
    web_data = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(web_data.text, 'lxml')
    ip_list = []
    # The first <tr> is skipped, as in the original index loop that started at 1.
    for row in soup.find_all('tr')[1:]:
        tds = row.find_all('td')
        # Rows without enough <td> cells (e.g. <th>-only rows) carry no entry;
        # indexing them would raise IndexError.
        if len(tds) < 3:
            continue
        ip_list.append(tds[1].text.strip() + ':' + tds[2].text.strip())
    return ip_list
def get_random_ip(ip_list):
    """Pick one entry at random and wrap it as a ``requests`` proxies mapping.

    Args:
        ip_list: Iterable of address strings; each one gets the 'http://'
            scheme prepended before the random pick.

    Returns:
        A dict with the single key 'http' mapped to the chosen proxy URL.

    Raises:
        IndexError: If *ip_list* is empty (raised by ``random.choice``).
    """
    candidates = ['http://' + address for address in ip_list]
    return {'http': random.choice(candidates)}
# Script body: scrape a proxy list once, then request every URL listed in the
# local file 'url' through a randomly chosen proxy and print the redirect
# target (the Location response header) of each one.

user_agent = 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Mobile Safari/537.36'

# The proxy listing page is fetched without a Host header; the
# 'm.toutiao.com' Host value below is only sent with the per-URL requests.
proxy_source_url = 'http://www.xicidaili.com/nn/'
proxy_source_headers = {'User-Agent': user_agent}
ip_list = get_ip_list(proxy_source_url, headers=proxy_source_headers)

# One URL per line; blank lines are dropped because requests rejects an
# empty URL. The context manager closes the file as soon as it is read.
with open('url') as f:
    url_list = [line.strip() for line in f if line.strip()]

headers = {
    'Host': 'm.toutiao.com',
    'User-Agent': user_agent,
}

for url in url_list:
    proxies = get_random_ip(ip_list)
    try:
        # Redirects are not followed so that the Location header of the
        # first response can be read directly.
        r = requests.get(url, headers=headers, proxies=proxies,
                         allow_redirects=False, timeout=10)
    except requests.RequestException as exc:
        # A dead or slow proxy should not abort the whole run: report it on
        # stderr (stdout stays reserved for the resolved URLs) and move on.
        sys.stderr.write('request failed for %s via %s: %s\n'
                         % (url, proxies['http'], exc))
        continue
    location = r.headers.get('location')
    # Responses that are not redirects have no Location header; skip them.
    if location is not None:
        print(location)