-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrape_names_and_links.py
53 lines (36 loc) · 1.29 KB
/
scrape_names_and_links.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import bs4
import requests
import re
def get_web_content(url="https://pokemondb.net/pokedex/national", timeout=30):
    """Download a page and return its raw response body.

    Args:
        url: Address to fetch. Defaults to the pokemondb.net national
            Pokedex page.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The undecoded response body as ``bytes``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    # requests has no default timeout; without one a stalled server would
    # hang the script forever.
    page = requests.get(url=url, timeout=timeout)
    # Fail loudly instead of handing an error page to the HTML parser.
    page.raise_for_status()
    return page.content
def get_name_and_url(_html):
    """Extract a sprite's name and image URL from its HTML markup.

    Args:
        _html: The element to parse, either as a string or as any object
            whose ``str()`` is its markup. It must contain an attribute
            ending in ``alt="<name> sprite..."`` and one ending in
            ``src="<url>"``; ``data-alt`` / ``data-src`` match as well.

    Returns:
        A ``(name, url)`` tuple of strings.

    Raises:
        AttributeError: If either attribute is absent from the markup
            (``re.search`` returns ``None``).
    """
    markup = str(_html)
    # ``[^"]*`` keeps each capture inside its own quoted attribute value.
    # A greedy ``.*`` would run on to the last quote of the line and swallow
    # any attributes that follow.
    name = re.search(r'alt="([^"]*) sprite', markup).group(1)
    url = re.search(r'src="([^"]*)"', markup).group(1)
    return name, url
def main():
    """Scrape sprite names and image URLs and append them to a text file.

    Downloads the page via ``get_web_content``, takes the first element
    whose class is ``infocard-list infocard-list-pkmn-lg`` (presumably the
    Generation I list, judging by the variable name -- confirm against the
    live page), and extracts a ``(name, url)`` pair from every sprite
    element inside it. Each pair is appended to ``pokemon_image_links.txt``
    as ``'<name>' #start#<url>#end#``. Entries containing non-ASCII
    characters are printed to stdout instead of being written.

    Raises:
        SystemExit: If the expected container element is not on the page.
    """
    content = get_web_content()
    soup = bs4.BeautifulSoup(content, "html.parser")

    gen_i_div = soup.find(class_="infocard-list infocard-list-pkmn-lg")
    if gen_i_div is None:
        # Without this guard a changed page layout surfaces as an opaque
        # AttributeError on None.
        raise SystemExit(
            "Could not find the 'infocard-list infocard-list-pkmn-lg' "
            "element; the page layout may have changed."
        )

    # find_all is the supported spelling; findAll is a deprecated alias.
    links = gen_i_div.find_all(class_="img-fixed img-sprite")
    # links are in format of:
    # <span class="img-fixed img-sprite" data-alt="Bulbasaur sprite"
    # data-src="https://img.pokemondb.net/sprites/bank/normal/bulbasaur.png">
    # </span>
    pairings = [get_name_and_url(link) for link in links]

    # Append mode: repeated runs add to the file rather than replace it.
    with open("pokemon_image_links.txt", mode="a") as file:
        for name, url in pairings:
            entry = f"'{name}' #start#{url}#end#\n"
            try:
                # Only used as an ASCII check; the encoded bytes are discarded.
                entry.encode(encoding="ascii")
            except UnicodeEncodeError:
                # Non-ASCII entries are reported rather than written.
                print(entry)
            else:
                file.write(entry)


if __name__ == "__main__":
    main()