-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathMCI_internet_coverage.py
50 lines (42 loc) · 1.52 KB
/
MCI_internet_coverage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import re
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
# URL of the MCI internet coverage map
urlpage = 'https://mci.ir/notrino-coverage-map'

# Pre-compiled pattern for one coverage-table row: Province, Region, Status.
# NOTE: the Status cell's class attribute really is " en-text" (leading space)
# in the page markup — do not "fix" the space.
_ROW_PATTERN = re.compile(r'<td>(.+?)</td><td>(.+?)</td><td class=" en-text">(.+?)</td>')


def parse_coverage_rows(html):
    """Extract coverage records from one page's HTML source.

    Parameters:
        html: raw page source containing the coverage table.

    Returns:
        A list of dicts with keys 'Province', 'Region', 'Status', and
        0/1 indicator columns '3G', '4G', '4.5G' derived from the Status text.
    """
    rows = []
    for province, region, status in _ROW_PATTERN.findall(html):
        rows.append({
            'Province': province,
            'Region': region,
            'Status': status,
            # Substring checks: '4G' does not match inside '4.5G', so the
            # three indicators are independent.
            '3G': 1 if '3G' in status else 0,
            '4G': 1 if '4G' in status else 0,
            '4.5G': 1 if '4.5G' in status else 0,
        })
    return rows


def scrape(pages=range(1, 295)):
    """Drive a Chrome browser through the paginated coverage map.

    Parameters:
        pages: iterable of page numbers to visit (defaults to the site's
               observed 294 pages — TODO confirm the count is still current).

    Returns:
        Accumulated list of coverage record dicts from every visited page.
    """
    driver = webdriver.Chrome()
    data = []
    try:
        driver.get(urlpage)
        # Fixed sleeps mirror the original script; the page has no explicit
        # load signal we can key off without more selenium machinery.
        time.sleep(2)
        for page in pages:
            # Pagination links are labelled with their bare page number.
            driver.find_element(By.LINK_TEXT, str(page)).click()
            time.sleep(1)  # allow the table to refresh before reading it
            data.extend(parse_coverage_rows(driver.page_source))
    finally:
        # Always release the browser, even if navigation/lookup fails —
        # the original leaked the Chrome process on any exception.
        driver.quit()
    return data


if __name__ == "__main__":
    # Collect every page and save the result as an Excel file.
    pd.DataFrame(scrape()).to_excel('MCI_internet_coverage.xlsx', index=None)