-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathselenium_practice.py
91 lines (65 loc) · 3.32 KB
/
selenium_practice.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
# --- Browser setup -----------------------------------------------------------
chrome_options = Options()
# "detach" keeps the browser window open after the script finishes or errors out.
chrome_options.add_experimental_option("detach", True)
prefs = {
    # NOTE: machine-specific path -- change this to your own download directory.
    "download.default_directory": '/Users/justinpaul/Downloads',
    # NOTE(review): Chrome's preference is usually spelled
    # "download.directory_upgrade" -- confirm before relying on this key.
    "directory_upgrade": True,
}
chrome_options.add_experimental_option("prefs", prefs)

# Selenium 4 takes the chromedriver location through a Service object; passing
# the path positionally to webdriver.Chrome() is deprecated and is rejected by
# newer Selenium releases.
driver = webdriver.Chrome(
    service=Service('/usr/local/bin/chromedriver'),
    options=chrome_options,
)
driver.get('https://www.mdst.club/agenda')

# Constructing a WebDriverWait does not block on its own; it only waits once
# .until() is called with a condition. Here: wait (up to 5 s) for the document
# to report that it has finished loading.
WebDriverWait(driver, timeout=5).until(
    lambda d: d.execute_script('return document.readyState') == 'complete'
)
# --- Page metadata and cookies ----------------------------------------------
# Identifying information about the loaded page is exposed on the driver.
page_title = driver.title
print(f'title of site: {page_title}')

# Cookies can be inspected and added through the driver as well.
cookies_before = driver.get_cookies()
print(f'initial cookies: {cookies_before}')

cookie = dict(name='MDSTcookie', value='yum')
driver.add_cookie(cookie)

cookies_after = driver.get_cookies()
print(f'cookies: {cookies_after}')
print('_' * 30)
# --- Scrolling and element lookup -------------------------------------------
# Fixed pause purely so the demo is watchable; real scripts should rely on
# WebDriverWait conditions instead of time.sleep().
time.sleep(2)
# Scrolling is done by running JavaScript in the page, for example:
# driver.execute_script("window.scrollTo(0,document.body.scrollHeight-300)")

# Elements can be located by ID / class name / ...
matches_by_class = driver.find_elements(By.CLASS_NAME, 'C9DxTc ')
for element in matches_by_class:
    print(element.text)
print('-' * 30)

# ... but XPath is the more conventional locator strategy with Selenium.
matches_by_xpath = driver.find_elements(By.XPATH, '//span[@class="C9DxTc "]')
for element in matches_by_xpath:
    print(element.text)
print('_' * 30)
# --- Working inside the embedded calendar iframe ------------------------------
# Elements that live inside an iframe cannot be reached while the driver is
# pointed at the outer page. find_element() (singular) raises when nothing
# matches; find_elements() would only return an empty list and never raise,
# so the singular form is required for this demonstration to trigger.
try:
    driver.find_element(By.XPATH, '//img[@id="navForward1"]')
except NoSuchElementException as e:
    print(e)
    print("There is a google calendar iframe embedded in the website, so to interact with it, we need to change our driver from the site to the iframe.")

# The usual way to deal with embedded content: locate the iframe element and
# switch the driver's context into it.
calendar = WebDriverWait(driver, timeout=5).until(
    EC.presence_of_element_located((By.XPATH, "//iframe[@jsname='L5Fo6c']"))
)
driver.switch_to.frame(calendar)
time.sleep(2)  # demo pacing only

# A WebDriverWait only waits when .until() is called; wait for the
# "next" arrow to become clickable before clicking it.
WebDriverWait(driver, timeout=2).until(
    EC.element_to_be_clickable((By.XPATH, '//img[@id="navForward1"]'))
).click()
time.sleep(2)  # demo pacing only

# Wait until at least one event entry exists (an empty list is falsy, so the
# wait keeps polling), then open the last one. '//span[@class="te-s"]' would
# be an equivalent locator.
event_entries = WebDriverWait(driver, timeout=2).until(
    lambda d: d.find_elements(By.XPATH, '//*[@class="te-s"]')
)
event_entries[-1].click()
time.sleep(2)  # demo pacing only

expo = WebDriverWait(driver, timeout=2).until(
    EC.presence_of_element_located((By.XPATH, '//div[@class="details"]'))
)

# Sample print-out of the event card. The leading "." makes each XPath
# relative to the element it is called on; a bare "//" would search the whole
# document regardless of which element the lookup starts from.
print(expo.find_element(By.XPATH, './/span[@class="title"]').text)
print(expo.find_element(By.XPATH, './/span[@class="event-when"]').text)
links = expo.find_element(By.XPATH, './/span[@class="links"]')
for a_tag in links.find_elements(By.XPATH, './/a'):
    print(a_tag.text, a_tag.get_attribute('href'), sep='\t')
# --- Screenshots and teardown -------------------------------------------------
driver.save_screenshot('selenium_full_screen.png')  # whole browser viewport
expo.screenshot('selenium_expo_card.png')  # a single element

# Point the driver back at the outer page instead of the iframe.
driver.switch_to.default_content()

# (A bare WebDriverWait(...) without .until() does not pause, so none is used
# here; add time.sleep(...) before quitting to keep the window up longer.)
# quit() closes the browser and also ends the session and stops the
# chromedriver process, whereas close() only closes the current window.
# Comment this out to leave the window open after the script finishes.
driver.quit()