forked from stefan-jansen/machine-learning-for-trading
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrape_test.py
74 lines (58 loc) · 2.06 KB
/
scrape_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__author__ = 'Stefan Jansen'
from bs4 import BeautifulSoup
from pathlib import Path
from selenium import webdriver
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from time import sleep
import pickle
import requests
from lxml import html
from os import environ
# Log in to Seeking Alpha through a Chrome session driven by Selenium and
# save the browser's cookies to 'SA_cookies.pkl'.

# Credentials are read from the environment; a missing variable raises
# KeyError here, before any browser window is opened.
EMAIL = environ['SEEKING_ALPHA_USER']
PASS = environ['SEEKING_ALPHA_PWD']

url = 'http://seekingalpha.com/account/login'

driver = webdriver.Chrome()
try:
    driver.get(url)
    # find_element(By.<kind>, ...) replaces the find_element_by_* helpers,
    # which are no longer available in Selenium 4.
    driver.find_element(By.ID, 'sign-in').click()
    sleep(1)  # fixed pause before looking up the login form fields

    # The three form steps are best-effort: a failure is printed and the
    # script moves on to the next step instead of aborting.
    try:
        email = driver.find_element(By.ID, 'authentication_login_email')
        email.send_keys(EMAIL)
    except Exception as e:
        print(e)

    try:
        password = driver.find_element(By.ID, 'authentication_login_password')
        password.send_keys(PASS)
    except Exception as e:
        print(e)

    try:
        driver.find_element(
            By.XPATH, "//input[@value='Sign in' and @class='c']").click()
    except Exception as e:
        print(e)

    # Fixed pause after submitting the form, before the cookies are read.
    sleep(10)
    cookies = driver.get_cookies()

    # Context manager so the pickle file is flushed and closed immediately.
    with open('SA_cookies.pkl', 'wb') as cookie_file:
        pickle.dump(cookies, cookie_file)
finally:
    # Always shut the browser down, even if a step above raised; quit()
    # ends the whole driver session rather than only closing the window.
    driver.quit()
# Log in again with a plain HTTP session by posting the login form directly.
sessionRequests = requests.Session()

# NOTE(review): the original referenced an undefined `loginUrl`. The login
# address used elsewhere in this script is assumed here -- confirm the
# actual endpoint the login form posts to.
loginUrl = 'http://seekingalpha.com/account/login'

# This is the form data that the page sends when logging in.
# The credentials are the EMAIL / PASS values read from the environment
# (the original indexed an undefined `keys` dict).
loginData = {
    'slugs[]': None,
    'rt': None,
    'user[url_source]': None,
    'user[location_source]': 'orthodox_login',
    'user[email]': EMAIL,
    'user[password]': PASS,
}

# Authenticate
r = sessionRequests.post(
    loginUrl,
    data=loginData,
    headers={
        "Referer": "http://seekingalpha.com/",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36",
    },
)