-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
77 lines (60 loc) · 2.55 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
"""Main script for this webscraper to get data from Yahoo Finance.
Prints regular progress messages in standard output to update on the progress
of the webscraper. Logs are stored in default file called scraper.log.
"""
import numpy as np
import pandas as pd
import functions
def main() -> None:
    """Scrape Yahoo Finance figures for every ticker in the input workbook.

    Reads tickers from ``ready_input_data.xlsx`` (generated from
    ``original_data.xlsx``), scrapes historical prices, balance-sheet and
    income-statement figures for each ticker, converts the -1.0 failure
    sentinel to NaN, and writes the result to a user-named Excel file.
    Progress is printed to stdout; logs go to scraper.log.
    """
    # Enable logging.
    functions.configure_logs()
    # Correctly format the skeleton Excel table before use.
    functions.fix_ticker_formatting(
        filename="original_data.xlsx",
        save_filename="ready_input_data.xlsx",
        column="A",
    )
    data_file: str = "ready_input_data.xlsx"  # Formatted table.
    input_df = pd.read_excel(data_file)
    work_df = input_df.copy()  # To avoid working with original df.
    df_length: int = len(work_df)
    print("Status: Starting webscrape.\n")
    for i in range(df_length):
        ticker: str = work_df["Ticker"][i]
        print(f"\tStatus: # {i+1}/{df_length} Currently scraping data: {ticker}\n")
        # Getting the url's for a ticker.
        price_url, bal_url, inc_url = functions.get_urls(ticker)
        # Getting the data for a ticker; random delays between page fetches
        # reduce the chance of the scraper being rate-limited or blocked.
        price20, price19, price18, price17 = functions.get_hist_price(price_url, ticker)
        functions.generate_rand_delay(lower=0, upper=5)
        shares20, shares19, debt20, debt19 = functions.get_debt_shares(bal_url, ticker)
        functions.generate_rand_delay(lower=0, upper=5)
        rev20, rev19, ebit20, ebit19 = functions.get_revenue_ebit(inc_url, ticker)
        functions.generate_rand_delay(lower=0, upper=5)
        # Entering data into dataframe.
        work_df.loc[i, "Price 2020"] = price20
        work_df.loc[i, "Price 2019"] = price19
        work_df.loc[i, "Price 2018"] = price18
        work_df.loc[i, "Price 2017"] = price17
        work_df.loc[i, "Revenue 2020"] = rev20
        work_df.loc[i, "Revenue 2019"] = rev19
        work_df.loc[i, "Share Number 2020"] = shares20
        work_df.loc[i, "Share Number 2019"] = shares19
        work_df.loc[i, "Debt 2020"] = debt20
        work_df.loc[i, "Debt 2019"] = debt19
        work_df.loc[i, "EBIT 2020"] = ebit20
        work_df.loc[i, "EBIT 2019"] = ebit19
    # BUG FIX: DataFrame.replace returns a NEW frame (it is not in-place by
    # default); the original call discarded the result, so -1.0 sentinel
    # values were never converted to NaN. Assign the result back.
    work_df = work_df.replace(to_replace=-1.0, value=np.nan)
    # input() already returns str, so no str() wrapper is needed.
    file_name: str = input(
        "\nWhat filename to save as?"
        "\n1. Use a unique filename!"
        "\n2. Don't provide a file extension"
        "\nEnter here: "
    )
    # Export scraped data to Excel file.
    work_df.to_excel(f"{file_name}.xlsx")
    print("Status: Data exported!\nAll done!")
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()