-
Notifications
You must be signed in to change notification settings - Fork 34
/
Collecting NYTimes Data.py
79 lines (64 loc) · 2.56 KB
/
Collecting NYTimes Data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 20 00:10:29 2017
@author: Dinesh
"""
# The commented-out News API snippet below is not required; it is kept for reference only.
########## News API ########################################################
# from newsapi import NewsAPI
# key = '96af62a035db45bda517a9ca62a25ac3'
# params = {}
# api = NewsAPI(key)
# sources = api.sources(params)
# articles = api.articles(sources[0]['id'], params)
################ NY Times API #############################################
# Standard-library modules (csv is imported but not used in the code visible here).
import sys, csv, json
# Python 2 only, and order-dependent: reload(sys) has to run before the
# setdefaultencoding call (presumably because the interpreter removes that
# function from sys at startup -- see the Python 2 sys docs). The call then
# makes UTF-8 the default codec for implicit str/unicode conversions.
# NOTE(review): neither reload() nor sys.setdefaultencoding exists on Python 3.
reload(sys)
sys.setdefaultencoding('utf8')
# Third-party HTTP client; used by ArchiveAPI.query to fetch the archive JSON.
import requests
"""
About:
Python wrapper for the New York Times Archive API
https://developer.nytimes.com/article_search_v2.json
"""
class APIKeyException(Exception):
    """Error type for New York Times API key problems.

    :param message: Human-readable description of the problem; stored on
        ``self.message`` and passed on to :class:`Exception`.
    """

    def __init__(self, message):
        # Chain to Exception so that args and str() carry the message too.
        super(APIKeyException, self).__init__(message)
        self.message = message
class InvalidQueryException(Exception):
    """Error type for archive queries with unusable parameters.

    :param message: Human-readable description of the problem; stored on
        ``self.message`` and passed on to :class:`Exception`.
    """

    def __init__(self, message):
        # Chain to Exception so that args and str() carry the message too.
        super(InvalidQueryException, self).__init__(message)
        self.message = message
class ArchiveAPI(object):
    """Small client for the New York Times Archive API.

    Stores an API key and a URL template; :meth:`query` fetches the archive
    JSON for one (year, month) pair.
    """

    def __init__(self, key=None):
        """
        Initializes the ArchiveAPI class. Raises an exception if no API key is given.

        :param key: New York Times API Key
        :raises APIKeyException: if ``key`` is None or empty
        """
        self.key = key
        # Template slots, in order: year, month, API key.
        self.root = 'http://api.nytimes.com/svc/archive/v1/{}/{}.json?api-key={}'
        if not self.key:
            nyt_dev_page = 'http://developer.nytimes.com/docs/reference/keys'
            exception_str = 'Warning: API Key required. Please visit {}'
            # Raise the exception class this file actually defines; the
            # previous name (NoAPIKeyException) did not exist, so a missing
            # key surfaced as a NameError instead of this message.
            raise APIKeyException(exception_str.format(nyt_dev_page))

    def query(self, year=None, month=None, key=None):
        """
        Calls the archive API and returns the decoded JSON response.

        :param year: Year to fetch; must be 1882 or later.
        :param month: Month to fetch; must be in the range 1-12.
        :param key: Defaults to the API key used to initialize the ArchiveAPI class.
        :returns: Whatever ``requests``' ``Response.json()`` decodes from the reply.
        :raises InvalidQueryException: if ``year`` or ``month`` is missing or
            out of range.
        """
        if not key:
            key = self.key
        # Reject missing values explicitly: ordering None against an int only
        # works on Python 2 (where it already ended in InvalidQueryException)
        # and is a TypeError on Python 3.
        if year is None or month is None:
            out_of_range = True
        else:
            # currently the Archive API only supports year >= 1882
            out_of_range = (year < 1882) or not (0 < month < 13)
        if out_of_range:
            exception_str = 'Invalid query: See http://developer.nytimes.com/archive_api.json'
            raise InvalidQueryException(exception_str)
        url = self.root.format(year, month, key)
        r = requests.get(url)
        return r.json()
# Replace the placeholder below with your NYTimes Developer key.
api = ArchiveAPI('key')

# Fetch every month of 2007-2016 (newest year first) and store each month's
# response as its own JSON file.
years = [2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009, 2008, 2007]
months = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
# Destination directory; it must already exist, since open() will not create it.
output_dir = '/Users/Dinesh/Documents/Project Stock predictions/data/nytimes/'

for year in years:
    for month in months:
        mydict = api.query(year, month)
        # File name pattern: <year>-<zero-padded month>.json, e.g. 2016-01.json
        file_str = output_dir + str(year) + '-' + '{:02}'.format(month) + '.json'
        # The with-statement closes the file on exit, so no explicit close()
        # call is needed.
        with open(file_str, 'w') as fout:
            json.dump(mydict, fout)