-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimages.py
122 lines (92 loc) · 4.37 KB
/
images.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
"""Queries a Google Custom Search Engine for images.
This uses Google's `Custom Search API`. More details can be found at:
https://developers.google.com/custom-search/
The Python Client library info can be found at:
https://developers.google.com/api-client-library/python/apis/customsearch/v1
Follow the instructions there to download the Google API Python client. It might
be as simple as running:
pip install --upgrade google-api-python-client
A canonical example for using Image Search can be found at:
https://github.com/google/google-api-python-client/blob/master/samples/customsearch/main.py
"""
__author__ = 'shor.joel@gmail.com (Joel Shor)'
import logging
import os
import shutil
import urllib
import imghdr
from googleapiclient.discovery import build
def _fetch_single_image(word, destination_fn, service, credentials, max_tries=5):
    """Searches for an image of `word` and saves the first readable hit to disk.

    Issues a single image query against the Custom Search Engine, then walks the
    returned links in order until one both downloads and is recognised as an
    image by `imghdr`.

    Args:
        word: Word to find image for.
        destination_fn: Destination filename for image. Every download attempt
            writes to this path, so after a failed call it may still hold an
            unreadable file from the last attempt.
        service: The Google Client API service object.
        credentials: The credentials object. `credentials.images.cxString` is
            passed as the `cx` query parameter.
        max_tries: Maximum number of search results to try. The API accepts
            between 1 and 10 results per request, so the value is clamped to
            that range.

    Returns:
        `True` on success, `False` otherwise (including when the search
        produced no results at all).
    """
    # `urlretrieve` lives in `urllib.request` on Python 3 and directly in
    # `urllib` on Python 2. Resolving it here keeps the function working on
    # both; calling `urllib.urlretrieve` on Python 3 raises AttributeError,
    # which the broad `except` below would otherwise hide as a failed download.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    num_results = min(max(max_tries, 1), 10)
    res = service.cse().list(
        q=word,
        cx=credentials.images.cxString,
        searchType="image",
        fileType="png",  # this is just to match the filename template defined in `main.py`
        num=num_results).execute()

    # A search with no hits can come back without an 'items' key; treat that
    # the same as an empty result list instead of raising KeyError.
    # Copy images to disk until one works, or we run out of images and give up.
    for search_result in res.get('items', []):
        img_url = search_result['link']
        try:
            logging.info('about to retrieve: %s', word)
            urlretrieve(img_url, destination_fn)
            logging.info('retrieved: %s', word)
        except Exception as e:
            # Deliberately best-effort: any failure on this link just moves on
            # to the next candidate.
            logging.error('Failed on word / url: %s / %s', word, img_url)
            logging.info(e)
            continue

        # Verify that the file is readable.
        # NOTE: `imghdr` is deprecated since Python 3.11 and removed in 3.13.
        if not imghdr.what(destination_fn):
            logging.error('Downloaded image, but was unreadable: word / filename: %s / %s', word, destination_fn)
            continue

        return True

    return False
def get_images(filenames_to_write_imgs, credentials):
    """Download one image per word via a Google Custom Search Engine.

    Background on setting up and calling `Custom Search`:
      https://developers.google.com/custom-search/docs/tutorial/creatingcse
      https://developers.google.com/api-client-library/python/apis/customsearch/v1
      https://github.com/google/google-api-python-client/tree/master/samples/customsearch

    Args:
        filenames_to_write_imgs: A dictionary of {English word: full filename
            to copy image to}.
        credentials: An object with Google CSE credentials. Its
            `images.developerKey` attribute is used as the API key, and the
            object is handed on to `_fetch_single_image` for each word.

    Returns:
        A list of words that failed.

    Raises:
        ValueError: If `filenames_to_write_imgs` is not a dict.
    """
    if not isinstance(filenames_to_write_imgs, dict):
        raise ValueError(
            '`filenames_to_write_imgs` must be a dict. Instead, was %s'
            % type(filenames_to_write_imgs))

    # The service object is the handle for every API call below. An API key
    # for your own application can be obtained from the Google APIs Console
    # <http://code.google.com/apis/console>.
    search_service = build(
        "customsearch", "v1", developerKey=credentials.images.developerKey)

    # TODO(joelshor): Use `multiprocessing.Pool` to fetch many images in parallel.
    return [
        word
        for word, image_path in filenames_to_write_imgs.items()
        if not _fetch_single_image(word, image_path, search_service, credentials)
    ]
def copy_images_from_disk(filenames_to_write_imgs, media_dir, filename_regexp="image_%s.jpg"):
    """Copy an already-downloaded image for each word to its target location.

    Words are processed in the dictionary's iteration order. Each file is
    copied as soon as it is found, so images for earlier words may already
    have been copied when a later word raises.

    Args:
        filenames_to_write_imgs: A dictionary of {English word: full filename
            to copy image to}.
        media_dir: The directory in which the source images are looked up.
        filename_regexp: A `%`-format template for the source filename, with
            one slot for the word.

    Raises:
        ValueError: If the source image for a word doesn't exist.
    """
    for word, destination in filenames_to_write_imgs.items():
        source_name = filename_regexp % word
        source_path = os.path.join(media_dir, source_name)

        if os.path.exists(source_path):
            shutil.copyfile(source_path, destination)
        else:
            raise ValueError("Word `%s` was expecting image file %s, but it didn't "
                             "exist." % (word, source_path))