-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlabel_maker.py
49 lines (38 loc) · 1.47 KB
/
label_maker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/usr/bin/python
import os
from pathlib import Path
from datetime import datetime
from google.cloud import vision
# Directory (relative to the working directory) that receives the run logs.
LOG_PATH = Path("logs")

# Root of the photo collection; taken from the PHOTOS_DIR environment variable.
# A missing variable raises KeyError as soon as this module is imported.
PATH_TO_IMAGES = Path(os.environ["PHOTOS_DIR"])

# Vision API client shared by every request; created once at import time.
client = vision.ImageAnnotatorClient()

# Lower-case label descriptions that hint an image shows a document.
# NOTE(review): the trailing empty string looks unintentional; it still counts
# toward len(POTENTIAL_DOCUMENT), which is_paper() uses as its denominator.
POTENTIAL_DOCUMENT = [
    'font',
    'material property',
    'parallel',
    'stationery',
    'recipe',
    'paper',
    'paper product',
    'letter',
    'document',
    'post-it note',
    'screenshot',
    '',
]
def is_paper(labels: list, candidates=None):
    """Decide whether an image's labels suggest it shows a document.

    Args:
        labels: Label descriptions for one image. They are compared verbatim
            against the candidates, so pass them in the same case.
        candidates: Optional sized collection of document-like labels to
            match against. Defaults to the module-level POTENTIAL_DOCUMENT.

    Returns:
        A ``(is_document, match_ratio)`` tuple. ``is_document`` is True when
        at least one label is a candidate; ``match_ratio`` is the number of
        matching labels divided by the number of candidates.
    """
    if candidates is None:
        # Resolved at call time so the default always tracks the module constant.
        candidates = POTENTIAL_DOCUMENT
    if not candidates:
        # Nothing to match against; also avoids dividing by zero below.
        return False, 0.0
    matches = [label for label in labels if label in candidates]
    match_per = len(matches) / len(candidates)
    return bool(matches), match_per
def get_file(root=None, suffixes=(".jpg", ".png")):
    """Yield image files found recursively under a directory.

    Args:
        root: Directory to search. Defaults to the module-level
            PATH_TO_IMAGES.
        suffixes: File extensions to accept, each with its leading dot.
            Matching ignores case. Note that ".jpeg" is not in the default.

    Yields:
        A ``pathlib.Path`` for every regular file whose suffix is accepted.
    """
    if root is None:
        # Resolved at call time so the default always tracks the module constant.
        root = PATH_TO_IMAGES
    wanted = {suffix.lower() for suffix in suffixes}
    for p in Path(root).glob("**/*.*"):
        # glob() also matches directories (e.g. "album.jpg/"); skip them so
        # callers can safely open every yielded path as a file.
        if p.suffix.lower() in wanted and p.is_file():
            yield p
def label_maker():
    """Label every image from ``get_file()`` and log the likely documents.

    Each image is sent to Vision API label detection. The lower-cased label
    descriptions are passed to ``is_paper()``; when it reports a match the
    image is announced on stdout and a ``<path>: <ratio> <labels>`` line is
    written to a timestamped log file under ``LOG_PATH``.
    """
    # Path.open('w') does not create missing parent directories.
    LOG_PATH.mkdir(parents=True, exist_ok=True)
    # str(datetime.now()) contains a space and colons; colons are not valid
    # in Windows file names, so build a filesystem-safe timestamp instead.
    stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S_%f")
    log = LOG_PATH / f"potential_documents_{stamp}.log"
    with log.open('w') as logfh:
        for f in get_file():
            print(f"Examining: {f}")
            # read_bytes() opens, reads and closes the image in one step.
            content = f.read_bytes()
            image = vision.Image(content=content)
            response = client.label_detection(image=image)
            if response.error.message:
                # Per-image failures come back inside the response; without
                # this check they would look like an image with no labels.
                print(f"Skipping {f}: {response.error.message}")
                continue
            labels = [x.description.lower() for x in response.label_annotations]
            potential, percentage = is_paper(labels)
            if potential:
                print(f"{f} is probably a document.")
                logfh.write(f"{f}: {percentage} {labels}\n")
# Run the scan only when this file is executed as a script, not on import.
if __name__ == "__main__":
    label_maker()