This repository has been archived by the owner on May 24, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
constants.py
128 lines (99 loc) · 4.54 KB
/
constants.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import numpy as np
import tensorflow as tf
from collections import namedtuple
from os.path import join as path_join
from pickle import dump as pickle_dump
from tensorflow.python.lib.io.tf_record import TFRecordWriter
# Dataset-wide statistics for the Cdiscount image-classification task.
NUM_TRAIN_IMAGES = 12371293  # total number of images in the full training set
NUM_CLASSES = 5270  # number of distinct category_id labels
# Root folder holding the raw and converted dataset files (machine-specific path).
DATA_SET_FOLDER = '/Users/Sophie/Documents/cdiscount-data/'
# CSV mapping category_id to human-readable category names.
CATEGORY_NAMES_FILE_NAME = path_join(DATA_SET_FOLDER, 'category_names.csv')
# Original BSON dump of (a sample of) the training data.
BSON_DATA_FILE_NAME = path_join(DATA_SET_FOLDER, 'train_example.bson')
# TFRecord files produced by the conversion / split utilities below.
TRAIN_TF_DATA_FILE_NAME = path_join(DATA_SET_FOLDER, 'train_train.tfrecord')
TRAIN_VAL_TF_DATA_FILE_NAME = path_join(DATA_SET_FOLDER, 'train_val.tfrecord')
VALIDATION_TF_DATA_FILE_NAME = path_join(DATA_SET_FOLDER, 'validation_all.tfrecord')
TEST_TF_DATA_FILE_NAME = path_join(DATA_SET_FOLDER, 'test.tfrecord')
# Pickled (images, labels) tuple for a small validation subset.
VALIDATION_PICKLE_DATA_FILE_NAME = path_join(DATA_SET_FOLDER, 'validation_minor.pickle')
# Fixed image geometry used throughout the pipeline.
IMAGE_WIDTH = 180
IMAGE_HEIGHT = 180
IMAGE_CHANNELS = 3
IMAGE_SIZE = IMAGE_HEIGHT * IMAGE_WIDTH * IMAGE_CHANNELS  # flattened pixel count
# Presumably per-channel (R, G, B) mean pixel values over the training set
# for input centering — TODO confirm against the preprocessing code.
MEAN_PIXEL_VALUE = [199.02578378, 195.51931958, 192.0718313]
# Bundles the pieces an input pipeline needs: a reader object, a file glob
# pattern, and batching parameters.
DataPipeline = namedtuple('DataPipeline', ['reader', 'data_pattern', 'batch_size', 'num_threads'])
# Describes a conv filter as (height, width, in_channels, out_channels).
ConvFilterShape = namedtuple('ConvFilterShape', ['filter_height', 'filter_width', 'in_channels', 'out_channels'])
def make_summary(name, value):
    """Build a tf.Summary protobuf holding a single scalar entry.

    :param name: Tag under which the value is reported (coerced to str).
    :param value: Scalar to record (coerced to float).
    :return: A tf.Summary proto with exactly one simple_value entry.
    """
    proto = tf.Summary()
    entry = proto.value.add()
    entry.tag = str(name)
    entry.simple_value = float(value)
    return proto
def compute_accuracy(labels=None, predictions=None):
    """
    Compute accuracy for a batch of labels and predictions.

    Each element is treated as an example.

    :param labels: The true labels, either as 1-D class indices or a 2-D
        one-hot / score matrix (reduced with argmax over the last axis).
    :param predictions: The predicted class indices.
    :return: The fraction of matching elements as a float; 0.0 for empty
        input (the previous sum/size formulation produced 0/0 -> nan plus
        a RuntimeWarning).
    """
    labels = np.array(labels, dtype=np.int32)
    if labels.ndim == 2:
        labels = np.argmax(labels, -1)
    if labels.size == 0:
        # Guard the empty batch explicitly instead of dividing by zero.
        return 0.0
    # np.mean of the boolean match mask == matches / total.
    return float(np.mean(np.equal(labels, predictions)))


# Expose a friendlier metric name, e.g. when a training framework reports
# the metric by its function __name__.
compute_accuracy.__name__ = 'accuracy'
def random_split_tf_record(file, filenames, ratios=(0.5, 0.5)):
    """
    Randomly split a tf record into two parts (evenly by default).

    Each example in ``file`` is routed to ``filenames[0]`` with probability
    ``ratios[0] / sum(ratios)`` and to ``filenames[1]`` otherwise.

    :param file: Path of the source TFRecord file.
    :param filenames: Pair of destination TFRecord paths; refuses to overwrite.
    :param ratios: Pair of relative weights for the two output files.
    :return: None.
    """
    assert (len(filenames) == len(ratios)) and (len(filenames) == 2), 'Support two parts only'
    if tf.gfile.Exists(filenames[0]) or tf.gfile.Exists(filenames[1]):
        raise FileExistsError('File exists. Continuing will overwrite it. Abort!')
    first_share = ratios[0] / sum(ratios)
    with TFRecordWriter(filenames[0]) as writer_a, TFRecordWriter(filenames[1]) as writer_b:
        for record in tf.python_io.tf_record_iterator(file):
            # Route each record by an independent uniform draw.
            target = writer_a if np.random.rand(1) <= first_share else writer_b
            target.write(record)
        writer_a.flush()
        writer_b.flush()
def convert_to_pickle(tf_file, filename):
    """
    Decode a TFRecord of jpeg-encoded examples and pickle them as arrays.

    Reads every example from ``tf_file``, decodes the 'img' jpeg bytes and
    collects the 'category_id' labels, then dumps the pair
    (images as a uint8 array, labels as an int32 array) to ``filename``.

    :param tf_file: Path of the source TFRecord file.
    :param filename: Destination pickle file path (overwritten if present).
    :return: None.
    """
    imgs = []
    labels = []
    # No need to perform initialization in this simple program.
    with tf.Graph().as_default() as g:
        pl = tf.placeholder(tf.string, shape=[])
        # Use the module-level image constants instead of the previously
        # hard-coded 180 / 3 literals so the geometry stays consistent.
        img = tf.image.decode_jpeg(pl, channels=IMAGE_CHANNELS)
        img.set_shape([IMAGE_HEIGHT, IMAGE_WIDTH, None])
        with tf.Session(graph=g) as sess:
            for example in tf.python_io.tf_record_iterator(tf_file):
                feature = tf.train.Example.FromString(example).features.feature
                labels.append(feature['category_id'].int64_list.value[0])
                raw_img = feature['img'].bytes_list.value[0]
                imgs.append(sess.run(img, feed_dict={pl: raw_img}))
    with open(filename, mode='wb') as pickle_f:
        pickle_dump((np.array(imgs, dtype=np.uint8), np.array(labels, dtype=np.int32)), pickle_f)
def convert_to_npz(tf_file, filename):
    """
    Decode a TFRecord of jpeg-encoded examples and save them as an .npz file.

    Reads every example from ``tf_file``, decodes the 'img' jpeg bytes and
    collects the 'category_id' labels, then writes a compressed npz archive
    with keys ``validation_data`` (uint8 images) and ``validation_labels``
    (int32 labels).

    :param tf_file: Path of the source TFRecord file.
    :param filename: Destination .npz file path (overwritten if present).
    :return: None.
    """
    imgs = []
    labels = []
    # No need to perform initialization in this simple program.
    with tf.Graph().as_default() as g:
        pl = tf.placeholder(tf.string, shape=[])
        # Use the module-level image constants instead of the previously
        # hard-coded 180 / 3 literals so the geometry stays consistent.
        img = tf.image.decode_jpeg(pl, channels=IMAGE_CHANNELS)
        img.set_shape([IMAGE_HEIGHT, IMAGE_WIDTH, None])
        with tf.Session(graph=g) as sess:
            for example in tf.python_io.tf_record_iterator(tf_file):
                feature = tf.train.Example.FromString(example).features.feature
                labels.append(feature['category_id'].int64_list.value[0])
                raw_img = feature['img'].bytes_list.value[0]
                imgs.append(sess.run(img, feed_dict={pl: raw_img}))
    np.savez_compressed(filename, validation_data=np.array(imgs, dtype=np.uint8),
                        validation_labels=np.array(labels, dtype=np.int32))