-
Notifications
You must be signed in to change notification settings - Fork 112
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Loss and scoring changes #14
Changes from 3 commits
4c53913
f6ac03d
29ae87f
1d85344
3624e38
675cbba
29328e6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,23 +3,22 @@ | |
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"Using Theano backend.\n" | ||
"Using TensorFlow backend.\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"from keras.datasets import mnist\n", | ||
"from keras.utils.np_utils import to_categorical\n", | ||
"from devol import DEvol, GenomeHandler\n", | ||
"import numpy as np" | ||
"import numpy as np\n", | ||
"from keras import backend as K\n" | ||
] | ||
}, | ||
{ | ||
|
@@ -32,15 +31,36 @@ | |
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"execution_count": 15, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"(60000, 28, 28)" | ||
] | ||
}, | ||
"execution_count": 15, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"(x_train, y_train), (x_test, y_test) = mnist.load_data()\n", | ||
"x_train = x_train.reshape(x_train.shape[0], 1, 28, 28).astype('float32') / 255\n", | ||
"x_test = x_test.reshape(x_test.shape[0], 1, 28, 28).astype('float32') / 255\n", | ||
"x_train.shape" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 16, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# set image format so that it's compatible with backend\n", | ||
"image_format = (28, 28, 1) if K.image_data_format() is 'channels_last' else (1, 28, 28 )\n", | ||
"\n", | ||
"x_train = x_train.reshape((x_train.shape[0],) + image_format).astype('float32') / 255\n", | ||
"x_test = x_test.reshape((x_test.shape[0],) + image_format).astype('float32') / 255\n", | ||
"y_train = to_categorical(y_train)\n", | ||
"y_test = to_categorical(y_test)\n", | ||
"dataset = ((x_train, y_train), (x_test, y_test))" | ||
|
@@ -56,11 +76,19 @@ | |
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"execution_count": 20, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"\n", | ||
"\n", | ||
"\u001b[A\u001b[A" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"max_conv_layers = 6\n", | ||
"max_dense_layers = 2 # including final softmax layer\n", | ||
|
@@ -85,62 +113,47 @@ | |
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": false | ||
"scrolled": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"Generations: 0%| | 0/10 [00:00<?, ?it/s]\n", | ||
"Gen 1 Models Fitness Eval: 0%| | 0/20 [00:00<?, ?it/s]" | ||
] | ||
}, | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Genome encoding and accuracy data stored at Fri May 26 09:37:46 2017.csv \n", | ||
"\n" | ||
] | ||
}, | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"\u001b[A" | ||
] | ||
} | ||
], | ||
"outputs": [], | ||
"source": [ | ||
"num_generations = 10\n", | ||
"population_size = 20\n", | ||
"num_epochs = 1\n", | ||
"\n", | ||
"devol = DEvol(genome_handler)\n", | ||
"model, accurracy = devol.run(dataset, num_generations, population_size, num_epochs)\n", | ||
"print model.summary()" | ||
"model, accurracy, loss = devol.run(dataset, num_generations, population_size, num_epochs)\n", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This also needs to be added to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. silly me, I didn't even look in there! I have updated it for the return values of |
||
"model.summary()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"anaconda-cloud": {}, | ||
"kernelspec": { | ||
"display_name": "Python [default]", | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python2" | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 2 | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython2", | ||
"version": "2.7.12" | ||
"pygments_lexer": "ipython3", | ||
"version": "3.6.0" | ||
} | ||
}, | ||
"nbformat": 4, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
from __future__ import print_function | ||
|
||
|
||
from genome_handler import GenomeHandler | ||
import numpy as np | ||
from keras.models import Sequential | ||
|
@@ -11,98 +12,145 @@ | |
import csv | ||
from tqdm import trange, tqdm | ||
import sys | ||
import operator | ||
|
||
METRIC_OPS = [operator.__lt__, operator.__gt__] | ||
METRIC_OBJECTIVES = [min, max] | ||
|
||
|
||
class DEvol: | ||
|
||
def __init__(self, genome_handler, data_path=""): | ||
self.genome_handler = genome_handler | ||
self.datafile = data_path or (datetime.now().ctime() + '.csv') | ||
self.bssf = (None, 0.) # model, accuracy | ||
self.bssf = (None, float('inf'), 0.) # model, loss, accuracy | ||
|
||
print("Genome encoding and accuracy data stored at", self.datafile, "\n") | ||
|
||
def set_objective(self, metric):
    """Choose the metric this search optimizes and derive its helpers.

    Args:
        metric (str): 'accuracy' (or its alias 'acc') or 'loss'.

    Raises:
        ValueError: if ``metric`` is not one of the accepted names.

    Sets ``self.metric``, ``self.objective`` ('max' or 'min'),
    ``self.metric_index``, ``self.metric_op`` and ``self.metric_objective``.
    """
    # Strings are compared by value: `is` tests object identity and only
    # appears to work when both operands happen to be interned literals.
    if metric == 'acc':
        metric = 'accuracy'
    if metric not in ('loss', 'accuracy'):
        raise ValueError(
            'Invalid metric name {} provided - should be "accuracy" or "loss"'.format(metric))

    maximize = metric == 'accuracy'
    self.metric = metric
    self.objective = "max" if maximize else "min"
    # Position of the metric inside a (model, loss, accuracy) tuple.
    self.metric_index = -1 if maximize else 1
    # "Is the new value better than the current best?" comparison.
    self.metric_op = operator.gt if maximize else operator.lt
    # Picks the best value out of a collection of metric values.
    self.metric_objective = max if maximize else min
|
||
def run(self, dataset, num_generations, pop_size, epochs, fitness=None, metric='accuracy'):
    """Run the genetic search on a dataset.

    Args:
        dataset: tuple or list of numpy arrays in the form
            ((train_data, train_labels), (validation_data, validation_labels))
        num_generations (int): number of generations to search
        pop_size (int): population size
        epochs (int): epochs to run each model for, passed on to
            ``self.evaluate``
        fitness (None, optional): scoring function handed to ``Population``
            together with the evaluated metric values. If left as default
            the population's own scaled values are used.
        metric (str, optional): must be "accuracy" or "loss"; defines what
            to optimize during the search

    Returns:
        tuple: the best result found (``self.bssf``), in the form
        (model, loss, accuracy)
    """
    self.set_objective(metric)
    generations = trange(num_generations, desc="Generations")
    (self.x_train, self.y_train), (self.x_test, self.y_test) = dataset

    # Where to look for our metric in a (model, loss, accuracy) tuple.
    # Compared by value: `is` on strings tests identity, not equality.
    metric_index = 1 if self.metric == 'loss' else -1
    # 95% of each new generation comes from crossover, the rest are the
    # best members carried over from the previous generation.
    num_children = int(pop_size * 0.95)

    # Generate and evaluate the initial random population
    members = [self.genome_handler.generate() for _ in range(pop_size)]
    fit = self._evaluate_members(members, epochs, metric_index,
                                 "Gen 1 Models Fitness Eval")
    pop = Population(members, fit, fitness, obj=self.objective)
    tqdm.write("Generation 1:\t\tbest {3}: {0:0.4f}\t\taverage: {1:0.4f}\t\tstd: {2:0.4f}".format(
        self.metric_objective(fit), np.mean(fit), np.std(fit), self.metric))

    # Evolve over generations
    for gen in generations:
        if gen == 0:
            # Generation 0 is the initial population evaluated above.
            continue
        members = [self.crossover(pop.select(), pop.select())
                   for _ in range(num_children)]  # Crossover
        members += pop.getBest(pop_size - num_children)
        members = [self.mutate(member, gen) for member in members]  # Mutation
        fit = self._evaluate_members(members, epochs, metric_index,
                                     "Gen %i Models Fitness Eval" % (gen + 1))
        pop = Population(members, fit, fitness, obj=self.objective)
        tqdm.write("Generation {3}:\t\tbest {4}: {0:0.4f}\t\taverage: {1:0.4f}\t\tstd: {2:0.4f}".format(
            self.metric_objective(fit), np.mean(fit), np.std(fit), gen + 1, self.metric))
    return self.bssf

def _evaluate_members(self, members, epochs, metric_index, desc):
    """Evaluate every genome, track the best result, return the metric values.

    Updates ``self.bssf`` whenever ``self.metric_op`` reports that a result
    beats the current best. Returns the per-member metric values as a
    numpy array, in the same order as ``members``.
    """
    fit = []
    for i in trange(len(members), desc=desc):
        res = self.evaluate(members[i], epochs)
        v = res[metric_index]
        if self.metric_op(v, self.bssf[metric_index]):
            self.bssf = res
        fit.append(v)
    return np.array(fit)
|
||
def evaluate(self, genome, epochs):
    """Decode a genome into a model, train it, score it and log the result.

    Args:
        genome: genome passed to ``self.genome_handler.decode``.
        epochs (int): maximum number of epochs passed to ``model.fit``;
            training stops early once ``val_loss`` stops improving
            (patience of 1).

    Returns:
        tuple: (model, loss, accuracy), where loss and accuracy are the two
        values returned by ``model.evaluate`` on the test split.
    """
    model = self.genome_handler.decode(genome)
    early_stop = EarlyStopping(monitor='val_loss', patience=1, verbose=0)
    model.fit(self.x_train, self.y_train,
              validation_data=(self.x_test, self.y_test),
              epochs=epochs,
              verbose=0,
              callbacks=[early_stop])
    loss, accuracy = model.evaluate(self.x_test, self.y_test, verbose=0)

    # Record the stats: one CSV row per evaluated genome
    stats_row = list(genome) + [loss, accuracy]
    with open(self.datafile, 'a') as csvfile:
        csv.writer(csvfile, delimiter=',',
                   quotechar='"', quoting=csv.QUOTE_MINIMAL).writerow(stats_row)

    return model, loss, accuracy
|
||
def crossover(self, genome1, genome2):
    """Combine two genomes with a single-point crossover.

    A cut position is drawn with ``rand.randint(0, len(genome1))``; the
    child takes ``genome1`` up to the cut and ``genome2`` from the cut on.
    """
    cut = rand.randint(0, len(genome1))
    head, tail = genome1[:cut], genome2[cut:]
    return head + tail
|
||
def mutate(self, genome, generation):
    """Mutate a genome, with more mutations in later generations.

    Args:
        genome: genome passed on to ``self.genome_handler.mutate``.
        generation (int): index of the current generation.

    Returns:
        Whatever ``self.genome_handler.mutate`` returns for the genome.
    """
    # increase mutations as program continues; floor division keeps the
    # count an integer on Python 3, where `/` would produce a float
    num_mutations = max(3, generation // 4)
    return self.genome_handler.mutate(genome, num_mutations)
|
||
|
||
class Population: | ||
|
||
def __len__(self): | ||
return len(self.members) | ||
|
||
def __init__(self, members, fitnesses, score): | ||
def __init__(self, members, fitnesses, score, obj='max'): | ||
self.members = members | ||
fitnesses -= min(fitnesses) | ||
fitnesses /= max(fitnesses) | ||
self.scores = list(map(score or self.score, fitnesses)) | ||
scores = fitnesses - fitnesses.min() | ||
scores /= scores.max() | ||
if obj is 'min': | ||
scores = 1 - scores | ||
if score: | ||
self.scores = score(scores) | ||
else: | ||
self.scores = scores | ||
self.s_fit = sum(self.scores) | ||
|
||
def score(self, fitness):
    """Return ``fitness`` scaled by 100 and raised to the fourth power."""
    scaled = fitness * 100
    return scaled ** 4
|
||
def getBest(self, n):
    """Return the ``n`` members with the highest scores, best first.

    Args:
        n (int): how many members to return.

    Returns:
        list: at most ``n`` members, ordered by descending score.
    """
    # sorted() returns a new list rather than sorting in place, so its
    # result has to be kept. Sorting on the score alone also avoids
    # comparing the members themselves when scores tie.
    ranked = sorted(zip(self.members, self.scores),
                    key=lambda pair: pair[1], reverse=True)
    return [member for member, _ in ranked[:n]]
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
# DEvol - Deep Neural Network Evolution | ||
# DEvol - Deep Neural Network Evolution | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like you've got a typo here |
||
|
||
DEvol (DeepEvolution) utilizes genetic programming to automatically architect a deep neural network with optimal hyperparameters for a given dataset using the Keras library. This approach should design an equal or superior model to what a human could design when working under the same constraints as are imposed upon the genetic program (e.g., maximum number of layers, maximum number of convolutional filters per layer, etc.). The current setup is designed for classification problems, though this could be extended to include any other output type as well. | ||
|
||
|
@@ -43,6 +43,6 @@ DEvol is pretty straightforward to use for basic classification problems. See `d | |
|
||
1. **Prep your dataset.** DEvol expects a classification problem with labels that are one-hot encoded as it uses `categorical_crossentropy` for its loss function. Otherwise, you can prep your data however you'd like. Just pass your input shape into `GenomeHandler`. | ||
2. **Create a `GenomeHandler`.** The `GenomeHandler` defines the constraints that you apply to your models. Specify the maximum number of convolutional and dense layers, the max dense nodes and feature maps, and the input shape. You can also specify whether you'd like to allow batch_normalization, dropout, and max_pooling, which are included by default. You can also pass in a list of optimizers and activation functions you'd like to allow. | ||
3. **Create and run the DEvol.** Pass your `GenomeHandler` to the `DEvol` constructor, and run it. Here you have a few more options such as the number of generations, the population size, epochs used for fitness evaluation, and an (optional) fitness function which converts a model's accuracy into a fitness score. | ||
3. **Create and run the DEvol.** Pass your `GenomeHandler` to the `DEvol` constructor, and run it. Here you have a few more options such as the number of generations, the population size, epochs used for fitness evaluation, the evaluation metric to optimize (accuracy or loss) and an (optional) fitness function which converts a model's accuracy or loss into a fitness score. | ||
|
||
See `demo.ipynb` for a basic example. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it might be cleaner here to just go
K.set_image_data_format("channels_first")
and keeping (1, 28, 28)
rather than changing the shape of the images based on their current setting. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
wow I was unaware
K.set_image_data_format()
actually changes what tf or theano expects; I was under the impression it sets the format in keras, but I can see that it works* in tf. I have to very, very respectfully put my two cents in here and say that generally I think people use one main backend and are used to seeing dimensions ordered that way. I personally think a backend check is more polite to other backend users, and it's also the way it's handled in the keras examples. I think I'd find it a bit annoying if someone changed my expected dim ordering.
But of course it's just a demo and if theano dims are your preference that's totally fine. Maybe we should just comment that we're changing the dims to theano style if we do that, particularly since it's a one liner it could be missed and could be a faff for someone used to using tf and who wants to play with the notebook and import some of their own images etc... also for new users this backend dimensions thing can be confusing.
what do you think?
if you want me to just shut up and do
K.set_image_data_format("channels_first")
that's fine. *I am concerned about batch normalization for theano, though: genome_handler should be checking the backend and setting axis to 1 for theano users, since it's currently using the default, which is -1 (for tensorflow). Since you seem to use theano, I would think this would be messing up your results on batch-normalized networks. Have you had any issues?
I can open a separate PR for this... but I'm sure you'll agree that while changing dim ordering is fine in demo we should not be changing backend dim ordering in actual library since it would really mess people up and need to implement a proper check and a global or something for bn axis.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Well,
K.set_image_data_format
doesn't permanently change your backend configuration in your keras.conf
file or anything - it just changes it within the scope that you're in, so I don't think it'll be a problem. If people prefer it the other way then they can just swap them. I'm more concerned about the code looking simple and straightforward, and inferring the backend is much less conventional and straightforward-looking than setting it. That said, it may be best to set it to channels_last
as I believe that's tensorflow's default, and therefore more people will be using it that way. A comment is fine if you'd like, but I don't think it's needed. Until you mentioned it, I'd never heard of problems with BN with Theano... Is it not supported or something? It doesn't say anything like that in the Keras docs. I've used Theano and Tensorflow and have never had problems (at least not that I was aware of) with it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
cool I shall update to
channels_last
as you say. Re BN: the keras default axis for BN is -1 (channels_last), so if you're using Theano with the channels_first config it should be set to 1 to normalize along the feature axis.
see docstring at BatchNormalization
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wow. Great catch. Yeah, we should add that in.