NVIDIA · lukeyeager · Mar 10, 2016 · Feb 26, 2016 · Feb 29, 2016
diff --git a/digits/inference/tasks/inference.py b/digits/inference/tasks/inference.py
@@ -105,7 +105,6 @@ def after_run(self):
         super(InferenceTask, self).after_run()
 
         # retrieve inference data
-        inputs = None
         visualizations = []
         outputs = {}
         if self.inference_data_filename is not None:
@@ -115,8 +114,9 @@ def after_run(self):
             # - layer activations and weights, if requested, in a group "/layers/"
             db = h5py.File(self.inference_data_filename, 'r')
 
-            # collect inputs
-            inputs = db['inputs'][...]
+            # collect input ids and data
+            input_ids = db['input_ids'][...]
+            input_data = db['input_data'][...]
 
             # collect outputs
             for output_key, output_data in db['outputs'].items():
@@ -152,13 +152,12 @@ def after_run(self):
                 # sort by layer ID (as HDF5 ASCII sorts)
                 visualizations = sorted(visualizations,key=lambda x:x['id'])
             db.close()
+            # save inference data for further use
+            self.inference_inputs = {'ids': input_ids, 'data': input_data}
+            self.inference_outputs = outputs
+            self.inference_layers = visualizations
         self.inference_log.close()
 
-        # save inference to data for further use
-        self.inference_inputs = inputs
-        self.inference_outputs = outputs
-        self.inference_layers = visualizations
-
     @override
     def offer_resources(self, resources):
         reserved_resources = {}

diff --git a/digits/model/images/classification/views.py b/digits/model/images/classification/views.py
@@ -306,8 +306,8 @@ def classify_one():
 
     image = None
     predictions = []
-    if inputs is not None and len(inputs) == 1:
-        image = utils.image.embed_image_html(inputs[0])
+    if inputs is not None and len(inputs['data']) == 1:
+        image = utils.image.embed_image_html(inputs['data'][0])
         # convert to class probabilities for viewing
         last_output_name, last_output_data = outputs.items()[-1]
 
@@ -410,8 +410,25 @@ def classify_many():
     # delete job
     scheduler.delete_job(inference_job)
 
+    if outputs is not None and len(outputs) < 1:
+        # an error occurred
+        outputs = None
+
+    if inputs is not None:
+        # retrieve path and ground truth of images that were successfully processed
+        paths = [paths[idx] for idx in inputs['ids']]
+        ground_truths = [ground_truths[idx] for idx in inputs['ids']]
+
+    # defaults
     classifications = None
-    if len(outputs) > 0:
+    show_ground_truth = None
+    top1_accuracy = None
+    top5_accuracy = None
+    confusion_matrix = None
+    per_class_accuracy = None
+    labels = None
+
+    if outputs is not None:
         # convert to class probabilities for viewing
         last_output_name, last_output_data = outputs.items()[-1]
         if len(last_output_data) < 1:
@@ -423,28 +440,66 @@ def classify_many():
         indices = (-scores).argsort()[:, :5]
 
         labels = model_job.train_task().get_labels()
+        n_labels = len(labels)
+
+        # remove invalid ground truth
+        ground_truths = [x if x is not None and (0 <= x < n_labels) else None for x in ground_truths]
+
+        # how many pieces of ground truth do we have?
+        n_ground_truth = len([1 for x in ground_truths if x is not None])
+        show_ground_truth = n_ground_truth > 0
+
+        # compute classifications and statistics
         classifications = []
+        n_top1_accurate = 0
+        n_top5_accurate = 0
+        confusion_matrix = np.zeros((n_labels,n_labels), dtype=np.dtype(int))
         for image_index, index_list in enumerate(indices):
             result = []
+            if ground_truths[image_index] is not None:
+                if ground_truths[image_index] == index_list[0]:
+                    n_top1_accurate += 1
+                if ground_truths[image_index] in index_list:
+                    n_top5_accurate += 1
+                if (0 <= ground_truths[image_index] < n_labels) and (0 <= index_list[0] < n_labels):
+                   confusion_matrix[ground_truths[image_index], index_list[0]] += 1
             for i in index_list:
                 # `i` is a category in labels and also an index into scores
                 result.append((labels[i], round(100.0*scores[image_index, i],2)))
             classifications.append(result)
 
+        # accuracy
+        if show_ground_truth:
+            top1_accuracy = round(100.0 * n_top1_accurate / n_ground_truth, 2)
+            top5_accuracy = round(100.0 * n_top5_accurate / n_ground_truth, 2)
+            per_class_accuracy = []
+            for x in xrange(n_labels):
+                n_examples = sum(confusion_matrix[x])
+                per_class_accuracy.append(round(100.0 * confusion_matrix[x,x] / n_examples, 2) if n_examples > 0 else None)
+        else:
+            top1_accuracy = None
+            top5_accuracy = None
+            per_class_accuracy = None
+
         # replace ground truth indices with labels
-        ground_truths = [labels[x] if x is not None and (0 <= x < len(labels)) else None for x in ground_truths]
+        ground_truths = [labels[x] if x is not None and (0 <= x < n_labels ) else None for x in ground_truths]
 
     if request_wants_json():
         joined = dict(zip(paths, classifications))
         return flask.jsonify({'classifications': joined})
     else:
         return flask.render_template('models/images/classification/classify_many.html',
-                model_job       = model_job,
-                job             = inference_job,
-                paths           = paths,
-                classifications = classifications,
-                show_ground_truth= not(ground_truths == [None]*len(ground_truths)),
-                ground_truths   = ground_truths
+                model_job          = model_job,
+                job                = inference_job,
+                paths              = paths,
+                classifications    = classifications,
+                show_ground_truth  = show_ground_truth,
+                ground_truths      = ground_truths,
+                top1_accuracy      = top1_accuracy,
+                top5_accuracy      = top5_accuracy,
+                confusion_matrix   = confusion_matrix,
+                per_class_accuracy = per_class_accuracy,
+                labels             = labels,
                 )
 
 @blueprint.route('/top_n', methods=['POST'])
@@ -512,7 +567,7 @@ def top_n():
     scheduler.delete_job(inference_job)
 
     results = None
-    if len(outputs) > 0:
+    if outputs is not None and len(outputs) > 0:
         # convert to class probabilities for viewing
         last_output_name, last_output_data = outputs.items()[-1]
         scores = last_output_data
@@ -521,14 +576,15 @@ def top_n():
             raise RuntimeError('An error occured while processing the images')
 
         labels = model_job.train_task().get_labels()
+        images = inputs['data']
         indices = (-scores).argsort(axis=0)[:top_n]
         results = []
         # Can't have more images per category than the number of images
-        images_per_category = min(top_n, len(inputs))
+        images_per_category = min(top_n, len(images))
         for i in xrange(indices.shape[1]):
             result_images = []
             for j in xrange(images_per_category):
-                result_images.append(inputs[indices[j][i]])
+                result_images.append(images[indices[j][i]])
             results.append((
                     labels[i],
                     utils.image.embed_image_html(

diff --git a/digits/model/images/generic/views.py b/digits/model/images/generic/views.py
@@ -286,8 +286,8 @@ def infer_one():
         pass
 
     image = None
-    if inputs is not None and len(inputs) == 1:
-        image = utils.image.embed_image_html(inputs[0])
+    if inputs is not None and len(inputs['data']) == 1:
+        image = utils.image.embed_image_html(inputs['data'][0])
 
     if request_wants_json():
         return flask.jsonify({'outputs': dict((name, blob.tolist()) for name,blob in outputs.iteritems())})
@@ -373,10 +373,13 @@ def infer_many():
     # delete job folder and remove from scheduler list
     scheduler.delete_job(inference_job)
 
-    if len(outputs) < 1:
+    if outputs is not None and len(outputs) < 1:
         # an error occurred
         outputs = None
 
+    if inputs is not None:
+        paths = [paths[idx] for idx in inputs['ids']]
+
     if request_wants_json():
         result = {}
         for i, path in enumerate(paths):

diff --git a/digits/templates/models/images/classification/classify_many.html b/digits/templates/models/images/classification/classify_many.html
@@ -23,17 +23,63 @@ <h1>
 </div>
 {% endif %}
 
+{% if show_ground_truth %}
+<div class="panel-heading">
+    <h4>Summary</h4>
+</div>
+<div class="panel-body">
+    <dl>
+        <dt>Top-1 accuracy</dt>
+        <dd>{{top1_accuracy}}%</dd>
+    </dl>
+    <dl>
+        <dt>Top-5 accuracy</dt>
+        <dd>{{top5_accuracy}}%</dd>
+    </dl>
+</div>
+{% endif %}
+
 {% endblock %}
 
 {% block job_content_details %}
 
+{% if classifications %}
+    {% if show_ground_truth %}
+    <div class="panel-heading">
+        <h4>Confusion matrix</h4>
+    </div>
+    <table class="table">
+        <tr>
+            <th></th>
+            {% for label in labels %}
+                <th>{{label}}</th>
+            {% endfor %}
+            <th>Per-class accuracy</th>
+        </tr>
+        {% for row in confusion_matrix %}
+            {% set label_idx = loop.index0 %}
+            {% if per_class_accuracy[label_idx] is not none %}
+            <tr>
+                <th>{{labels[label_idx]}}</th>
+                {% for column in row %}
+                    {% set column_idx = loop.index[0] %}
+                    <td>{{column}}</td>
+                {% endfor %}
+                <td>{{per_class_accuracy[label_idx]}}%</td>
+            </tr>
+            {% endif %}
+        {% endfor %}
+    </table>
+    {% endif %}
+<div class="panel-heading">
+    <h4>All classifications</h4>
+</div>
 <table class="table">
     <tr>
         <th>Path</th>
         {% if show_ground_truth %}<th>Ground truth</th>{% endif %}
         <th colspan=10>Top predictions</th>
     </tr>
-    {% if classifications %}
     {% for path in paths %}
     {% set result = classifications[loop.index0] %}
     {% set ground_truth = ground_truths[loop.index0] %}
@@ -50,8 +96,8 @@ <h1>
         {% endfor %}
     </tr>
     {% endfor %}
-    {% endif %}
 </table>
+{% endif %}
 
 {% endblock %}
 
diff --git a/tools/inference.py b/tools/inference.py
@@ -79,12 +79,16 @@ def infer(input_list, output_dir, jobs_dir, model_id, epoch, batch_size, layers,
     else:
         raise InferenceError("Unknown dataset type")
 
-    # load and resize images
-    images = []
+    n_input_samples = 0  # number of samples we were able to load
+    input_ids = []       # indices of samples within file list
+    input_data = []      # sample data
+
+    # load paths from file
     paths = None
     with open(input_list) as infile:
         paths = infile.readlines()
-    for path in paths:
+    # load and resize images
+    for idx, path in enumerate(paths):
         path = path.strip()
         try:
             image = utils.image.load_image(path.strip())
@@ -93,29 +97,32 @@ def infer(input_list, output_dir, jobs_dir, model_id, epoch, batch_size, layers,
                         channels    = channels,
                         resize_mode = resize_mode,
                         )
-            images.append(image)
+            input_ids.append(idx)
+            input_data.append(image)
+            n_input_samples = n_input_samples + 1
         except utils.errors.LoadImageError as e:
             print e
 
     # perform inference
     visualizations = None
     predictions = []
 
-    if len(images) == 0:
+    if n_input_samples == 0:
         raise InferenceError("Unable to load any image from file '%s'" % repr(input_list))
-    elif len(images) == 1:
+    elif n_input_samples == 1:
         # single image inference
-        outputs, visualizations = model.train_task().infer_one(images[0], snapshot_epoch=epoch, layers=layers, gpu=gpu)
+        outputs, visualizations = model.train_task().infer_one(input_data[0], snapshot_epoch=epoch, layers=layers, gpu=gpu)
     else:
         assert layers == 'none'
-        outputs = model.train_task().infer_many(images, snapshot_epoch=epoch, gpu=gpu)
+        outputs = model.train_task().infer_many(input_data, snapshot_epoch=epoch, gpu=gpu)
 
     # write to hdf5 file
     db_path = os.path.join(output_dir, 'inference.hdf5')
     db = h5py.File(db_path, 'w')
 
-    # write input images to database
-    db.create_dataset("inputs", data = images)
+    # write input ids and images to database
+    db.create_dataset("input_ids", data = input_ids)
+    db.create_dataset("input_data", data = input_data)
 
     # write outputs to database
     db_outputs = db.create_group("outputs")