Fix comments

openvinotoolkit · Feb 2, 2021 · 94919c7 · 94919c7
1 parent 002dc7c
commit 94919c7
Show file tree

Hide file tree

Showing 13 changed files with 38 additions and 45 deletions.
diff --git a/ci/prepare-documentation.py b/ci/prepare-documentation.py
@@ -70,6 +70,7 @@
         'machine_translation',
         'monocular_depth_estimation',
         'optical_character_recognition',
+        'place_recognition',
         'question_answering',
         'semantic_segmentation',
         'sound_classification',

diff --git a/data/dataset_definitions.yml b/data/dataset_definitions.yml
@@ -1154,7 +1154,6 @@ datasets:
 
   - name: pitts30k_val
     data_source: pitts250k
-    reader: pillow_imread
     annotation_conversion:
       converter: place_recognition
       split_file: pitts250k/datasets/pitts30k_val.mat

diff --git a/demos/place_recognition_demo/python/README.md b/demos/place_recognition_demo/python/README.md
@@ -78,7 +78,7 @@ To run the demo, you can use public or pre-trained models. To download the pre-t
 To run the demo, please provide paths to the model in the IR format, to directory with gallery images, and to an input video, image, or folder with images:
 ```bash
 python place_recognition_demo.py \
--m /home/user/netvlad.xml \
+-m /home/user/netvlad-tf.xml \
 -i /home/user/image.jpg \
 -gf /home/user/gallery_folder
 ```

diff --git a/demos/place_recognition_demo/python/models.lst b/demos/place_recognition_demo/python/models.lst
@@ -1,2 +1,2 @@
 # This file can be used with the --list option of the model downloader.
-netvlad
+netvlad-tf
diff --git a/demos/place_recognition_demo/python/place_recognition_demo.py b/demos/place_recognition_demo/python/place_recognition_demo.py
@@ -33,9 +33,6 @@
 from images_capture import open_images_capture
 
 
-INPUT_SIZE = [200, 300]
-
-
 def build_argparser():
     """ Returns argument parser. """
 
@@ -45,13 +42,13 @@ def build_argparser():
                       help='Show this help message and exit.')
     args.add_argument('-m', '--model',
                       help='Required. Path to an .xml file with a trained model.',
-                      required=True, type=str)
+                      required=True, type=Path)
     args.add_argument('-i', '--input', required=True,
                       help='Required. An input to process. The input must be a single image, '
                            'a folder of images, video file or camera id.')
     args.add_argument('-gf', '--gallery_folder',
                       help='Required. Path to a folder with images in the gallery.',
-                      required=True, type=str)
+                      required=True, type=Path)
     args.add_argument('--gallery_size', required=False, type=int,
                       help='Optional. Number of images from the gallery used for processing')
     args.add_argument('--loop', default=False, action='store_true',
@@ -92,7 +89,7 @@ def main():
     log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout)
     args = build_argparser().parse_args()
 
-    place_recognition = PlaceRecognition(args.model, args.device, args.gallery_folder, INPUT_SIZE, args.cpu_extension,
+    place_recognition = PlaceRecognition(args.model, args.device, args.gallery_folder, args.cpu_extension,
                                          args.gallery_size)
 
     cap = open_images_capture(args.input, args.loop)

diff --git a/demos/place_recognition_demo/python/place_recognition_demo/place_recognition.py b/demos/place_recognition_demo/python/place_recognition_demo/place_recognition.py
@@ -18,45 +18,42 @@
 
 import cv2
 from tqdm import tqdm
-from pathlib import Path
 
 from place_recognition_demo.common import crop_resize, l2_distance
 
 from openvino.inference_engine import IECore # pylint: disable=no-name-in-module
 
 
-class IEModel(): # pylint: disable=too-few-public-methods
+class IEModel: # pylint: disable=too-few-public-methods
     """ Class that allows working with Inference Engine model. """
 
     def __init__(self, model_path, device, cpu_extension):
         ie = IECore()
         if cpu_extension and device == 'CPU':
             ie.add_extension(cpu_extension, 'CPU')
 
-        path = '.'.join(model_path.split('.')[:-1])
-        self.net = ie.read_network(path + '.xml', path + '.bin')
-        self.output_name = list(self.net.outputs.keys())[0]
+        self.net = ie.read_network(model_path, model_path.with_suffix('.bin'))
+        self.input_name = next(iter(self.net.input_info))
+        self.output_name = next(iter(self.net.outputs))
+        self.input_size = self.net.input_info[self.input_name].input_data.shape
         self.exec_net = ie.load_network(network=self.net, device_name=device)
 
     def predict(self, image):
         ''' Takes input image and returns L2-normalized embedding vector. '''
 
         assert len(image.shape) == 4
         image = np.transpose(image, (0, 3, 1, 2))
-        out = self.exec_net.infer(inputs={'Placeholder': image})[self.output_name]
+        out = self.exec_net.infer(inputs={self.input_name: image})[self.output_name]
         return out
 
 
 class PlaceRecognition:
     """ Class representing Place Recognition algorithm. """
 
-    def __init__(self, model_path, device, gallery_path, input_size, cpu_extension, gallery_size):
-        if gallery_size:
-            self.impaths = (list(Path(gallery_path).rglob("*.jpg")))[:gallery_size]
-        else:
-            self.impaths = (list(Path(gallery_path).rglob("*.jpg")))
-        self.input_size = input_size
+    def __init__(self, model_path, device, gallery_path, cpu_extension, gallery_size):
+        self.impaths = (list(gallery_path.rglob("*.jpg")))[:gallery_size or None]
         self.model = IEModel(model_path, device, cpu_extension)
+        self.input_size = self.model.input_size[2:]
         self.embeddings = self.compute_gallery_embeddings()
 
     def compute_embedding(self, image):
@@ -82,15 +79,12 @@ def compute_gallery_embeddings(self):
         for full_path in tqdm(self.impaths, desc='Reading gallery images.'):
             image = cv2.imread(str(full_path))
             if image is None:
-                print("ERROR: cannot find image, full_path =", str(full_path))
+                print("ERROR: cannot process image, full_path =", str(full_path))
+                continue
             image = crop_resize(image, self.input_size)
             images.append(image)
 
-        embeddings = [None for _ in self.impaths]
-
-        index = 0
-        for image in tqdm(images, desc='Computing embeddings of gallery images.'):
-            embeddings[index] = self.model.predict(image).reshape([-1])
-            index += 1
+        embeddings = [self.model.predict(image).reshape([-1]) for image in tqdm(
+            images, desc='Computing embeddings of gallery images.')]
 
         return embeddings
diff --git a/models/public/index.md b/models/public/index.md
@@ -247,9 +247,9 @@ The task of image translation is to generate the output based on exemplar.
 
 The task of place recognition is to quickly and accurately recognize the location of a given query photograph.
 
-| Model Name | Implementation | OMZ Model Name                  | Accuracy | GFlops | mParams |
-| ---------- | ---------------| --------------------------------| -------- | ------ | ------- |
-| NetVLAD    | TensorFlow\*   | [netvlad](./netvlad/netvlad.md) | 82.0321% | 36.6374| 149.0021|
+| Model Name | Implementation | OMZ Model Name                           | Accuracy | GFlops | mParams |
+| ---------- | ---------------| -----------------------------------------| -------- | ------ | ------- |
+| NetVLAD    | TensorFlow\*   | [netvlad-tf](./netvlad-tf/netvlad-tf.md) | 82.0321% | 36.6374| 149.0021|
 
 ## Legal Information
 

diff --git a/models/public/netvlad/accuracy-check.yml → models/public/netvlad-tf/accuracy-check.yml b/models/public/netvlad/accuracy-check.yml → models/public/netvlad-tf/accuracy-check.yml
@@ -1,10 +1,11 @@
 models:
-  - name: netvlad
+  - name: netvlad-tf
     launchers:
       - framework: dlsdk
         adapter: reid
     datasets:
       - name: pitts30k_val
+        reader: pillow_imread
 
         preprocessing:
           - type: rgb_to_bgr

diff --git a/models/public/netvlad/model.yml → models/public/netvlad-tf/model.yml b/models/public/netvlad/model.yml → models/public/netvlad-tf/model.yml
@@ -20,7 +20,7 @@ description: >-
   dataset.
 
   For details see repository <https://github.com/uzh-rpg/netvlad_tf_open> and paper
-  <https://arxiv.org/pdf/1511.07247.pdf>.
+  <https://arxiv.org/abs/1511.07247>.
 task_type: place_recognition
 files:
   - name: netvlad.zip

diff --git a/models/public/netvlad/netvlad.md → models/public/netvlad-tf/netvlad-tf.md b/models/public/netvlad/netvlad.md → models/public/netvlad-tf/netvlad-tf.md
@@ -1,10 +1,10 @@
-# netvlad
+# netvlad-tf
 
 ## Use Case and High-Level Description
 
 NetVLAD is a CNN architecture which tackles the problem of large scale visual place recognition. The architecture uses VGG 16 as base network and NetVLAD - a new trainable generalized VLAD (Vector of Locally Aggregated Descriptors) layer. It is a place recognition model pretrained on the [Pittsburgh 250k](http://www.ok.ctrl.titech.ac.jp/~torii/project/repttile/) dataset.
 
-For details see [repository](https://github.com/uzh-rpg/netvlad_tf_open) and [paper](https://arxiv.org/pdf/1511.07247.pdf).
+For details see [repository](https://github.com/uzh-rpg/netvlad_tf_open) and [paper](https://arxiv.org/abs/1511.07247).
 
 ## Specification
 
@@ -58,7 +58,10 @@ Floating point embeddings, name - `vgg16_netvlad_pca/l2_normalize_1`,  shape - `
 
 ### Converted model
 
-The converted model has the same parameters as the original model.
+Floating point embeddings, name - `vgg16_netvlad_pca/l2_normalize_1`,  shape - `1,4096`, output data format  - `B,C`, where:
+
+- `B` - batch size
+- `C` - vector of 4096 floating-point values, local image descriptors
 
 ## Legal Information
 

diff --git a/models/public/netvlad/pre-convert.py → models/public/netvlad-tf/pre-convert.py b/models/public/netvlad/pre-convert.py → models/public/netvlad-tf/pre-convert.py
@@ -35,18 +35,16 @@ def main():
     sys.path.append(str(args.input_dir))
     nets = importlib.import_module('netvlad_tf.nets')
 
-    tf.reset_default_graph()
     image_batch = tf.placeholder(dtype=tf.float32, shape=[None, None, None, 3])
-    nets.vgg16NetvladPca(image_batch)
+    net_out = nets.vgg16NetvladPca(image_batch)
     saver = tf.train.Saver()
 
-    sess = tf.Session()
-    saver.restore(sess, str(args.input_dir / NETWORK_NAME / NETWORK_NAME))
-    outputs = ['vgg16_netvlad_pca/l2_normalize_1']
-    graph_def_freezed = tf.graph_util.convert_variables_to_constants(sess, sess.graph.as_graph_def(), outputs)
+    with tf.Session() as sess:
+        saver.restore(sess, str(args.input_dir / NETWORK_NAME / NETWORK_NAME))
+        graph_def_frozen = tf.graph_util.convert_variables_to_constants(sess, sess.graph.as_graph_def(),
+                                                                        [net_out.op.name])
 
-    tf.io.write_graph(graph_def_freezed, str(args.output_dir), str(args.output_dir / 'model_frozen.pb'),
-                         as_text=False)
+    tf.io.write_graph(graph_def_frozen, str(args.output_dir), 'model_frozen.pb', as_text=False)
 
 
 if __name__ == '__main__':

diff --git a/tools/accuracy_checker/configs/netvlad-tf.yml b/tools/accuracy_checker/configs/netvlad-tf.yml
@@ -0,0 +1 @@
+../../../models/public/netvlad-tf/accuracy-check.yml
diff --git a/tools/accuracy_checker/configs/netvlad.yml b/tools/accuracy_checker/configs/netvlad.yml